from sklearn.metrics import accuracy_score, recall_score
from sklearn.model_selection import train_test_split

- from finetune import Classifier, SequenceLabeler
+ from finetune import Classifier, SequenceLabeler, Comparison, ComparisonRegressor, MultipleChoice
from finetune.base_models import TextCNN, BERTModelCased, GPT2Model, GPTModel, RoBERTa, GPT
from finetune.config import get_config
from finetune.util.metrics import (
    sequence_labeling_token_precision,
    sequence_labeling_token_recall,
)
from finetune.datasets.reuters import Reuters
- from finetune.encoding.input_encoder import get_default_context, tokenize_context, ArrayEncodedOutput
+ from finetune.encoding.input_encoder import tokenize_context, ArrayEncodedOutput


# prevent excessive warning logs
warnings.filterwarnings("ignore")
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

class TestAuxiliaryTokenization(unittest.TestCase):
-     def test_get_default_context(self):
-         context = [
-             (2, ["single", True, 23.3, 4]),
-             (4, ["double", True, 24.3, 2]),
-             (8, ["single", False, 25.3, 3]),
-         ]
-
-         expected = ["single", True, 24.3, 3]
-         self.assertEqual(get_default_context(context), expected)
-
    def test_tokenize_context(self):
        encoded_output = ArrayEncodedOutput(
            token_ids=[
@@ -60,15 +50,16 @@ def test_tokenize_context(self):
            {'token': "only", 'start': 13, 'end': 17, 'left': 20, 'bold': False},
            {'token': "$80", 'start': 18, 'end': 21, 'left': 30, 'bold': True},
        ]
-         expanded_context = tokenize_context(context, encoded_output)
+         config = get_config(**{'default_context': {'left': 0, 'bold': False}})
+         expanded_context = tokenize_context(context, encoded_output, config)
        expected = [
-             [False, 20],
+             [False, 0],
            [False, 10],
            [False, 10],
            [False, 20],
            [True, 30],
            [True, 30],
-             [False, 20]
+             [False, 0]
        ]
        print(expanded_context)
        np.testing.assert_array_equal(expected, expanded_context)
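
To make the new default_context behaviour concrete, here is a simplified, standalone sketch of what this hunk asserts (a hypothetical helper, not finetune's implementation; the token spans and the "yesterday" context entry are made up to mirror the expected array above): any token position not covered by a user-supplied context entry, such as the special start/end token positions here, receives the configured default values instead of values inferred from the data.

    # Hypothetical helper illustrating default-context fallback; not finetune's implementation.
    def expand_context(token_spans, context_entries, default_context, keys=("bold", "left")):
        expanded = []
        for span in token_spans:
            match = None
            if span is not None:
                start, end = span
                # Use the user-supplied entry whose character span covers this token, if any.
                match = next(
                    (c for c in context_entries if c["start"] <= start and end <= c["end"]),
                    None,
                )
            source = match if match is not None else default_context
            expanded.append([source[k] for k in keys])
        return expanded

    context_entries = [
        {'token': "yesterday", 'start': 0, 'end': 9, 'left': 10, 'bold': False},
        {'token': "only", 'start': 13, 'end': 17, 'left': 20, 'bold': False},
        {'token': "$80", 'start': 18, 'end': 21, 'left': 30, 'bold': True},
    ]
    # None marks special tokens, which have no character span and so fall back to the default.
    token_spans = [None, (0, 4), (5, 9), (13, 17), (18, 21), (18, 21), None]
    print(expand_context(token_spans, context_entries, {'left': 0, 'bold': False}))
    # -> [[False, 0], [False, 10], [False, 10], [False, 20], [True, 30], [True, 30], [False, 0]]
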
@@ -119,12 +110,13 @@ def default_config(self, **kwargs):
        defaults = {
            "batch_size": 2,
            "max_length": 256,
-             "n_epochs": 1000,
+             "n_epochs": 1,  # we are mostly making sure nothing errors out
            "base_model": self.base_model,
            "val_size": 0,
            "use_auxiliary_info": True,
            "context_dim": 1,
-             "val_set": (self.trainX, self.trainY, self.train_context)
+             "val_set": (self.trainX, self.trainY, self.train_context),
+             "default_context": {'bold': False}
        }
        defaults.update(kwargs)
        return dict(get_config(**defaults))
@@ -178,7 +170,6 @@ def test_sequence_labeler_no_auxiliary(self):
        Ensure model training does not error out
        Ensure model returns reasonable predictions
        """
-
        model = SequenceLabeler(**self.default_config(use_auxiliary_info=False, val_set=(self.trainX, self.trainY)))
        model.fit(self.trainX, self.trainY_seq)
        preds = model.predict(self.trainX)
@@ -190,12 +181,64 @@ def test_sequence_labeler_auxiliary(self):
        Ensure model training does not error out
        Ensure model returns reasonable predictions
        """
-
-         model = SequenceLabeler(**self.default_config())
+         # Here we want to make sure we're actually using the context
+         model = SequenceLabeler(**self.default_config(n_epochs=1500))
        model.fit(self.trainX, self.trainY_seq, context=self.train_context)
        preds = model.predict(self.trainX, context=self.train_context)
        self._evaluate_sequence_preds(preds, True)
-
+
+     def test_comparison_auxiliary(self):
+         """
+         Ensure model training does not error out
+         Ensure model returns reasonable predictions
+         """
+         model = Comparison(**self.default_config(chunk_long_sequences=False, max_length=50, batch_size=4))
+         trainX = [['i like apples', 'i like apples']] * 4
+         trainY = ['A', 'B', 'C', 'D']
+         train_context = [
+             [self.train_context[i], self.train_context[j]] for i in [0, 1] for j in [0, 1]
+         ]
+         print(train_context)
+         model.fit(trainX, trainY, context=train_context)
+         preds = model.predict(trainX, context=train_context)
+
+     def test_comparison_regressor_auxiliary(self):
+         """
+         Ensure model training does not error out
+         Ensure model returns reasonable predictions
+         """
+         model = ComparisonRegressor(**self.default_config(chunk_long_sequences=False, max_length=50, batch_size=4))
+         trainX = [['i like apples', 'i like apples']] * 4
+         trainY = [0, .5, .5, 1]
+         train_context = [
+             [self.train_context[i], self.train_context[j]] for i in [0, 1] for j in [0, 1]
+         ]
+         print(train_context)
+         model.fit(trainX, trainY, context=train_context)
+         preds = model.predict(trainX, context=train_context)
+
+     def test_multiple_choice_auxiliary(self):
+         """
+         Ensure model training does not error out
+         Ensure model returns reasonable predictions
+         """
+         model = MultipleChoice(**self.default_config(chunk_long_sequences=False, max_length=50, batch_size=4))
+         questions = ['i like apples'] * 2
+         answers = [['happy', 'sad', 'neutral', 'not satisfied'], ['happy', 'sad', 'neutral', 'not satisfied']]
+         correct_answers = ['happy', 'sad']
+         answer_context = [
+             [{'start': 0, 'end': 5, 'token': 'happy', 'bold': False}],
+             [{'start': 0, 'end': 3, 'token': 'sad', 'bold': False}],
+             [{'start': 0, 'end': 7, 'token': 'neutral', 'bold': False}],
+             [{'start': 0, 'end': 3, 'token': 'not', 'bold': False}, {'start': 4, 'end': 13, 'token': 'satisfied', 'bold': False}],
+         ]
+         # Context looks like [[{}, {}, {}], [{}], [{}], [{}], [{}, {}]], where the first inner list
+         # is for the question and the subsequent ones are for each answer.
+         train_context = [[self.train_context[0]] + answer_context] + [[self.train_context[1]] + answer_context]
+         print(train_context)
+         model.fit(questions, answers, correct_answers, context=train_context)
+         preds = model.predict(questions, answers, context=train_context)
+
    def test_save_load(self):
        """
        Ensure saving + loading does not cause errors
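
Taken together, the new tests drive context-aware training through the public API. Below is a condensed, hypothetical usage sketch under the same assumptions the tests make; the base model choice, text, labels, and context values are placeholders, not part of this diff.

    # Hypothetical end-to-end sketch mirroring the new Comparison test; values are illustrative.
    from finetune import Comparison
    from finetune.base_models import GPT

    # One auxiliary-context entry per word, using the same {'token', 'start', 'end', 'bold'}
    # schema as the tests above; 'bold' is the single context feature (context_dim=1).
    context_a = [{'token': 'i', 'start': 0, 'end': 1, 'bold': False},
                 {'token': 'like', 'start': 2, 'end': 6, 'bold': False},
                 {'token': 'apples', 'start': 7, 'end': 13, 'bold': True}]
    context_b = [{'token': 'i', 'start': 0, 'end': 1, 'bold': False},
                 {'token': 'like', 'start': 2, 'end': 6, 'bold': False},
                 {'token': 'apples', 'start': 7, 'end': 13, 'bold': False}]

    model = Comparison(
        base_model=GPT,
        use_auxiliary_info=True,
        context_dim=1,
        default_context={'bold': False},  # fills positions with no user-supplied context
        n_epochs=1,
        batch_size=4,
        chunk_long_sequences=False,
        max_length=50,
        val_size=0,
    )
    trainX = [['i like apples', 'i like apples']] * 2
    trainY = ['A', 'B']
    # One context list per text in each comparison pair, matching the tests' nesting.
    train_context = [[context_a, context_b], [context_b, context_a]]
    model.fit(trainX, trainY, context=train_context)
    preds = model.predict(trainX, context=train_context)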