tstadel
diff --git a/‎examples/dpr_encoder.py
+13-13 b/‎examples/dpr_encoder.py
+13-13
diff --git a/‎farm/data_handler/dataset.py
+12-12 b/‎farm/data_handler/dataset.py
+12-12
@@ -72,19 +72,19 @@ def dense_passage_retrieval():
     # i.e., nq-train.json, nq-dev.json or trivia-train.json, trivia-dev.json
     label_list = ["hard_negative", "positive"]
     metric = "text_similarity_metric"
-    processor = TextSimilarityProcessor(tokenizer=query_tokenizer,
-                             passage_tokenizer=passage_tokenizer,
-                             max_seq_len_query=64,
-                             max_seq_len_passage=256,
-                             label_list=label_list,
-                             metric=metric,
-                             data_dir="../data/retriever",
-                             train_filename=train_filename,
-                             dev_filename=dev_filename,
-                             test_filename=test_filename,
-                             embed_title=embed_title,
-                             num_hard_negatives=num_hard_negatives,
-                             max_samples=max_samples)
+    processor = TextSimilarityProcessor(query_tokenizer=query_tokenizer,
+                                        passage_tokenizer=passage_tokenizer,
+                                        max_seq_len_query=64,
+                                        max_seq_len_passage=256,
+                                        label_list=label_list,
+                                        metric=metric,
+                                        data_dir="../data/retriever",
+                                        train_filename=train_filename,
+                                        dev_filename=dev_filename,
+                                        test_filename=test_filename,
+                                        embed_title=embed_title,
+                                        num_hard_negatives=num_hard_negatives,
+                                        max_samples=max_samples)
 
     # 3. Create a DataSilo that loads several datasets (train/dev/test), provides DataLoaders for them and calculates a few descriptive statistics of our datasets
     # NOTE: In FARM, the dev set metrics differ from test set metrics in that they are calculated on a token level instead of a word level
 
@@ -3,11 +3,12 @@
 import logging
 import torch
 from torch.utils.data import TensorDataset
+from collections.abc import Iterable
+from farm.utils import flatten_list
 
 logger = logging.getLogger(__name__)
 
 
-# TODO we need the option to handle different dtypes
 def convert_features_to_dataset(features):
     """
     Converts a list of feature dictionaries (one for each sample) into a PyTorch Dataset.
@@ -16,8 +17,7 @@ def convert_features_to_dataset(features):
                      names of the type of feature and the keys are the features themselves.
     :Return: a Pytorch dataset and a list of tensor names.
     """
-    # features can be an empty list in cases where down sampling occurs (e.g. Natural Questions downsamples
-    # instances of is_impossible
+    # features can be an empty list in cases where down sampling occurs (e.g. Natural Questions downsamples instances of is_impossible)
     if len(features) == 0:
         return None, None
     tensor_names = list(features[0].keys())
@@ -29,15 +29,15 @@ def convert_features_to_dataset(features):
         else:
             try:
                 # Checking weather a non-integer will be silently converted to torch.long
-                if isinstance(features[0][t_name], numbers.Number):
-                    base = features[0][t_name]
-                elif isinstance(features[0][t_name], list):
-                    if len(features[0][t_name]) > 0:
-                        base = features[0][t_name][0]
-                    else:
-                        base = 1
+                check = features[0][t_name]
+                if isinstance(check, numbers.Number):
+                    base = check
+                # extract a base variable from nested lists or tuples
+                elif isinstance(check, Iterable):
+                    base = list(flatten_list(check))[0]
+                # extract a base variable from numpy arrays
                 else:
-                    base = features[0][t_name].ravel()[0]
+                    base = check.ravel()[0]
                 if not np.issubdtype(type(base), np.integer):
                     logger.warning(f"Problem during conversion to torch tensors:\n"
                                    f"A non-integer value for feature '{t_name}' with a value of: "
@@ -51,4 +51,4 @@ def convert_features_to_dataset(features):
         all_tensors.append(cur_tensor)
 
     dataset = TensorDataset(*all_tensors)
-    return dataset, tensor_names
+    return dataset, tensor_names