Skip to content

Commit 0f10976

Browse files
tholor and Timoeller authored
Replace 'none' titles with empty string in DPR (#663)
* replace 'none' titles with empty string
Co-authored-by: Timo Moeller <[email protected]>
1 parent 94c6b8d commit 0f10976

File tree

1 file changed

+13
-5
lines changed

1 file changed

+13
-5
lines changed

farm/data_handler/processor.py

+13 -5
Original file line number | Diff line number | Diff line change
@@ -3216,11 +3216,19 @@ def _dict_to_samples(self, dictionary: dict, **kwargs) -> [Sample]:
32163216

32173217
# featurize context passages
32183218
if self.embed_title:
3219-
# embed title with positive context passages + negative context passages
3220-
all_ctx = [tuple((title, ctx)) for title, ctx in
3221-
zip(positive_ctx_titles, positive_ctx_texts)] + \
3222-
[tuple((title, ctx)) for title, ctx in
3223-
zip(hard_negative_ctx_titles, hard_negative_ctx_texts)]
3219+
# concatenate title with positive context passages + negative context passages
3220+
def _combine_title_context(titles, texts):
3221+
res = []
3222+
for title, ctx in zip(titles, texts):
3223+
if title is None:
3224+
title = ""
3225+
logger.warning(
3226+
f"Couldn't find title although `embed_title` is set to True for DPR. Using title='' now. Related passage text: '{ctx}' ")
3227+
res.append(tuple((title, ctx)))
3228+
return res
3229+
3230+
all_ctx = _combine_title_context(positive_ctx_titles, positive_ctx_texts) + _combine_title_context(
3231+
hard_negative_ctx_titles, hard_negative_ctx_texts)
32243232
else:
32253233
all_ctx = positive_ctx_texts + hard_negative_ctx_texts
32263234

0 commit comments

Comments
 (0)