Skip to content

Commit 84e99a3

Browse files
authored
Merge pull request #294 from EleutherAI/multiprocessing-duplication
inference server duplication check
2 parents 937e71d + 6b2798c commit 84e99a3

File tree

1 file changed

+9
-4
lines changed

1 file changed

+9
-4
lines changed

elk/extraction/inference_server.py

+9 -4
Original file line number | Diff line number | Diff line change
@@ -216,13 +216,18 @@ def imap(
216216
q.put((closure_pkl, model_kwargs_pkl, shard))
217217

218218
generator = round_robin(self._result_queues) # type: ignore[arg-type]
219-
seen_dummy = False
219+
seen_ids = set()
220220
for out in tqdm(generator, total=len(dataset), disable=not use_tqdm):
221221
if out[0] == dummy_id:
222-
if seen_dummy:
222+
if dummy_id in seen_ids:
223223
continue # ignore any extra dummy rows
224-
else:
225-
seen_dummy = True
224+
elif out[0] in seen_ids:
225+
raise RuntimeError(
226+
"Round robin yielded duplicate items. "
227+
"This may be due to multiprocessing queues returning "
228+
"items repeatedly."
229+
)
230+
seen_ids.add(out[0])
226231
yield out
227232

228233

0 commit comments

Comments
 (0)