Skip to content

Commit

Permalink
Duplicate content search message and error adjustment.
Browse files Browse the repository at this point in the history
  • Loading branch information
mattbeardey committed Dec 21, 2024
1 parent 60d4292 commit dd4a654
Showing 1 changed file with 9 additions and 4 deletions.
13 changes: 9 additions & 4 deletions autogen/agentchat/contrib/vectordb/mongodb.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,12 +123,17 @@ def _wait_for_document(self, collection: Collection, index_name: str, doc: Docum
if query_result and query_result[0][0]["_id"] == doc["id"]:
return
sleep(_DELAY)
if query_result and float(query_result[0][1]) == 1.0:
if (
query_result
and float(query_result[0][1]) == 1.0
and query_result[0][0].get("metadata") == doc.get("metadata")
):
# Handles the edge case where a document is uploaded with a specific user-generated ID and the identical content is then uploaded again with a hash-generated ID.
raise TimeoutError(
f"""Documents may be ready, but the search has found an identical file with a different ID. Duplicate content may be present. Duplicate ID: {str(query_result[0][0]["_id"])}"""
logger.warning(
f"""Documents may be ready, the search has found identical content with a different ID and {"identical" if query_result[0][0].get("metadata") == doc.get("metadata") else "different"} metadata. Duplicate ID: {str(query_result[0][0]["_id"])}"""
)
raise TimeoutError(f"Document {self.index_name} is not ready!")
else:
raise TimeoutError(f"Document {self.index_name} is not ready!")

def _get_embedding_size(self):
    """Return the length of the first embedding produced for the sample sentence.

    Calls ``self.embedding_function`` on the module-level ``_SAMPLE_SENTENCE``
    and measures the first element of the result (presumably the embedding
    dimensionality used by the vector index -- confirm against callers).
    """
    sample_embeddings = self.embedding_function(_SAMPLE_SENTENCE)
    first_embedding = sample_embeddings[0]
    return len(first_embedding)
Expand Down

0 comments on commit dd4a654

Please sign in to comment.