Skip to content

Commit 2305fc1

Browse files
committed
More logging, save knowledge graph after transforms
1 parent a839b33 commit 2305fc1

File tree

3 files changed

+835889
-5211
lines changed

3 files changed

+835889
-5211
lines changed

evals/generate_ground_truth.py

+19 -11
Original file line number | Diff line number | Diff line change
@@ -79,20 +79,18 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
7979
)
8080

8181
# Load or create the knowledge graph
82-
if kg_file and os.path.exists(kg_file):
83-
logger.info("Loading existing knowledge graph from %s", kg_file)
84-
kg = KnowledgeGraph.load(kg_file)
82+
if kg_file:
83+
full_path_to_kg = root_dir / kg_file
84+
if not os.path.exists(full_path_to_kg):
85+
raise FileNotFoundError(f"Knowledge graph file {full_path_to_kg} not found.")
86+
logger.info("Loading existing knowledge graph from %s", full_path_to_kg)
87+
kg = KnowledgeGraph.load(full_path_to_kg)
8588
else:
8689
# Make a knowledge_graph from Azure AI Search documents
90+
logger.info("Fetching %d document chunks from Azure AI Search", num_search_documents)
8791
search_docs = get_search_documents(azure_credential, num_search_documents)
88-
# Create the transforms
89-
transforms = default_transforms(
90-
documents=[LCDocument(page_content=doc["content"]) for doc in search_docs],
91-
llm=generator_llm,
92-
embedding_model=generator_embeddings,
93-
)
9492

95-
# Convert the documents to RAGAS nodes
93+
logger.info("Creating a RAGAS knowledge graph with based off of %d search documents", len(search_docs))
9694
nodes = []
9795
for doc in search_docs:
9896
content = doc["content"]
@@ -107,9 +105,18 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
107105
nodes.append(node)
108106

109107
kg = KnowledgeGraph(nodes=nodes)
108+
109+
logger.info("Using RAGAS to apply transforms to knowledge graph", len(search_docs))
110+
transforms = default_transforms(
111+
documents=[LCDocument(page_content=doc["content"]) for doc in search_docs],
112+
llm=generator_llm,
113+
embedding_model=generator_embeddings,
114+
)
115+
apply_transforms(kg, transforms)
116+
110117
kg.save(root_dir / "ground_truth_kg.json")
111118

112-
apply_transforms(kg, transforms)
119+
logger.info("Using RAGAS knowledge graph to generate %d questions", num_questions)
113120
generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings, knowledge_graph=kg)
114121
dataset = generator.generate(testset_size=num_questions, with_debugging_logs=True)
115122

@@ -128,6 +135,7 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
128135
qa_pairs.append({"question": question, "truth": truth})
129136

130137
with open(root_dir / "ground_truth.jsonl", "a") as f:
138+
logger.info("Writing %d QA pairs to %s", len(qa_pairs), f.name)
131139
for qa_pair in qa_pairs:
132140
f.write(json.dumps(qa_pair) + "\n")
133141

0 commit comments

Comments
 (0)