@@ -79,20 +79,18 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
     )
 
     # Load or create the knowledge graph
-    if kg_file and os.path.exists(kg_file):
-        logger.info("Loading existing knowledge graph from %s", kg_file)
-        kg = KnowledgeGraph.load(kg_file)
+    if kg_file:
+        full_path_to_kg = root_dir / kg_file
+        if not os.path.exists(full_path_to_kg):
+            raise FileNotFoundError(f"Knowledge graph file {full_path_to_kg} not found.")
+        logger.info("Loading existing knowledge graph from %s", full_path_to_kg)
+        kg = KnowledgeGraph.load(full_path_to_kg)
     else:
         # Make a knowledge_graph from Azure AI Search documents
+        logger.info("Fetching %d document chunks from Azure AI Search", num_search_documents)
         search_docs = get_search_documents(azure_credential, num_search_documents)
-        # Create the transforms
-        transforms = default_transforms(
-            documents=[LCDocument(page_content=doc["content"]) for doc in search_docs],
-            llm=generator_llm,
-            embedding_model=generator_embeddings,
-        )
 
-        # Convert the documents to RAGAS nodes
+        logger.info("Creating a RAGAS knowledge graph based on %d search documents", len(search_docs))
         nodes = []
         for doc in search_docs:
             content = doc["content"]
@@ -107,9 +105,18 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
             nodes.append(node)
 
         kg = KnowledgeGraph(nodes=nodes)
+
+        logger.info("Using RAGAS to apply transforms to the knowledge graph")
+        transforms = default_transforms(
+            documents=[LCDocument(page_content=doc["content"]) for doc in search_docs],
+            llm=generator_llm,
+            embedding_model=generator_embeddings,
+        )
+        apply_transforms(kg, transforms)
+
         kg.save(root_dir / "ground_truth_kg.json")
 
-    apply_transforms(kg, transforms)
+    logger.info("Using RAGAS knowledge graph to generate %d questions", num_questions)
     generator = TestsetGenerator(llm=generator_llm, embedding_model=generator_embeddings, knowledge_graph=kg)
     dataset = generator.generate(testset_size=num_questions, with_debugging_logs=True)
 
@@ -128,6 +135,7 @@ def generate_ground_truth_ragas(num_questions=200, num_search_documents=None, kg
         qa_pairs.append({"question": question, "truth": truth})
 
     with open(root_dir / "ground_truth.jsonl", "a") as f:
+        logger.info("Writing %d QA pairs to %s", len(qa_pairs), f.name)
        for qa_pair in qa_pairs:
            f.write(json.dumps(qa_pair) + "\n")
 
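
For context, here is a minimal sketch of the load-or-build flow this diff ends up with, where the transforms are applied before the graph is saved. It is an illustration only: it assumes ragas 0.2.x import paths, uses `Node(type=NodeType.DOCUMENT, properties={"page_content": ...})` as a stand-in for the node construction elided between the hunks, and takes `llm` / `embeddings` as already-wrapped RAGAS models like `generator_llm` / `generator_embeddings` in the script.

```python
# Sketch only: mirrors the reordered flow in this diff (load, or build nodes -> transforms -> save),
# not the exact helper structure of the script.
from pathlib import Path

from langchain_core.documents import Document as LCDocument
from ragas.testset.graph import KnowledgeGraph, Node, NodeType
from ragas.testset.transforms import apply_transforms, default_transforms


def load_or_build_kg(root_dir: Path, kg_file: str | None, chunks: list[str], llm, embeddings) -> KnowledgeGraph:
    if kg_file:
        full_path_to_kg = root_dir / kg_file
        if not full_path_to_kg.exists():
            # Fail fast instead of silently rebuilding, matching the new `if kg_file:` branch.
            raise FileNotFoundError(f"Knowledge graph file {full_path_to_kg} not found.")
        return KnowledgeGraph.load(full_path_to_kg)

    # One DOCUMENT node per search chunk (this Node constructor is an assumption;
    # the actual node-building lines fall in the region elided between the hunks).
    nodes = [Node(type=NodeType.DOCUMENT, properties={"page_content": chunk}) for chunk in chunks]
    kg = KnowledgeGraph(nodes=nodes)

    # Transforms are now created and applied *before* saving, so ground_truth_kg.json
    # persists the enriched graph, and apply_transforms no longer runs on the path where
    # a saved graph was loaded (where `transforms` was previously undefined).
    transforms = default_transforms(
        documents=[LCDocument(page_content=chunk) for chunk in chunks],
        llm=llm,
        embedding_model=embeddings,
    )
    apply_transforms(kg, transforms)
    kg.save(root_dir / "ground_truth_kg.json")
    return kg
```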