|
4 | 4 | from dataclasses import dataclass, field
|
5 | 5 | from typing import Any, Callable
|
6 | 6 |
|
| 7 | +import typechat |
| 8 | + |
7 | 9 | from .interfaces import (
|
8 | 10 | # Interfaces.
|
9 | 11 | IConversation,
|
| 12 | + IConversationSecondaryIndexes, |
10 | 13 | IMessage,
|
11 | 14 | ITermToSemanticRefIndex,
|
12 | 15 | # Other imports.
|
|
20 | 23 | SemanticRef,
|
21 | 24 | TermToSemanticRefIndexItemData,
|
22 | 25 | TermToSemanticRefIndexData,
|
| 26 | + TextIndexingResult, |
23 | 27 | TextLocation,
|
24 | 28 | TextRange,
|
25 | 29 | Topic,
|
26 | 30 | )
|
27 |
| -from . import kplib |
| 31 | +from . import convknowledge, importing, kplib |
28 | 32 |
|
29 | 33 |
|
30 | 34 | def text_range_from_location(
|
@@ -91,12 +95,14 @@ def add_facet(
|
91 | 95 |
|
92 | 96 |
|
93 | 97 | def add_topic_to_index(
|
94 |
| - topic: Topic, |
| 98 | + topic: Topic | str, |
95 | 99 | semantic_refs: list[SemanticRef],
|
96 | 100 | semantic_ref_index: ITermToSemanticRefIndex,
|
97 | 101 | message_ordinal: MessageOrdinal,
|
98 | 102 | chunk_ordinal: int = 0,
|
99 | 103 | ) -> None:
|
| 104 | + if isinstance(topic, str): |
| 105 | + topic = Topic(text=topic) |
100 | 106 | ref_ordinal = len(semantic_refs)
|
101 | 107 | semantic_refs.append(
|
102 | 108 | SemanticRef(
|
@@ -143,6 +149,22 @@ def add_action_to_index(
|
143 | 149 | add_facet(action.subject_entity_facet, ref_ordinal, semantic_ref_index)
|
144 | 150 |
|
145 | 151 |
|
def add_knowledge_to_index(
    semantic_refs: list[SemanticRef],
    semantic_ref_index: ITermToSemanticRefIndex,
    message_ordinal: MessageOrdinal,
    knowledge: kplib.KnowledgeResponse,
) -> None:
    """Index every item of one extracted knowledge response.

    Items are processed in a fixed order: entities, then actions, then
    inverse actions, then topics. Entities go through
    ``add_entity_to_index``, both kinds of action through
    ``add_action_to_index``, and topics through ``add_topic_to_index``.
    Every helper receives the same ``semantic_refs``, ``semantic_ref_index``
    and ``message_ordinal`` that were passed in here.
    """
    # The trailing positional arguments are identical for every helper call.
    shared_args = (semantic_refs, semantic_ref_index, message_ordinal)

    for entity in knowledge.entities:
        add_entity_to_index(entity, *shared_args)

    for action in knowledge.actions:
        add_action_to_index(action, *shared_args)

    # Inverse actions are indexed with the same routine as direct actions.
    for inverse_action in knowledge.inverse_actions:
        add_action_to_index(inverse_action, *shared_args)

    for topic in knowledge.topics:
        add_topic_to_index(topic, *shared_args)
| 166 | + |
| 167 | + |
146 | 168 | def add_metadata_to_index[TMessage: IMessage](
|
147 | 169 | messages: list[TMessage],
|
148 | 170 | semantic_refs: list[SemanticRef],
|
@@ -242,16 +264,83 @@ def _prepare_term(self, term: str) -> str:
|
242 | 264 | # ...
|
243 | 265 |
|
244 | 266 |
|
def create_knowledge_extractor(
    model: typechat.TypeChatLanguageModel | None = None,
) -> convknowledge.KnowledgeExtractor:
    """Build a ``convknowledge.KnowledgeExtractor`` for *model*.

    ``model`` is forwarded unchanged, including the default ``None``; how
    ``None`` is interpreted is decided by ``KnowledgeExtractor`` itself.
    """
    extractor = convknowledge.KnowledgeExtractor(model)
    return extractor
| 271 | + |
| 272 | + |
async def build_conversation_index(
    conversation: IConversation,
    conversation_settings: importing.ConversationSettings,
    event_handler: IndexingEventHandlers | None = None,
) -> IndexingResults:
    """Build the indexes for *conversation* and return the combined results.

    Only the semantic-ref index is built at the moment: the result of
    ``build_semantic_ref_index`` is stored in the ``semantic_refs`` field of
    the returned ``IndexingResults``. No extractor is supplied, so
    ``build_semantic_ref_index`` falls back to its own default.
    ``conversation_settings`` is accepted but not used yet (see the TODO).
    """
    outcome = IndexingResults()
    outcome.semantic_refs = await build_semantic_ref_index(
        conversation,
        extractor=None,
        event_handler=event_handler,
    )
    # TODO: build the secondary indexes once the semantic-ref pass succeeds:
    # if outcome.semantic_refs and not outcome.semantic_refs.error and conversation.semantic_ref_index:
    #     outcome.secondary_index_results = await build_secondary_indexes(
    #         conversation, conversation_settings, event_handler
    #     )
    return outcome
|
| 288 | + |
| 289 | + |
| 290 | +async def build_semantic_ref_index[TM: IMessage, TC: IConversationSecondaryIndexes]( |
| 291 | + conversation: IConversation[TM, ConversationIndex, TC], |
| 292 | + extractor: convknowledge.KnowledgeExtractor | None = None, |
| 293 | + event_handler: IndexingEventHandlers | None = None, |
| 294 | +) -> TextIndexingResult: |
| 295 | + semantic_ref_index = conversation.semantic_ref_index |
| 296 | + if semantic_ref_index is None: |
| 297 | + conversation.semantic_ref_index = semantic_ref_index = ConversationIndex() |
| 298 | + |
| 299 | + semantic_refs = conversation.semantic_refs |
| 300 | + if semantic_refs is None: |
| 301 | + conversation.semantic_refs = semantic_refs = [] |
| 302 | + |
| 303 | + if extractor is None: |
| 304 | + extractor = create_knowledge_extractor() |
| 305 | + |
| 306 | + indexing_result = TextIndexingResult() |
| 307 | + |
| 308 | + for message_ordinal, message in enumerate(conversation.messages): |
| 309 | + print(f"\nPROCESSING MESSAGE {message_ordinal}") |
| 310 | + chunk_ordinal = 0 |
| 311 | + # Only one chunk per message for now. |
| 312 | + text = message.text_chunks[chunk_ordinal] |
| 313 | + # TODO: retries |
| 314 | + knowledge = await extractor.extract(text) |
| 315 | + if knowledge is None: |
| 316 | + indexing_result.error = f"Failed to extract knowledge from message {message_ordinal}: {text}" |
| 317 | + print(indexing_result.error) |
| 318 | + break |
| 319 | + if knowledge.entities or knowledge.actions or knowledge.inverse_actions or knowledge.topics: |
| 320 | + add_knowledge_to_index( |
| 321 | + semantic_refs, |
| 322 | + semantic_ref_index, |
| 323 | + message_ordinal, |
| 324 | + knowledge, |
| 325 | + ) |
| 326 | + completed_chunk = TextLocation(message_ordinal, chunk_ordinal) |
| 327 | + indexing_result.completed_upto = completed_chunk |
| 328 | + if event_handler and event_handler.on_knowledge_extracted: |
| 329 | + if not event_handler.on_knowledge_extracted(completed_chunk, knowledge): |
| 330 | + print("BREAK") |
| 331 | + break |
| 332 | + |
| 333 | + # dump(semantic_ref_index, semantic_refs) |
| 334 | + |
| 335 | + return indexing_result |
| 336 | + |
| 337 | + |
def dump(semantic_ref_index: ConversationIndex, semantic_refs: list[SemanticRef]) -> None:
    """Print the index's term map and the semantic refs to stdout.

    Debugging helper: writes each ``key: value`` pair of
    ``semantic_ref_index._map`` and then each entry of ``semantic_refs``,
    both wrapped in brace-delimited sections.
    """
    print("semantic_ref_index = {")
    for term, entry in semantic_ref_index._map.items():
        print(f" {term!r}: {entry},")
    print("}\n")

    print("semantic_refs = {")
    for ref in semantic_refs:
        print(f" {ref},")
    print("}\n")
0 commit comments