From 442a9d875d3c424efc52966cbce139751a6d8ea2 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 29 Nov 2024 12:31:21 -0800 Subject: [PATCH 01/93] initial checkin --- python/.gitignore | 2 +- python/packages/autogen-ext/pyproject.toml | 1 + .../samples/run_magentic_one_with_memory.py | 163 ++++++++ .../autogen_ext/agentic_memory/__init__.py | 4 + .../agentic_memory/_agentic_memory.py | 162 ++++++++ .../agentic_memory/_knowledge_archive.py | 94 +++++ .../autogen_ext/agentic_memory/_memo_store.py | 121 ++++++ .../autogen_ext/agentic_memory/_page_log.py | 371 ++++++++++++++++++ .../autogen_ext/agentic_memory/_prompter.py | 178 +++++++++ .../src/autogen_ext/agentic_memory/_utils.py | 58 +++ 10 files changed, 1153 insertions(+), 1 deletion(-) create mode 100644 python/packages/autogen-ext/samples/run_magentic_one_with_memory.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py diff --git a/python/.gitignore b/python/.gitignore index 677a888f2f49..e2c779af7d0f 100644 --- a/python/.gitignore +++ b/python/.gitignore @@ -157,7 +157,7 @@ cython_debug/ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore # and can be added to the global gitignore or merged into this file. For a more nuclear # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
-#.idea/ +.idea/ .ruff_cache/ diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index fb751ecf026d..e81ee1dc0ee2 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -31,6 +31,7 @@ web-surfer = [ "playwright>=1.48.0", "pillow>=11.0.0", ] +agentic-memory = ["chromadb"] [tool.hatch.build.targets.wheel] packages = ["src/autogen_ext"] diff --git a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py new file mode 100644 index 000000000000..1c31d4365646 --- /dev/null +++ b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py @@ -0,0 +1,163 @@ +import asyncio +from autogen_ext.models import OpenAIChatCompletionClient +from autogen_agentchat.agents import AssistantAgent +from autogen_agentchat.teams import MagenticOneGroupChat +from autogen_ext.agents import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str +from autogen_agentchat.task import Console +from autogen_core.components.models import ( + AssistantMessage, + ChatCompletionClient, + FunctionExecutionResult, + FunctionExecutionResultMessage, + LLMMessage, + SystemMessage, + UserMessage, +) +from typing import ( + Tuple, +) +from autogen_ext.agentic_memory import AgenticMemory, PageLog + + +USE_AGENTIC_MEMORY = 1 # 1 = Assign task to AgenticMemory instead of directly to the completion agent +CREATE_NEW_MEMORIES = 1 # 1 = Let AgenticMemory try to create new memories +RESET_MEMORY = 1 # 1 = Reset the memory before starting each task + + +async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str, str]: + page = page_log.begin_page( + summary="assign_task_to_magentic_one", + details='', + method_call="assign_task_to_magentic_one") + + page.add_lines(task) + + general_agent = AssistantAgent( + "general_agent", + model_client, + description="A general GPT-4o AI assistant capable of performing a variety of tasks.", ) + + web_surfer = MultimodalWebSurfer( + name="web_surfer", + model_client=model_client, + downloads_folder="logs", + debug_dir="logs", + to_save_screenshots=True, + ) + + team = MagenticOneGroupChat( + [general_agent, web_surfer], + model_client=model_client, + max_turns=20, + ) + + # user_input = await asyncio.get_event_loop().run_in_executor(None, input, ">: ") + stream = team.run_stream(task=task) + task_result = await Console(stream) + + # Use the entire task_result (with images removed) as the work history. + work_history = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) + + # Extract the final response as the last line of the last message. + # This assumes that the task statement specified that the answer should be on the last line. + final_message_string = task_result.messages[-1].content + final_message_lines = final_message_string.split("\n") + final_response = final_message_lines[-1] + + page_log.finish_page(page) + + return final_response, work_history + + +async def assign_task_to_client(task, client, page_log): + # The client is a ChatCompletionClient. Pass the task to it, and return the response. + system_message = SystemMessage(content="""You are a helpful and thoughtful assistant. +In responding to every user message, you follow the same multi-step process given here: +1. Explain your understanding of the user message in detail, covering all the important points. +2. 
List as many possible responses as you can think of.
+3. Carefully list and weigh the pros and cons (if any) of each possible response.
+4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist.
+5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory.
+6. Finish by providing your final response in the particular format requested by the user.""")
+    user_message = UserMessage(content=task, source="human")
+
+    input_messages = [system_message] + [user_message]
+    response = await client.create(input_messages)
+
+    # Log the model call
+    page_log.add_model_call(description="Ask the model",
+                            details="to complete the task", input_messages=input_messages,
+                            response=response,
+                            num_input_tokens=0, caller='assign_task_to_client')
+
+    # Split the response into lines.
+    response_lines = response.content.split("\n")
+
+    # The final line contains the answer. Extract it.
+    answer = response_lines[-1]
+
+    return answer, response.content
+
+
+async def task_assignment_callback(task, client, page_log) -> Tuple[str, str]:
+    page = page_log.begin_page(
+        summary="task_assignment_callback",
+        details='',
+        method_call="task_assignment_callback")
+
+    # Send the task to an agent, team or client.
+    # response, work_history = await assign_task_to_client(task.strip(), client, page_log)
+    response, work_history = await assign_task_to_magentic_one(task.strip(), client, page_log)
+
+    page.update_details(" " + response)
+    page_log.finish_page(page)
+    return response, work_history
+
+
+async def main() -> None:
+    # Select the task
+    task = """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? The final line of your response must contain nothing but the answer as a number."""
+    expected_answer = "100"
+
+    # Create the OpenAI client
+    model_name = "gpt-4o-2024-05-13"
+    temp = 0.1
+    max_tokens = 4096
+    client = OpenAIChatCompletionClient(
+        model=model_name,
+        api_key="",
+        temperature=temp,
+        max_tokens=max_tokens,
+        presence_penalty=0.0,
+        frequency_penalty=0.0,
+        top_p=1.0,
+        max_retries=65535,
+    )
+
+    # Create the PageLog.
+ page_log = PageLog("~/pagelogs/", "m1") + page = page_log.begin_page( + summary="main", + details='', + method_call="main") + page_log.append_entry_line(f"Using {model_name} on OAI, temp={temp}, max_tokens={max_tokens}") + + if USE_AGENTIC_MEMORY: + memory = AgenticMemory(reset=RESET_MEMORY, client=client, page_log=page_log, path_to_archive_dir="~/agentic_memory_archive") + prepared_response = await memory.assign_task( + task, expected_answer, CREATE_NEW_MEMORIES, task_assignment_callback, final_format_instructions="") + else: + prepared_response, _ = await task_assignment_callback(task, client, page_log) + + print("FINAL RESPONSE AFTER LEARNING: ", prepared_response) + page.add_lines(prepared_response) + page_log.flush(final=True) # Finalize the page log + page_log.finish_page(page) + +if __name__ == "__main__": + # logger = logging.getLogger(EVENT_LOGGER_NAME) + # logger.setLevel(logging.INFO) + # log_handler = LogHandler() + # logger.handlers = [log_handler] + asyncio.run(main()) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py new file mode 100644 index 000000000000..95efac2a718b --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -0,0 +1,4 @@ +from ._agentic_memory import AgenticMemory +from ._page_log import PageLog + +__all__ = ["AgenticMemory", "PageLog"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py new file mode 100644 index 000000000000..c4ecbfd2a884 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -0,0 +1,162 @@ +from typing import Callable +from ._prompter import Prompter +from ._knowledge_archive import KnowledgeArchive + + +class AgenticMemory: + def __init__(self, reset, client, page_log, path_to_archive_dir): + self.client = client + self.page_log = page_log + self.prompter = Prompter(client, page_log) + self.archive = KnowledgeArchive(verbosity=0, reset=reset, path_to_archive_dir=path_to_archive_dir, + page_log=page_log) + + async def assign_task(self, task: str, expected_answer: str, create_new_memories: bool, assign_task_to_completer: Callable, + final_format_instructions: str): + """ + Assigns a task to the completion agent, using any relevant memories, and tries to learn from failures. + If the first attempt succeeds, that response is returned without attempting to learn. + """ + page = self.page_log.begin_page( + summary="AgenticMemory.assign_task", + details="", + method_call="AgenticMemory.assign_task") + + if create_new_memories: + page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) + # Attempt to create useful new memories. + _, insight = await self.iterate_on_task(task, expected_answer, assign_task_to_completer, final_format_instructions) + if insight is not None: + page.add_lines("A new insight was created:\n{}".format(insight), flush=True) + # Add this insight to memory. + await self.add_insight_to_memory(task, insight) + + # Retrieve insights from memory. + filtered_insights = await self.retrieve_relevant_insights(task) + + # Try to solve the task, looking up relevant memories from the DB. 
+ page.add_lines("Try to solve the task, using any insights from the db.\n", flush=True) + if len(filtered_insights) > 0: + page.add_lines("Relevant insights were found in memory.\n", flush=True) + memory_section = self.format_memory_section(filtered_insights) + task = task + '\n\n' + memory_section + + # Attempt to solve the task. + final_response, _ = await assign_task_to_completer(task, self.client, self.page_log) + + page.add_lines("\n{}\n".format(final_response), flush=True) + self.page_log.finish_page(page) + return final_response + + async def add_insight_to_memory(self, task: str, insight: str): + # Adds an insight to the DB. + page = self.page_log.begin_page( + summary="AgenticMemory.add_insight_to_memory", + details="", + method_call="AgenticMemory.add_insight_to_memory") + + # Get a combined list of topics from the task and insight. + task_plus_insight = task.strip() + "\n(Hint: " + insight + ")" + topics = await self.prompter.find_index_topics(task_plus_insight) + page.add_lines("\nTOPICS EXTRACTED FROM TASK AND INSIGHT:") + page.add_lines("\n".join(topics)) + page.add_lines("") + + # Add the insight to the archive. + self.archive.add_insight(insight, task, topics) + + self.page_log.finish_page(page) + + async def retrieve_relevant_insights(self, task: str): + # Retrieve insights from the DB that are relevant to the task. + page = self.page_log.begin_page( + summary="AgenticMemory.retrieve_relevant_insights", + details="", + method_call="AgenticMemory.retrieve_relevant_insights") + + # Get a list of topics from the task. + topics = await self.prompter.find_index_topics(task) + page.add_lines("\nTOPICS EXTRACTED FROM TASK:") + page.add_lines("\n".join(topics)) + page.add_lines("") + + # Retrieve insights from the archive. + unfiltered_insights = self.archive.get_relevant_insights(topics=topics) + filtered_insights = [] + page.add_lines("\nUNFILTERED INSIGHTS") + for insight, relevance in unfiltered_insights.items(): + page.add_lines(" INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) + if relevance > 5.0: + filtered_insights.append(insight) + page.add_lines("\nFiltered to top {} insights".format(len(filtered_insights))) + + self.page_log.finish_page(page) + return filtered_insights + + def format_memory_section(self, memories): + memory_section = "" + if len(memories) > 0: + memory_section = "## Important insights that may help solve tasks like this\n" + for mem in memories: + memory_section += ('- ' + mem + '\n') + return memory_section + + async def iterate_on_task(self, task: str, expected_answer: str, assign_task_to_completer: Callable, final_format_instructions: str): + page = self.page_log.begin_page( + summary="AgenticMemory.iterate_on_task", + details="", + method_call="AgenticMemory.iterate_on_task") + + page.add_lines("\nTask description: {}".format(task)) + page.add_lines("\nExpected answer: {}\n".format(expected_answer)) + + final_response = None + old_insights = await self.retrieve_relevant_insights(task) + new_insights = [] + insight = None + successful_insight = None + + # Loop until success (or timeout) while learning from failures. + max_trials = 4 + for trial in range(1, max_trials + 1): + page.add_lines("----- TRIAL {} -----\n".format(trial), flush=True) + + # Add any new insights we've accumulated so far. + memory_section = self.format_memory_section(old_insights + new_insights) + task_plus_insights = task + '\n\n' + memory_section + + # Attempt to solve the task. 
+            response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log)
+
+            # Is this answer correct?
+            page.add_lines("\nResponse: {}".format(response), flush=True)
+            response_is_correct = (response.lower() == expected_answer.lower())
+            if response_is_correct:
+                # Yes. Time to exit the loop.
+                page.add_lines("\nResponse is CORRECT. No learning needed.\n", flush=True)
+                # Was this the first trial?
+                if trial == 1:
+                    # Yes. We should return the successful response, and no insight.
+                    final_response = response
+                else:
+                    # No. We learned a successful insight, which should be returned.
+                    successful_insight = insight
+                break
+
+            # Will we try again?
+            if trial == max_trials:
+                # No. We're out of tries.
+                page.add_lines("\nNo more trials will be attempted.\n", flush=True)
+                break
+
+            # Try to learn from this failure.
+            page.add_lines("\nResponse is INCORRECT. Try to learn from this failure.\n", flush=True)
+            insight = await self.prompter.learn_from_failure(
+                task, memory_section, response, expected_answer, work_history, final_format_instructions)
+            page.add_lines("\nInsight: {}\n".format(insight), flush=True)
+            new_insights.append(insight)
+
+        # Return the answer from the last loop.
+        page.add_lines("\n{}\n".format(final_response), flush=True)
+        self.page_log.finish_page(page)
+        return final_response, successful_insight
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
new file mode 100644
index 000000000000..9b89249ff8e4
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
@@ -0,0 +1,94 @@
+import os
+from dataclasses import dataclass
+import pickle
+from typing import Dict, Optional, Union, List
+from ._memo_store import MemoStore
+
+
+@dataclass
+class Insight:
+    id: str
+    insight_str: str
+    task_str: str
+    topics: List[str]
+
+
+class KnowledgeArchive:
+    """
+    Stores task-completion insights in a vector DB for later retrieval.
+    """
+    def __init__(
+        self,
+        verbosity: Optional[int] = 0,
+        reset: Optional[bool] = False,
+        path_to_archive_dir: Optional[str] = "tmp/archive",
+        page_log=None,
+    ):
+        """
+        Args:
+        - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists.
+        - reset (Optional, bool): True to clear the DB before starting. Default False.
+        - path_to_archive_dir (Optional, str): path to the directory where the archive is stored.
+        """
+        self.path_to_archive_dir = path_to_archive_dir
+        path_to_db_dir = os.path.join(path_to_archive_dir, "memo_store")
+        self.page_log = page_log
+        parent_page = self.page_log.last_page()
+        parent_page.add_lines("Creating KnowledgeArchive object", flush=True)
+
+        self.memo_store = MemoStore(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir)
+
+        # Load or create the associated memo dict on disk.
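+        # (Descriptive note: the pickle file below holds the id -> Insight mapping that
+        # complements the vector DB, which stores only topic -> insight-id links.)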
+ self.path_to_dict = os.path.join(path_to_archive_dir, "uid_insight_dict.pkl") + self.uid_insight_dict = {} + self.last_insight_id = 0 + if (not reset) and os.path.exists(self.path_to_dict): + parent_page.add_lines("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict)) + parent_page.add_lines(" Location = {}".format(self.path_to_dict)) + with open(self.path_to_dict, "rb") as f: + self.uid_insight_dict = pickle.load(f) + self.last_insight_id = len(self.uid_insight_dict) + parent_page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict))) + + def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics: Optional[List[str]] = None): + """Adds an insight to the knowledge archive.""" + assert topics is not None, "For now, the topics list must be provided." + self.last_insight_id += 1 + id_str = str(self.last_insight_id) + insight = Insight(id=id_str, insight_str=insight_str, task_str=task_str, topics=topics) + for topic in topics: + # Add a mapping in the vec DB from each topic to the insight. + self.memo_store.add_input_output_pair(topic, id_str) + self.uid_insight_dict[str(id_str)] = insight + self.save_archive() + + def save_archive(self): + self.memo_store.save_memos() + parent_page = self.page_log.last_page() + parent_page.add_lines("\nSAVING INSIGHTS TO DISK {}".format(self.path_to_dict)) + with open(self.path_to_dict, "wb") as file: + pickle.dump(self.uid_insight_dict, file) + + def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional[List[str]] = None): + """Returns any insights from the knowledge archive that are relevant to the given task or topics.""" + assert (task_str is not None) or (topics is not None), "Either the task string or the topics list must be provided." + assert topics is not None, "For now, the topics list is always required, because it won't be generated." + + # Maintain a dict of insight-relevance pairs. + insight_relevance_dict = {} + relevance_conversion_threshold = 1.7 + + # Process the matching topics. + matches = [] # Each match is a tuple: (topic, insight, distance) + for topic in topics: + matches.extend(self.memo_store.get_related_memos(topic, 25, 100)) + for match in matches: + relevance = relevance_conversion_threshold - match[2] + insight_id = match[1] + insight_str = self.uid_insight_dict[insight_id].insight_str + if insight_str in insight_relevance_dict: + insight_relevance_dict[insight_str] += relevance + else: + insight_relevance_dict[insight_str] = relevance + + return insight_relevance_dict diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py new file mode 100644 index 000000000000..8eed6d45c52e --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py @@ -0,0 +1,121 @@ +import os +import pickle +import chromadb +from chromadb.config import Settings +from typing import Optional, Union + + +class MemoStore: + """ + Provides memory storage and retrieval using a vector database. + Each DB entry (called a memo) is a pair of strings: an input text and an output text. + The input text is embedded and used as the retrieval key. + The output text can be anything, but it's typically used as a dict key. + Vector embeddings are currently supplied by Chroma's default Sentence Transformers. 
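+
+    A minimal usage sketch (illustrative only; the paths and strings here are
+    made up for the example, not part of the API):
+
+        store = MemoStore(verbosity=1, reset=True, path_to_db_dir="tmp/memo_store")
+        store.add_input_output_pair("planning a multi-step task", "insight-1")
+        memos = store.get_related_memos("task planning", n_results=1, threshold=100)
+        # Each returned memo is an (input_text, output_text, distance) tuple.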
+ """ + + def __init__( + self, + verbosity: Optional[int] = 0, + reset: Optional[bool] = False, + path_to_db_dir: Optional[str] = None, + ): + """ + Args: + - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. + - reset (Optional, bool): True to clear the DB before starting. Default False. + - path_to_db_dir (Optional, str): path to the directory where the DB is stored. + """ + self.verbosity = verbosity + self.path_to_db_dir = path_to_db_dir + + # Load or create the vector DB on disk. + settings = Settings( + anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir + ) + self.db_client = chromadb.Client(settings) + self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. + + # Load or create the associated memo dict on disk. + self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") + self.uid_text_dict = {} + self.last_memo_id = 0 + if (not reset) and os.path.exists(self.path_to_dict): + print("\nLOADING MEMORY FROM DISK {}".format(self.path_to_dict)) + print(" Location = {}".format(self.path_to_dict)) + with open(self.path_to_dict, "rb") as f: + self.uid_text_dict = pickle.load(f) + self.last_memo_id = len(self.uid_text_dict) + print("\n{} MEMOS LOADED".format(len(self.uid_text_dict))) + if self.verbosity >= 3: + self.list_memos() + + # Clear the DB if requested. + if reset: + self.reset_db() + + def list_memos(self): + """Prints the contents of MemoStore.""" + print("LIST OF MEMOS") + for uid, text in self.uid_text_dict.items(): + input_text, output_text = text + print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + + def save_memos_to_text_files(self): + """Saves the contents of MemoStore to text files.""" + # Delete all files in mem_text dir. + for file in os.listdir("mem_text"): + os.remove(os.path.join("mem_text", file)) + + print("LIST OF MEMOS") + for uid, text in self.uid_text_dict.items(): + input_text, output_text = text + print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + # Save the input text to a file with the same name as the memo ID in the mem_text dir, which is a subdir of the dir containing this file. 
+ with open("mem_text/{}.txt".format(uid), "w") as file: + file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + + def save_memos(self): + """Saves self.uid_text_dict to disk.""" + with open(self.path_to_dict, "wb") as file: + pickle.dump(self.uid_text_dict, file) + + def reset_db(self): + """Forces immediate deletion of the DB's contents, in memory and on disk.""" + print("\nCLEARING MEMORY") + self.db_client.delete_collection("memos") + self.vec_db = self.db_client.create_collection("memos") + self.uid_text_dict = {} + self.save_memos() + + def add_input_output_pair(self, input_text: str, output_text: str): + """Adds an input-output pair to the vector DB.""" + self.last_memo_id += 1 + self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)]) + self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text + if self.verbosity >= 1: + print("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( + self.last_memo_id, input_text, output_text)) + if self.verbosity >= 3: + self.list_memos() + + def get_related_memos(self, query_text: str, n_results: int, threshold: Union[int, float]): + """Retrieves memos that are related to the given query text within the specified distance threshold.""" + if n_results > len(self.uid_text_dict): + n_results = len(self.uid_text_dict) + if n_results > 0: + results = self.vec_db.query(query_texts=[query_text], n_results=n_results) + num_results = len(results["ids"][0]) + else: + num_results = 0 + memos = [] + for i in range(num_results): + uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i] + if distance < threshold: + input_text_2, output_text = self.uid_text_dict[uid] + assert input_text == input_text_2 + if self.verbosity >= 1: + print("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance)) + memos.append((input_text, output_text, distance)) + return memos diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py new file mode 100644 index 000000000000..e87a9b0c38c6 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py @@ -0,0 +1,371 @@ +import os +import shutil +import time +from typing import List + +from autogen_core.components import Image +from autogen_core.components.models import ( + AssistantMessage, + ChatCompletionClient, + LLMMessage, + SystemMessage, + UserMessage, + AssistantMessage, + FunctionExecutionResultMessage, +) + + +class Page: + def __init__(self, page_log, index, summary, details, method_call, indent_level, show_in_overview=True, final=True): + self.page_log = page_log + self.index_str = str(index) + self.link_text = None + self.full_link = None + self.file_title = None + self.unindented_line_text = None + self.line_text = None + self.indentation_text = None + self.summary = summary + self.details = details + self.method_call = method_call + self.indent_level = indent_level + self.show_in_overview = show_in_overview + self.final = final + self.compose_line(details) + self.lines = [] + self.flush() + + def compose_line(self, details, flush=False): + self.details = details + self.link_text = self.index_str + ' ' + self.summary + self.indentation_text = "" + for i in range(self.indent_level): + self.indentation_text += "| " + self.file_title = self.link_text + ' ' + 
self.details + self.full_link = self.link_to_page_file() + self.unindented_line_text = self.full_link + ' ' + self.details + self.line_text = self.indentation_text + self.unindented_line_text + if flush: + self.flush() + + def update_details(self, details): + self.compose_line(details, flush=True) + self.page_log.flush() + + def link_to_page_file(self): + return f'{self.link_text}' + + def add_lines(self, line, flush=False): + # If the string 'line' consists of multiple lines, separate them into a list. + lines_to_add = [] + if "\n" in line: + lines_to_add = line.split("\n") + else: + lines_to_add.append(line) + + self.lines.extend(lines_to_add) + + if flush: + self.flush() + + def link_to_image(self, image_path, description): + # Add a thumbnail that links to the image. + # If the following html string is indented, underscores appear to the left of thumbnails. + link = f"""{description}""" + return link + + def add_link_to_image(self, description, source_image_path): + # Copy the image to the run directory. + # Remove every character from the string 'description' that is not alphanumeric or a space. + description = ''.join(e for e in description if e.isalnum() or e.isspace()) + target_image_filename = (str(self.page_log.get_next_page_id()) + ' - ' + description) + local_image_path = os.path.join(self.page_log.run_dir_path, target_image_filename) + shutil.copyfile(source_image_path, local_image_path) + self.add_lines('\n' + description) + self.add_lines(self.link_to_image(target_image_filename, description), flush=True) + + def delete_last_line(self): + if len(self.lines) > 0: + self.lines.pop() + + def flush(self): + page_path = os.path.join(self.page_log.run_dir_path, self.index_str + ".html") + with open(page_path, "w") as f: + f.write(self.page_log.html_opening(self.file_title, final=self.final)) + f.write(f"
+            f.write(f"<h3>{self.file_title}</h3>\n")
\n") + for line in self.lines: + # Call f.write in a try block to catch any UnicodeEncodeErrors. + try: + f.write(f"{line}\n") + except UnicodeEncodeError: + f.write(f"UnicodeEncodeError in this line.\n") + f.write(self.page_log.html_closing()) + f.flush() + time.sleep(0.1) + + +class PageLog: + def __init__(self, path, run_id): + self.log_dir = os.path.expanduser(path) + self.run_id = run_id + self.page_stack = PageStack() + self.pages = [] + self.last_page_id = 0 + self.entry_lines = [] + self.exit_lines = [] + self.run_dir_path = None + self.name = "0 Overview" + self.create_run_dir() + self.token_counts_path = self.create_token_counts_file() + self.flush() + + def get_next_page_id(self): + self.last_page_id += 1 + return self.last_page_id + + def create_run_dir(self): + # Create a fresh run directory. + self.run_dir_path = os.path.join(self.log_dir, f"{self.run_id}") + if os.path.exists(self.run_dir_path): + shutil.rmtree(self.run_dir_path) + os.makedirs(self.run_dir_path) + + def create_token_counts_file(self): + token_counts_path = os.path.join(self.run_dir_path, "token_counts.csv") + f = open(token_counts_path, "w") + f.close() # The file starts empty and will be appended to later. + return token_counts_path + + def write_token_count(self, num_input_tokens, caller, details_path=None): + # Write the number of input tokens to the file, with caller and path to other details. + with open(self.token_counts_path, "a") as f: + f.write(f"{num_input_tokens},{caller},{details_path}\n") + + def num_subdirectories(self): + # Return the number of subdirectories in the log directory. + return len([name for name in os.listdir(self.log_dir) if os.path.isdir(os.path.join(self.log_dir, name))]) + + def html_opening(self, file_title, final=False): + # Return the opening text of a simple HTML file. + refresh_tag = '' if not final else "" + st = f""" + + + + {refresh_tag} + {file_title} + + + """ + return st + + def html_closing(self): + # Return the closing text of a simple HTML file. + return """""" + + def add_page(self, summary, details, method_call=None, show_in_overview=True, final=True): + # Add a page to the log. + page = Page(page_log=self, + index=self.get_next_page_id(), + summary=summary, + details=details, + method_call=method_call, + indent_level=len(self.page_stack.stack), + show_in_overview=show_in_overview, + final=final) + self.pages.append(page) + self.flush() + + if len(self.page_stack.stack) > 0: + # Insert a link to the new page into the calling page. + self.page_stack.stack[-1].add_lines(page.unindented_line_text, flush=True) + + return page + + def message_source(self, message): + source = "UNKNOWN" + color = "black" + if isinstance(message, SystemMessage): + source = "SYSTEM" + color = "purple" + elif isinstance(message, UserMessage): + source = "USER" + color = "blue" + elif isinstance(message, AssistantMessage): + source = "ASSISTANT" + color = "green" + elif isinstance(message, FunctionExecutionResultMessage): + source = "FUNCTION" + color = "red" + return self.decorate_text(source, color, demarcate=True) + + def decorate_text(self, text, color, weight="bold", demarcate=False): + if demarcate: + text = f"<<<<< {text} >>>>>" + return f'{text}' + + def message_content(self, page, message=None, message_content=None): + # Format the message content for logging. Either message or message_content must not be None. + # Start by converting the message content to a list of strings. 
+        content_list = []
+        if message_content is not None:
+            content = message_content
+        if message is not None:
+            content = message.content
+        if isinstance(content, str):
+            content_list.append(content)
+        elif isinstance(content, list):
+            for item in content:
+                if isinstance(item, str):
+                    content_list.append(item.rstrip())
+                elif isinstance(item, Image):
+                    # Save the image to disk.
+                    image_filename = str(self.get_next_page_id()) + " image.jpg"
+                    image_path = os.path.join(self.run_dir_path, image_filename)
+                    item.image.save(image_path)
+                    # Add a link to the image.
+                    content_list.append(page.link_to_image(image_filename, "message_image"))
+                else:
+                    content_list.append(str(item).rstrip())
+        else:
+            content_list.append("")
+
+        # Convert the list of strings to a single string with newline separators.
+        output = ""
+        for item in content_list:
+            output += f"\n{item}\n"
+        return output
+
+    def add_message_content(self, message_content, summary, details=""):
+        # Add a page containing a message's content.
+        page = self.add_page(summary=summary,
+                             details=details,
+                             show_in_overview=False)
+        self.page_stack.write_stack_to_page(page)
+        page.add_lines(self.message_content(page, message_content=message_content))
+        page.flush()
+
+    def add_broadcast_message(self, message, operation):
+        # Add a page containing a message being broadcast.
+        page = self.add_page(summary="Broadcast Message",
+                             details=operation,
+                             method_call="broadcast message",
+                             show_in_overview=False)
+        self.page_stack.write_stack_to_page(page)
+        page.add_lines(self.message_source(message))
+        page.add_lines(self.message_content(page, message=message))
+        page.flush()
+
+    def add_model_call(self, description, details, input_messages, response,
+                       tools=None, json_output=None, extra_create_args=None,
+                       num_input_tokens=None, caller=None):
+        # Add a model call to the log.
+        page = self.add_page(summary=description,
+                             details=details,
+                             method_call="model call",
+                             show_in_overview=False)
+        self.page_stack.write_stack_to_page(page)
+        for i, m in enumerate(input_messages):
+            page.add_lines('\n' + self.message_source(m))
+            page.add_lines(self.message_content(page, message=m))
+        page.add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True))
+        if response is None:
+            page.add_lines("\n TOO MANY INPUT TOKENS, NO RESPONSE GENERATED")
+        else:
+            page.add_lines(self.message_content(page, message=response))
+        page.flush()
+        if num_input_tokens is not None and caller is not None:
+            # Add a line to the token count file.
+            self.write_token_count(num_input_tokens, caller, page.index_str + ".html")
+        return page
+
+    def prepend_entry_line(self, line):
+        self.entry_lines.insert(0, line)
+
+    def append_entry_line(self, line):
+        self.entry_lines.append(line)
+
+    def prepend_exit_line(self, line):
+        self.exit_lines.insert(0, line)
+
+    def append_exit_line(self, line):
+        self.exit_lines.append(line)
+
+    def link_to_local_file(self, file_path):
+        file_name = os.path.basename(file_path)
+        link = f'<a href="{file_path}">{file_name}</a>'
+        return link
+
+    def last_page(self):
+        if len(self.page_stack.stack) > 0:
+            return self.page_stack.stack[-1]
+        else:
+            return None
+
+    def flush(self, final=False):
+        # Create an overview of the log.
+        overview_path = os.path.join(self.run_dir_path, self.name + ".html")
+        with open(overview_path, "w") as f:
+            f.write(self.html_opening("0 Overview", final=final))
+            f.write(f"
<h3>{self.name}</h3>
\n") + for line in self.entry_lines: + f.write(line + "\n") + f.write("\n") + for page in self.pages: + if page.show_in_overview: + f.write(page.line_text + "\n") + f.write("\n") + for line in self.exit_lines: + f.write(line + "\n") + f.write(self.html_closing()) + time.sleep(0.1) + + def begin_page(self, summary, details, method_call, show_in_overview=True): + # Perform a set of logging actions that are often performed at the beginning of a caller's method. + page = self.add_page( + summary=summary, + details=details, + method_call=method_call, + show_in_overview=show_in_overview, + final=False) + + self.page_stack.push(page) + self.page_stack.write_stack_to_page(page) + + page.add_lines("\nENTER {}".format(method_call), flush=True) + return page + + def finish_page(self, page): + # Perform a set of logging actions that are often performed at the end of a caller's method. + page.final = True + page.add_lines("LEAVE {}".format(page.method_call), flush=True) + self.page_stack.pop() + + +class PageStack: + """ + A call stack containing a list of currently active tasks and policies in the order they called each other. + """ + def __init__(self): + self.stack = [] + + def push(self, page): + self.stack.append(page) + + def pop(self): + return self.stack.pop() + + def top(self): + return self.stack[-1] + + def write_stack_to_page(self, page): + # Log a properly indented string showing the current state of the call stack. + page.add_lines("\nCALL STACK") + for stack_page in self.stack: + page.add_lines(stack_page.line_text) + page.add_lines("") + page.add_lines("") + page.flush() diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py new file mode 100644 index 000000000000..ba23d7b4d1f5 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -0,0 +1,178 @@ +import time +from typing import List + +from autogen_core.components.models import ( + AssistantMessage, + LLMMessage, + SystemMessage, + UserMessage, + CreateResult, +) + +from autogen_core.components import FunctionCall, Image + +from ._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content + + +class Prompter: + def __init__(self, client, page_log): + self.client = client + self.page_log = page_log + self.time_spent_in_model_calls = 0. + self.num_model_calls = 0 + self.start_time = time.time() + + # Create the chat history + self._chat_history: List[LLMMessage] = [] + + async def call_model(self, details, user_content: UserContent = None, system_message=None, keep_these_messages=True): + # Prepare the input message list + user_message = UserMessage(content=user_content, source="User") + if system_message is None: + system_message = self.default_system_message + system_message = SystemMessage(content=system_message) + + input_messages = [system_message] + self._chat_history + [user_message] + + # Double check the types of the input messages. + for message in input_messages: + for part in message.content: + if part is None: + print("part is None") + print("message = ", message) + assert isinstance(part, str) or isinstance(part, Image), "Invalid message content type: {}".format(type(part)) + + # Call the model + start_time = time.time() + + # create_result, num_input_tokens = self.core.call_model(input_messages) + num_input_tokens = self.client.count_tokens(input_messages) + max_input_tokens_per_call = None # This is a placeholder value. 
+ if (max_input_tokens_per_call is not None) and (num_input_tokens > max_input_tokens_per_call): + # The input is too large. + response = None + else: + # Call the model. + response = await self.client.create(input_messages) + + + if response is None: + parent_page = self.page_log.add_model_call(description="Ask the model", + details=details + " ({:,} TOO MANY INPUT TOKENS)".format(num_input_tokens), + input_messages=input_messages, response=None, num_input_tokens=num_input_tokens, caller='Orchestrator') + assert False, "TOO MANY INPUT TOKENS" + response_string = "" + else: + assert isinstance(response, CreateResult) + response_string = response.content + assert isinstance(response_string, str) + response_message = AssistantMessage(content=response_string, source="Assistant") + assert isinstance(response_message, AssistantMessage) + + self.time_spent_in_model_calls += time.time() - start_time + self.num_model_calls += 1 + + # Log the model call + parent_page = self.page_log.add_model_call(description="Ask the model", + details=details, input_messages=input_messages, response=response_message, + num_input_tokens=num_input_tokens, caller='Orchestrator') + + # Manage the chat history + if keep_these_messages: + self._chat_history.append(user_message) + self._chat_history.append(response_message) + + # Return the response as a string for now + return response_string, parent_page + + def remove_last_turn(self): + if len(self._chat_history) > 0: + self._chat_history.pop() + + def clear_history(self): + self._chat_history = [] + + async def learn_from_failure(self, task_description, memory_section, final_response, expected_answer, + work_history, final_format_instructions): + # Try to create an insight to help avoid this failure in the future. + + sys_message = """- You are a patient and thorough teacher. +- Your job is to review work done by students and help them learn how to do better.""" + + user_message = [] + user_message.append("# A team of students made a mistake on the following task:\n") + user_message.extend([task_description]) + + if len(memory_section) > 0: + user_message.append(memory_section) + + if len(final_format_instructions) > 0: + user_message.append("# The following answer-formatting instructions were given to the students:\n") + user_message.append(final_format_instructions) + + user_message.append("# Here's the expected answer, which would have been correct:\n") + user_message.append(expected_answer) + + user_message.append("# Here is the students' answer, which was INCORRECT:\n") + user_message.append(final_response) + + user_message.append("# Please review the students' work which follows:\n") + user_message.append("**----- START OF STUDENTS' WORK -----**\n\n") + user_message.append(work_history) + user_message.append("\n**----- END OF STUDENTS' WORK -----**\n\n") + + user_message.append( + "# Now carefully review the students' work above, explaining in detail what the students did right and what they did wrong.\n") + + self.clear_history() + response1, page = await self.call_model( + system_message=sys_message, + user_content=user_message, + details="to learn from this failure") + + user_message = [ + "Now put yourself in the mind of the students. What misconception led them to their incorrect answer?"] + response2, page = await self.call_model( + system_message=sys_message, + user_content=user_message, + details="to state the misconception") + + user_message = ["Please express your key insights in the form of short, general advice that will be given to the students. 
Just one or two sentences, or they won't bother to read it."]
+
+        insight, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to formulate a concise insight")
+
+        return insight
+
+    async def find_index_topics(self, input_string):
+        # Returns a list of topics related to the input string.
+
+        sys_message = """You are an expert at semantic analysis."""
+
+        user_message = []
+        user_message.append("""- My job is to create a thorough index for a book called Task Completion, and I need your help.
+- Every paragraph in the book needs to be indexed by all the topics related to various kinds of tasks and strategies for completing them.
+- Your job is to read the text below and extract the task-completion topics that are covered.
+- The number of topics depends on the length and content of the text. But you should list at least one topic, and potentially many more.
+- Each topic you list should be a meaningful phrase composed of a few words. Don't use whole sentences as topics.
+- Don't include details that are unrelated to the general nature of the task, or a potential strategy for completing tasks.
+- List each topic on a separate line, without any extra text like numbering, or bullets, or any other formatting, because we don't want those things in the index of the book.\n\n""")
+
+        user_message.append("# Text to be indexed\n")
+        user_message.append(input_string)
+
+        self.clear_history()
+        topics, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to extract topics")
+
+        # Parse the topics into a python list.
+        topic_list = []
+        for line in topics.split("\n"):
+            if (line is not None) and (len(line) > 0):
+                topic_list.append(line)
+
+        return topic_list
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
new file mode 100644
index 000000000000..8dc976f07724
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
@@ -0,0 +1,58 @@
+from typing import Any, Dict, List, Union
+from autogen_core.components import FunctionCall, Image
+from autogen_core.components.models import FunctionExecutionResult, LLMMessage
+
+# Convenience type
+UserContent = Union[str, List[Union[str, Image]]]
+AssistantContent = Union[str, List[FunctionCall]]
+FunctionExecutionContent = List[FunctionExecutionResult]
+SystemContent = str
+
+
+# Convert UserContent to a string
+def message_content_to_str(
+    message_content: UserContent | AssistantContent | SystemContent | FunctionExecutionContent,
+) -> str:
+    if message_content is None:
+        return ""
+    elif isinstance(message_content, str):
+        return message_content
+    elif isinstance(message_content, List):
+        converted: List[str] = list()
+        for item in message_content:
+            if isinstance(item, str):
+                converted.append(item)
+            elif isinstance(item, Image):
+                converted.append("<image>")
+            else:
+                converted.append(str(item).rstrip())
+        return "\n".join(converted)
+    else:
+        raise AssertionError("Unexpected response type.")
+
+
+def text_from_user_content(user_content: UserContent) -> str:
+    if isinstance(user_content, str):
+        return user_content
+    elif isinstance(user_content, List):
+        text_list: List[str] = list()
+        for item in user_content:
+            if isinstance(item, str):
+                text_list.append(item.rstrip())
+        return "\n\n".join(text_list)
+    else:
+        raise AssertionError("Unexpected response type.")
+
+
+def single_image_from_user_content(user_content: UserContent) ->
Union[Image, None]: + image_to_return = None + if isinstance(user_content, str): + return None + elif isinstance(user_content, List): + for item in user_content: + if isinstance(item, Image): + assert image_to_return is None, "Only one image is currently allowed in the user content." + image_to_return = item + else: + raise AssertionError("Unexpected response type.") + return image_to_return From f8584cddf866ebc179a7fa48953cca885f1abb90 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 2 Dec 2024 13:04:37 -0800 Subject: [PATCH 02/93] support for extensive evaluations --- .../samples/run_magentic_one_with_memory.py | 177 +++++++++++++----- .../agentic_memory/_agentic_memory.py | 162 +++++++++++----- .../autogen_ext/agentic_memory/_prompter.py | 7 +- 3 files changed, 249 insertions(+), 97 deletions(-) diff --git a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py index 1c31d4365646..af5bbdee23a6 100644 --- a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py +++ b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py @@ -21,8 +21,57 @@ USE_AGENTIC_MEMORY = 1 # 1 = Assign task to AgenticMemory instead of directly to the completion agent -CREATE_NEW_MEMORIES = 1 # 1 = Let AgenticMemory try to create new memories -RESET_MEMORY = 1 # 1 = Reset the memory before starting each task + +PATH_TO_ARCHIVE_DIR = "~/agentic_memory_archive" + + +def define_tasks_with_answers(): + tasks_with_answers = [] + + # Task index 0 + tasks_with_answers.append({ + "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? + +The final line of your response must contain nothing but the answer as a number.""", + "expected_answer": "100"}) + + # Task index 1 + tasks_with_answers.append({ + "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. + +You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. + +In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires? + +The final line of your response must contain nothing but the answer as a number.""", + "expected_answer": "100"}) + + # Task index 2 + tasks_with_answers.append({ + "task": """Three guards stand at a door. 
You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? + +The final line of your response must contain nothing but the answer as a number.""", + "expected_answer": "3"}) + + return tasks_with_answers + + +def create_client(): + # Create an OpenAI client + model_name = "gpt-4o-2024-05-13" + temp = 0.1 + max_tokens = 4096 + client = OpenAIChatCompletionClient( + model=model_name, + api_key="", + temperature=temp, + max_tokens=max_tokens, + presence_penalty=0.0, + frequency_penalty=0.0, + top_p=1.0, + max_retries=65535, + ) + return client async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str, str]: @@ -66,12 +115,17 @@ async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str final_response = final_message_lines[-1] page_log.finish_page(page) - return final_response, work_history async def assign_task_to_client(task, client, page_log): - # The client is a ChatCompletionClient. Pass the task to it, and return the response. + page = page_log.begin_page( + summary="assign_task_to_client", + details='', + method_call="assign_task_to_client") + + page.add_lines(task) + system_message = SystemMessage(content="""You are a helpful and thoughtful assistant. In responding to every user message, you follow the same multi-step process given here: 1. Explain your understanding of the user message in detail, covering all the important points. @@ -97,61 +151,90 @@ async def assign_task_to_client(task, client, page_log): # The final line contains the answer. Extract it. answer = response_lines[-1] + page_log.finish_page(page) return answer, response.content -async def task_assignment_callback(task, client, page_log) -> str: - page = page_log.begin_page( - summary="task_assignment_callback", - details='', - method_call="task_assignment_callback") - - # Send the task to an agent, team or client. 
- # response, work_history = await assign_task_to_client(task.strip(), client, page_log) - response, work_history = await assign_task_to_magentic_one(task.strip(), client, page_log) - - page.update_details(" " + response) - page_log.finish_page(page) - return response, work_history +async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, + client, page_log) -> None: + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) + await memory.train_on_task( + task=task_with_answer["task"], + expected_answer=task_with_answer["expected_answer"], + task_assignment_callback=task_assignment_callback, + final_format_instructions="", + max_train_trials=max_train_trials, + max_test_trials=max_test_trials) + + +async def test(task_with_answer, num_trials, task_assignment_callback, reset_memory, + client, page_log) -> Tuple[str, int, int]: + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) + response, num_successes, num_trials = await memory.test_on_task( + task=task_with_answer["task"], + expected_answer=task_with_answer["expected_answer"], + task_assignment_callback=task_assignment_callback, + num_trials=num_trials) + return response, num_successes, num_trials + + +async def train_and_test(task_index, max_train_trials, max_test_trials, task_assignment_callback, page_log): + tasklist = define_tasks_with_answers() + task_with_answer = tasklist[task_index] + + num_loops = 10 + total_num_successes = 0 + total_num_trials = 0 + for i in range(num_loops): + await train( + task_with_answer=task_with_answer, + max_train_trials=max_train_trials, + max_test_trials=max_test_trials, + task_assignment_callback=task_assignment_callback, + reset_memory=True, + client=create_client(), + page_log=page_log) + last_response, num_successes, num_trials = await test( + task_with_answer=task_with_answer, + num_trials=max_test_trials, + task_assignment_callback=task_assignment_callback, + reset_memory=False, + client=create_client(), + page_log=page_log) + print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) + total_num_successes += num_successes + total_num_trials += num_trials + return total_num_successes, total_num_trials + + +async def test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials, reset_memory): + last_response, num_successes, num_trials = await test( + task_with_answer=define_tasks_with_answers()[task_index], + num_trials=num_trials, + task_assignment_callback=task_assignment_callback, + reset_memory=reset_memory, + client=create_client(), + page_log=page_log) + print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) async def main() -> None: - # Select the task - task = """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? The final line of your response must contain nothing but the answer as a number.""" - expected_answer = "100" - - # Create the OpenAI client - model_name = "gpt-4o-2024-05-13" - temp = 0.1 - max_tokens = 4096 - client = OpenAIChatCompletionClient( - model=model_name, - api_key="", - temperature=temp, - max_tokens=max_tokens, - presence_penalty=0.0, - frequency_penalty=0.0, - top_p=1.0, - max_retries=65535, - ) - - # Create the PageLog. - page_log = PageLog("~/pagelogs/", "m1") + # Create the PageLog. 
(This is optional) + page_log = PageLog("~/pagelogs/", "code_sample") page = page_log.begin_page( summary="main", details='', method_call="main") - page_log.append_entry_line(f"Using {model_name} on OAI, temp={temp}, max_tokens={max_tokens}") - if USE_AGENTIC_MEMORY: - memory = AgenticMemory(reset=RESET_MEMORY, client=client, page_log=page_log, path_to_archive_dir="~/agentic_memory_archive") - prepared_response = await memory.assign_task( - task, expected_answer, CREATE_NEW_MEMORIES, task_assignment_callback, final_format_instructions="") - else: - prepared_response, _ = await task_assignment_callback(task, client, page_log) + task_index = 1 + task_assignment_callback = assign_task_to_magentic_one # assign_task_to_client or assign_task_to_magentic_one + + # await test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials=3, reset_memory=True) + + num_successes, num_trials = await train_and_test(task_index, 10, 3, task_assignment_callback, page_log) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) - print("FINAL RESPONSE AFTER LEARNING: ", prepared_response) - page.add_lines(prepared_response) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index c4ecbfd2a884..44417a055460 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -1,4 +1,4 @@ -from typing import Callable +from typing import Callable, List from ._prompter import Prompter from ._knowledge_archive import KnowledgeArchive @@ -11,42 +11,70 @@ def __init__(self, reset, client, page_log, path_to_archive_dir): self.archive = KnowledgeArchive(verbosity=0, reset=reset, path_to_archive_dir=path_to_archive_dir, page_log=page_log) - async def assign_task(self, task: str, expected_answer: str, create_new_memories: bool, assign_task_to_completer: Callable, - final_format_instructions: str): + async def train_on_task(self, + task: str, # The task to be completed. + expected_answer: str, # The expected answer to the task. + task_assignment_callback: Callable, # The function through which to assign the task. + final_format_instructions: str, # Instructions for formatting the final response, if any. + max_train_trials: int, # The maximum number of training trials to attempt. + max_test_trials: int, # The number of successful test trials to qualify as success. + ): """ - Assigns a task to the completion agent, using any relevant memories, and tries to learn from failures. - If the first attempt succeeds, that response is returned without attempting to learn. + Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ page = self.page_log.begin_page( - summary="AgenticMemory.assign_task", + summary="AgenticMemory.train_on_task", details="", - method_call="AgenticMemory.assign_task") - - if create_new_memories: - page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) - # Attempt to create useful new memories. 
- _, insight = await self.iterate_on_task(task, expected_answer, assign_task_to_completer, final_format_instructions) - if insight is not None: - page.add_lines("A new insight was created:\n{}".format(insight), flush=True) - # Add this insight to memory. - await self.add_insight_to_memory(task, insight) - - # Retrieve insights from memory. - filtered_insights = await self.retrieve_relevant_insights(task) - - # Try to solve the task, looking up relevant memories from the DB. - page.add_lines("Try to solve the task, using any insights from the db.\n", flush=True) - if len(filtered_insights) > 0: - page.add_lines("Relevant insights were found in memory.\n", flush=True) - memory_section = self.format_memory_section(filtered_insights) - task = task + '\n\n' + memory_section - - # Attempt to solve the task. - final_response, _ = await assign_task_to_completer(task, self.client, self.page_log) + method_call="AgenticMemory.train_on_task") + + # Attempt to create useful new memories. + page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) + _, insight = await self._iterate_on_task(task, expected_answer, task_assignment_callback, + final_format_instructions, max_train_trials, max_test_trials) + if insight is not None: + page.add_lines("A new insight was created:\n{}".format(insight), flush=True) + # Add this insight to memory. + await self.add_insight_to_memory(task, insight) - page.add_lines("\n{}\n".format(final_response), flush=True) self.page_log.finish_page(page) - return final_response + + async def test_on_task(self, task: str, expected_answer: str, task_assignment_callback: Callable, num_trials=1): + """ + Assigns a task to the completion agent, along with any relevant insights/memories. + """ + page = self.page_log.begin_page( + summary="AgenticMemory.test_on_task", + details="", + method_call="AgenticMemory.test_on_task") + + response = None + num_successes = 0 + + for trial in range(num_trials): + page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + + # Try to retrieve any relevant memories from the DB. + filtered_insights = await self.retrieve_relevant_insights(task) + if len(filtered_insights) > 0: + page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) + memory_section = self.format_memory_section(filtered_insights) + task = task + '\n\n' + memory_section + + # Attempt to solve the task. + page.add_lines("Try to solve the task.\n", flush=True) + response, _ = await task_assignment_callback(task, self.client, self.page_log) + + response_is_correct = (response.lower() == expected_answer.lower()) + if response_is_correct: + num_successes += 1 + + page.add_lines("Response: {}\n".format(response), flush=True) + + # Calculate the success rate as a percentage, rounded to the nearest whole number. + page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) + + self.page_log.finish_page(page) + return response, num_successes, num_trials async def add_insight_to_memory(self, task: str, insight: str): # Adds an insight to the DB. 
@@ -101,11 +129,45 @@ def format_memory_section(self, memories): memory_section += ('- ' + mem + '\n') return memory_section - async def iterate_on_task(self, task: str, expected_answer: str, assign_task_to_completer: Callable, final_format_instructions: str): + async def _test_for_failure(self, task_plus_insights: str, expected_answer: str, assign_task_to_completer: Callable, + num_trials: int): + """ + Attempts to solve the given task multiple times to find a failure case to learn from. + """ page = self.page_log.begin_page( - summary="AgenticMemory.iterate_on_task", + summary="AgenticMemory._test_for_failure", details="", - method_call="AgenticMemory.iterate_on_task") + method_call="AgenticMemory._test_for_failure") + + page.add_lines("\nTask description, including any insights: {}".format(task_plus_insights)) + page.add_lines("\nExpected answer: {}\n".format(expected_answer)) + + failure_found = False + response, work_history = None, None + + for trial in range(num_trials): + page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + + # Attempt to solve the task. + page.add_lines("Try to solve the task.\n", flush=True) + response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log) + page.add_lines("Response: {}\n".format(response), flush=True) + + response_is_correct = (response.lower() == expected_answer.lower()) + if not response_is_correct: + page.add_lines("\nResponse is INCORRECT. Return the details.\n", flush=True) + failure_found = True + break + + self.page_log.finish_page(page) + return failure_found, response, work_history + + async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to_completer: Callable, + final_format_instructions: str, max_train_trials: int, max_test_trials: int): + page = self.page_log.begin_page( + summary="AgenticMemory._iterate_on_task", + details="", + method_call="AgenticMemory._iterate_on_task") page.add_lines("\nTask description: {}".format(task)) page.add_lines("\nExpected answer: {}\n".format(expected_answer)) @@ -113,26 +175,27 @@ async def iterate_on_task(self, task: str, expected_answer: str, assign_task_to_ final_response = None old_insights = await self.retrieve_relevant_insights(task) new_insights = [] + last_insight = None insight = None successful_insight = None # Loop until success (or timeout) while learning from failures. - max_trials = 4 - for trial in range(1, max_trials + 1): - page.add_lines("----- TRIAL {} -----\n".format(trial), flush=True) + for trial in range(1, max_train_trials + 1): + page.add_lines("----- TRAIN TRIAL {} -----\n".format(trial), flush=True) # Add any new insights we've accumulated so far. - memory_section = self.format_memory_section(old_insights + new_insights) + # memory_section = self.format_memory_section(old_insights + new_insights) + if last_insight is not None: + memory_section = self.format_memory_section(old_insights + [last_insight]) + else: + memory_section = self.format_memory_section(old_insights) task_plus_insights = task + '\n\n' + memory_section - # Attempt to solve the task. - response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log) - - # Is this answer correct? - page.add_lines("\nResponse: {}".format(response), flush=True) - response_is_correct = (response.lower() == expected_answer.lower()) - if response_is_correct: - # Yes. Time to exit the loop. + # Can we find a failure case to learn from? 
+ failure_found, response, work_history = await self._test_for_failure( + task_plus_insights, expected_answer, assign_task_to_completer, max_test_trials) + if not failure_found: + # No. Time to exit the loop. page.add_lines("\nResponse is CORRECT. No learning needed.\n", flush=True) # Was this the first trial? if trial == 1: @@ -144,17 +207,18 @@ async def iterate_on_task(self, task: str, expected_answer: str, assign_task_to_ break # Will we try again? - if trial == max_trials: - # No. We're out of tries. + if trial == max_train_trials: + # No. We're out of training trials. page.add_lines("\nNo more trials will be attempted.\n", flush=True) break # Try to learn from this failure. page.add_lines("\nResponse is INCORRECT. Try to learn from this failure.\n", flush=True) insight = await self.prompter.learn_from_failure( - task, memory_section, response, expected_answer, work_history, final_format_instructions) + task, memory_section, response, expected_answer, work_history, final_format_instructions, new_insights) page.add_lines("\nInsight: {}\n".format(insight), flush=True) new_insights.append(insight) + last_insight = insight # Return the answer from the last loop. page.add_lines("\n{}\n".format(final_response), flush=True) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index ba23d7b4d1f5..882480165ae2 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -93,7 +93,7 @@ def clear_history(self): self._chat_history = [] async def learn_from_failure(self, task_description, memory_section, final_response, expected_answer, - work_history, final_format_instructions): + work_history, final_format_instructions, insights): # Try to create an insight to help avoid this failure in the future. sys_message = """- You are a patient and thorough teacher. @@ -138,6 +138,11 @@ async def learn_from_failure(self, task_description, memory_section, final_respo details="to state the misconception") user_message = ["Please express your key insights in the form of short, general advice that will be given to the students. Just one or two sentences, or they won't bother to read it."] + # if len(insights) > 0: + # memory_section = "\n## The following insights and advice were given to the students previously, but they didn't help. 
So do not repeat any of the following:\n" + # for insight in insights: + # memory_section += ('- ' + insight + '\n') + # user_message.append(memory_section) insight, page = await self.call_model( system_message=sys_message, From 607e7ff1cc9da023ef6dc45ecfceaab6cc775d9b Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 3 Dec 2024 17:34:10 -0800 Subject: [PATCH 03/93] Enhance retrieval with task generalization and insight validation --- .../agentic_memory/_agentic_memory.py | 21 +++++-- .../agentic_memory/_knowledge_archive.py | 14 ++--- .../autogen_ext/agentic_memory/_prompter.py | 62 +++++++++++++++++++ 3 files changed, 84 insertions(+), 13 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 44417a055460..d111cd6a47c9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -83,15 +83,18 @@ async def add_insight_to_memory(self, task: str, insight: str): details="", method_call="AgenticMemory.add_insight_to_memory") + # Generalize the task. + generalized_task = await self.prompter.generalize_task(task) + # Get a combined list of topics from the task and insight. - task_plus_insight = task.strip() + "\n(Hint: " + insight + ")" + task_plus_insight = generalized_task.strip() + "\n(Hint: " + insight + ")" topics = await self.prompter.find_index_topics(task_plus_insight) page.add_lines("\nTOPICS EXTRACTED FROM TASK AND INSIGHT:") page.add_lines("\n".join(topics)) page.add_lines("") # Add the insight to the archive. - self.archive.add_insight(insight, task, topics) + self.archive.add_insight(insight, generalized_task, topics) self.page_log.finish_page(page) @@ -102,8 +105,11 @@ async def retrieve_relevant_insights(self, task: str): details="", method_call="AgenticMemory.retrieve_relevant_insights") + # Generalize the task. + generalized_task = await self.prompter.generalize_task(task) + # Get a list of topics from the task. - topics = await self.prompter.find_index_topics(task) + topics = await self.prompter.find_index_topics(generalized_task) page.add_lines("\nTOPICS EXTRACTED FROM TASK:") page.add_lines("\n".join(topics)) page.add_lines("") @@ -114,10 +120,14 @@ async def retrieve_relevant_insights(self, task: str): page.add_lines("\nUNFILTERED INSIGHTS") for insight, relevance in unfiltered_insights.items(): page.add_lines(" INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) - if relevance > 5.0: - filtered_insights.append(insight) + filtered_insights.append(insight) page.add_lines("\nFiltered to top {} insights".format(len(filtered_insights))) + if len(filtered_insights) > 0: + # Apply a final filtering stage to keep only the insights that the LLM believes are relevant. + filtered_insights = await self.prompter.validate_insights(filtered_insights, task) + page.add_lines("\n{} insights were validated".format(len(filtered_insights))) + self.page_log.finish_page(page) return filtered_insights @@ -184,7 +194,6 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to page.add_lines("----- TRAIN TRIAL {} -----\n".format(trial), flush=True) # Add any new insights we've accumulated so far. 
-        # memory_section = self.format_memory_section(old_insights + new_insights)
         if last_insight is not None:
             memory_section = self.format_memory_section(old_insights + [last_insight])
         else:
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
index 9b89249ff8e4..d85607bd001c 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
@@ -50,6 +50,13 @@ def __init__(
         self.last_insight_id = len(self.uid_insight_dict)
         parent_page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict)))
 
+    def save_archive(self):
+        self.memo_store.save_memos()
+        parent_page = self.page_log.last_page()
+        parent_page.add_lines("\nSAVING INSIGHTS TO DISK {}".format(self.path_to_dict))
+        with open(self.path_to_dict, "wb") as file:
+            pickle.dump(self.uid_insight_dict, file)
+
     def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics: Optional[List[str]] = None):
         """Adds an insight to the knowledge archive."""
         assert topics is not None, "For now, the topics list must be provided."
@@ -62,13 +69,6 @@ def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics:
         self.uid_insight_dict[str(id_str)] = insight
         self.save_archive()
 
-    def save_archive(self):
-        self.memo_store.save_memos()
-        parent_page = self.page_log.last_page()
-        parent_page.add_lines("\nSAVING INSIGHTS TO DISK {}".format(self.path_to_dict))
-        with open(self.path_to_dict, "wb") as file:
-            pickle.dump(self.uid_insight_dict, file)
-
     def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional[List[str]] = None):
         """Returns any insights from the knowledge archive that are relevant to the given task or topics."""
         assert (task_str is not None) or (topics is not None), "Either the task string or the topics list must be provided."
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
index 882480165ae2..495e212550a2 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
@@ -181,3 +181,65 @@ async def find_index_topics(self, input_string):
                 topic_list.append(line)
 
         return topic_list
+
+    async def generalize_task(self, task_description):
+        # Returns a simplified, more general version of the given task description.
+
+        sys_message = """You are a helpful and thoughtful assistant."""
+
+        user_message = ["We have been given a task description. Our job is not to complete the task, but merely rephrase the task in simpler, more general terms, if possible. Please read through the following task description, then explain your understanding of the task in detail, as a single, flat list of all the important points."]
+        user_message.append("\n# Task description")
+        user_message.append(task_description)
+
+        self.clear_history()
+        response1, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to rephrase the task in a list of important points")
+
+        user_message = ["Do you see any parts of this list that are irrelevant to actually solving the task? If so, explain which items are irrelevant."]
+        response2, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to identify irrelevant points")
+
+        user_message = ["Revise your original list to include only the most general terms, those that are critical to solving the task, removing any themes or descriptions that are not essential to the solution. Your final list may be shorter, but do not leave out any part of the task that is needed for solving the task. Do not add any additional commentary either before or after the list."]
+        generalized_task, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to make a final list of general terms")
+
+        return generalized_task
+
+    async def validate_insights(self, insights, task_description):
+        # Returns only the insights that the client verifies are relevant to the task.
+
+        sys_message = """You are a helpful and thoughtful assistant."""
+
+        user_message = ["""We have been given a list of insights that may or may not be useful for solving the given task.
+- First review the following task.
+- Then review the list of insights that follow, and discuss which ones could be useful in solving the given task.
+- Do not attempt to actually solve the task. That will come later."""]
+        user_message.append("\n# Task description")
+        user_message.append(task_description)
+        user_message.append("\n# Possibly useful insights")
+        user_message.extend(insights)
+        self.clear_history()
+        response1, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to review the task and insights")
+
+        user_message = ["""Now output a verbatim copy of the insights that you decided are relevant to the task.
+- The original list of insights is provided below for reference.
+- If an insight is not relevant to the task, simply omit it from your response.
+- Do not add any additional commentary either before or after the relevant insights.
+- If none of the insights are relevant, simply write "None"."""]
+        user_message.append("\n# Original list of possibly useful insights")
+        user_message.extend(insights)
+        validated_insights, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to list the relevant insights")
+
+        return [validated_insights] if validated_insights != "None" else []

From b0456362b7c0547c6b91caaa4f68e0636c45b680 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Mon, 9 Dec 2024 15:33:39 -0800
Subject: [PATCH 04/93] Support TRAPI client. Make memory optional. Filter out insights with negative scores.
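
The negative-score filtering sums per-topic relevance for each retrieved
insight. A minimal sketch of the idea, mirroring get_relevant_insights() in
_knowledge_archive.py (it assumes `matches` is the list of
(topic, insight, distance) tuples collected from the vector DB, and 1.7 is the
approximate borderline between relevant and irrelevant topic matches):

    relevance_conversion_threshold = 1.7
    insight_relevance_dict = {}
    for topic, insight_str, distance in matches:
        # Close topic matches (small distance) contribute positive relevance.
        relevance = relevance_conversion_threshold - distance
        insight_relevance_dict[insight_str] = insight_relevance_dict.get(insight_str, 0) + relevance
    # Drop any insight whose summed relevance is negative overall.
    for insight_str in list(insight_relevance_dict.keys()):
        if insight_relevance_dict[insight_str] < 0:
            del insight_relevance_dict[insight_str]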
--- .../samples/run_magentic_one_with_memory.py | 140 +++++++++++++++--- .../agentic_memory/_knowledge_archive.py | 9 +- 2 files changed, 129 insertions(+), 20 deletions(-) diff --git a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py index af5bbdee23a6..c2c48e5bdeff 100644 --- a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py +++ b/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py @@ -1,5 +1,7 @@ import asyncio from autogen_ext.models import OpenAIChatCompletionClient +from autogen_ext.models import AzureOpenAIChatCompletionClient +from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat from autogen_ext.agents import MultimodalWebSurfer @@ -19,9 +21,6 @@ ) from autogen_ext.agentic_memory import AgenticMemory, PageLog - -USE_AGENTIC_MEMORY = 1 # 1 = Assign task to AgenticMemory instead of directly to the completion agent - PATH_TO_ARCHIVE_DIR = "~/agentic_memory_archive" @@ -56,7 +55,7 @@ def define_tasks_with_answers(): return tasks_with_answers -def create_client(): +def create_oai_client(): # Create an OpenAI client model_name = "gpt-4o-2024-05-13" temp = 0.1 @@ -74,6 +73,65 @@ def create_client(): return client +def create_aoai_client(): + # Create the token provider + token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") + + # Create an OpenAI client + azure_deployment = "gpt-4o-2024-08-06-eval" + model = "gpt-4o-2024-08-06" + azure_endpoint = "https://agentic2.openai.azure.com/" + client = AzureOpenAIChatCompletionClient( + azure_endpoint=azure_endpoint, + azure_ad_token_provider=token_provider, + azure_deployment=azure_deployment, + api_version="2024-06-01", + model=model, + ) + return client + + +def create_trapi_client(): + # Create the token provider + token_provider = get_bearer_token_provider(ChainedTokenCredential( + AzureCliCredential(), + DefaultAzureCredential( + exclude_cli_credential=True, + # Exclude other credentials we are not interested in. + exclude_environment_credential=True, + exclude_shared_token_cache_credential=True, + exclude_developer_cli_credential=True, + exclude_powershell_credential=True, + exclude_interactive_browser_credential=True, + exclude_visual_studio_code_credentials=True, + # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs + ) + ), "api://trapi/.default") + + model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. + azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models + endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' + api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release + + client = AzureOpenAIChatCompletionClient( + azure_ad_token_provider=token_provider, + model=model, + azure_deployment=azure_deployment, + azure_endpoint=endpoint, + api_version=api_version, + ) + + return client + + +def create_client(): + # Choose one. 
+ return create_oai_client() + # return create_aoai_client() + # return create_trapi_client() + + async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str, str]: page = page_log.begin_page( summary="assign_task_to_magentic_one", @@ -167,14 +225,33 @@ async def train(task_with_answer, max_train_trials, max_test_trials, task_assign max_test_trials=max_test_trials) -async def test(task_with_answer, num_trials, task_assignment_callback, reset_memory, +async def test(task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, client, page_log) -> Tuple[str, int, int]: - memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) - response, num_successes, num_trials = await memory.test_on_task( - task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"], - task_assignment_callback=task_assignment_callback, - num_trials=num_trials) + if use_memory: + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) + response, num_successes, num_trials = await memory.test_on_task( + task=task_with_answer["task"], + expected_answer=task_with_answer["expected_answer"], + task_assignment_callback=task_assignment_callback, + num_trials=num_trials) + else: + page = page_log.begin_page( + summary="test without memory", + details="", + method_call="test without memory") + response = None + num_successes = 0 + for trial in range(num_trials): + page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("Try to solve the task.\n", flush=True) + response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) + response_is_correct = (response.lower() == task_with_answer["expected_answer"].lower()) + if response_is_correct: + num_successes += 1 + page.add_lines("Response: {}\n".format(response), flush=True) + page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) + page_log.finish_page(page) + return response, num_successes, num_trials @@ -182,7 +259,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass tasklist = define_tasks_with_answers() task_with_answer = tasklist[task_index] - num_loops = 10 + num_loops = 1 # Normally 10 total_num_successes = 0 total_num_trials = 0 for i in range(num_loops): @@ -198,6 +275,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass task_with_answer=task_with_answer, num_trials=max_test_trials, task_assignment_callback=task_assignment_callback, + use_memory=True, reset_memory=False, client=create_client(), page_log=page_log) @@ -212,28 +290,54 @@ async def test_on_task_with_memory(task_index, task_assignment_callback, page_lo task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, task_assignment_callback=task_assignment_callback, + use_memory=True, reset_memory=reset_memory, client=create_client(), page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) +async def test_on_task(task_index, task_assignment_callback, page_log, num_trials): + last_response, num_successes, num_trials = await test( + task_with_answer=define_tasks_with_answers()[task_index], + num_trials=num_trials, + task_assignment_callback=task_assignment_callback, + use_memory=False, + reset_memory=False, + client=create_client(), + page_log=page_log) + print("SUCCESS RATE: 
{}%\n".format(round((num_successes / num_trials) * 100))) + + async def main() -> None: # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "code_sample") + page_log = PageLog("~/pagelogs/", "stress_test") page = page_log.begin_page( summary="main", details='', method_call="main") - task_index = 1 - task_assignment_callback = assign_task_to_magentic_one # assign_task_to_client or assign_task_to_magentic_one + # Choose the task from those listed at the top. + task_index = 0 + + # Choose the client, agent or team to assign the task to. + task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one + + # Test, without using memory. + await test_on_task(task_index, task_assignment_callback, page_log, 1) + # Test, using memory. # await test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials=3, reset_memory=True) - num_successes, num_trials = await train_and_test(task_index, 10, 3, task_assignment_callback, page_log) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) + # Train and test, using memory. + # num_successes, num_trials = await train_and_test( + # task_index, + # 1, # Normally 10 + # 1, # Normally 3 + # task_assignment_callback, + # page_log) + # success_rate = round((num_successes / num_trials) * 100) + # page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py index d85607bd001c..28b557020590 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py @@ -74,9 +74,9 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional assert (task_str is not None) or (topics is not None), "Either the task string or the topics list must be provided." assert topics is not None, "For now, the topics list is always required, because it won't be generated." - # Maintain a dict of insight-relevance pairs. + # Build a dict of insight-relevance pairs. insight_relevance_dict = {} - relevance_conversion_threshold = 1.7 + relevance_conversion_threshold = 1.7 # The approximate borderline between relevant and irrelevant topic matches. # Process the matching topics. matches = [] # Each match is a tuple: (topic, insight, distance) @@ -91,4 +91,9 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional else: insight_relevance_dict[insight_str] = relevance + # Filter out insights with overall relevance below zero. + for insight in list(insight_relevance_dict.keys()): + if insight_relevance_dict[insight] < 0: + del insight_relevance_dict[insight] + return insight_relevance_dict From 63b28d73f7410f9a918fd33a0fd0febc286c45a9 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 23 Dec 2024 17:07:59 -0800 Subject: [PATCH 05/93] Restoring earlier results, and general cleanup. 
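
With this cleanup, the three client factories share the same module-level
sampling defaults, and the sample is driven through a single entry point. A
minimal usage sketch (names as defined in this sample; create_client()
currently dispatches to the TRAPI factory):

    client = create_client()  # or create_oai_client() / create_aoai_client()
    response, work_history = await assign_task_to_client(task, client, page_log)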
--- ...e_with_memory.py => learning_to_reason.py} | 94 ++++++++++--------- 1 file changed, 52 insertions(+), 42 deletions(-) rename python/packages/autogen-ext/samples/{run_magentic_one_with_memory.py => learning_to_reason.py} (89%) diff --git a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py b/python/packages/autogen-ext/samples/learning_to_reason.py similarity index 89% rename from python/packages/autogen-ext/samples/run_magentic_one_with_memory.py rename to python/packages/autogen-ext/samples/learning_to_reason.py index c2c48e5bdeff..8b2d006bec33 100644 --- a/python/packages/autogen-ext/samples/run_magentic_one_with_memory.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -55,44 +55,62 @@ def define_tasks_with_answers(): return tasks_with_answers +# Default client parameters +TEMPERATURE = 0.1 +MAX_TOKENS = 4096 +PRESENCE_PENALTY = 0.0 +FREQUENCY_PENALTY = 0.0 +TOP_P = 1.0 +MAX_RETRIES = 65535 + + +def create_client(): + # Choose one. + # return create_oai_client() + # return create_aoai_client() + return create_trapi_client() + + def create_oai_client(): # Create an OpenAI client model_name = "gpt-4o-2024-05-13" - temp = 0.1 - max_tokens = 4096 client = OpenAIChatCompletionClient( model=model_name, api_key="", - temperature=temp, - max_tokens=max_tokens, - presence_penalty=0.0, - frequency_penalty=0.0, - top_p=1.0, - max_retries=65535, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + presence_penalty=PRESENCE_PENALTY, + frequency_penalty=FREQUENCY_PENALTY, + top_p=TOP_P, + max_retries=MAX_RETRIES, ) return client def create_aoai_client(): - # Create the token provider + # Create an Azure OpenAI client token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - - # Create an OpenAI client - azure_deployment = "gpt-4o-2024-08-06-eval" - model = "gpt-4o-2024-08-06" - azure_endpoint = "https://agentic2.openai.azure.com/" + azure_deployment = "gpt-4o-2024-05-13-eval" + model = "gpt-4o-2024-05-13" + azure_endpoint = "https://agentic1.openai.azure.com/" client = AzureOpenAIChatCompletionClient( azure_endpoint=azure_endpoint, azure_ad_token_provider=token_provider, azure_deployment=azure_deployment, api_version="2024-06-01", model=model, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + presence_penalty=PRESENCE_PENALTY, + frequency_penalty=FREQUENCY_PENALTY, + top_p=TOP_P, + max_retries=MAX_RETRIES, ) return client def create_trapi_client(): - # Create the token provider + # Create an Azure OpenAI client through TRAPI token_provider = get_bearer_token_provider(ChainedTokenCredential( AzureCliCredential(), DefaultAzureCredential( @@ -107,31 +125,27 @@ def create_trapi_client(): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") - - model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. - azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + model = "gpt-4o-2024-05-13" # This is (for instance) the OpenAI model name, which is used to look up capabilities. 
+ azure_deployment = 'gpt-4o_2024-05-13' # This is DeploymentName in the table at https://aka.ms/trapi/models trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - client = AzureOpenAIChatCompletionClient( azure_ad_token_provider=token_provider, model=model, azure_deployment=azure_deployment, azure_endpoint=endpoint, api_version=api_version, + temperature=TEMPERATURE, + max_tokens=MAX_TOKENS, + presence_penalty=PRESENCE_PENALTY, + frequency_penalty=FREQUENCY_PENALTY, + top_p=TOP_P, + max_retries=MAX_RETRIES, ) - return client -def create_client(): - # Choose one. - return create_oai_client() - # return create_aoai_client() - # return create_trapi_client() - - async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str, str]: page = page_log.begin_page( summary="assign_task_to_magentic_one", @@ -259,7 +273,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass tasklist = define_tasks_with_answers() task_with_answer = tasklist[task_index] - num_loops = 1 # Normally 10 + num_loops = 10 # Normally 10 total_num_successes = 0 total_num_trials = 0 for i in range(num_loops): @@ -311,7 +325,7 @@ async def test_on_task(task_index, task_assignment_callback, page_log, num_trial async def main() -> None: # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "stress_test") + page_log = PageLog("~/pagelogs/", "repro-9") page = page_log.begin_page( summary="main", details='', @@ -324,27 +338,23 @@ async def main() -> None: task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one # Test, without using memory. - await test_on_task(task_index, task_assignment_callback, page_log, 1) + # await test_on_task(task_index, task_assignment_callback, page_log, 1) # Test, using memory. # await test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials=3, reset_memory=True) # Train and test, using memory. - # num_successes, num_trials = await train_and_test( - # task_index, - # 1, # Normally 10 - # 1, # Normally 3 - # task_assignment_callback, - # page_log) - # success_rate = round((num_successes / num_trials) * 100) - # page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) + num_successes, num_trials = await train_and_test( + task_index, + 10, # Normally 10 + 3, # Normally 3 + task_assignment_callback, + page_log) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) if __name__ == "__main__": - # logger = logging.getLogger(EVENT_LOGGER_NAME) - # logger.setLevel(logging.INFO) - # log_handler = LogHandler() - # logger.handlers = [log_handler] asyncio.run(main()) From 9dfb0742a415bc4dc814fbdd558c1f70a66bcdf3 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 24 Dec 2024 15:40:35 -0800 Subject: [PATCH 06/93] Modify imports after merge from main. 
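
For reference, the import-path migration applied in the diffs below (old -> new):

    from autogen_ext.models import OpenAIChatCompletionClient         # old
    from autogen_ext.models.openai import OpenAIChatCompletionClient  # new

    from autogen_ext.agents import MultimodalWebSurfer             # old
    from autogen_ext.agents.web_surfer import MultimodalWebSurfer  # new

    from autogen_core.components import Image, FunctionCall  # old
    from autogen_core import Image, FunctionCall              # new

    from autogen_core.components.models import LLMMessage  # old
    from autogen_core.models import LLMMessage              # new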
--- .../autogen-ext/samples/learning_to_reason.py | 15 +++++++++------ .../src/autogen_ext/agentic_memory/_page_log.py | 4 ++-- .../src/autogen_ext/agentic_memory/_prompter.py | 4 ++-- .../src/autogen_ext/agentic_memory/_utils.py | 4 ++-- 4 files changed, 15 insertions(+), 12 deletions(-) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index 8b2d006bec33..e65b7a67ecc8 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -1,13 +1,13 @@ import asyncio -from autogen_ext.models import OpenAIChatCompletionClient -from autogen_ext.models import AzureOpenAIChatCompletionClient +from autogen_ext.models.openai import OpenAIChatCompletionClient +from autogen_ext.models.openai import AzureOpenAIChatCompletionClient from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_ext.agents import MultimodalWebSurfer +from autogen_ext.agents.web_surfer import MultimodalWebSurfer from autogen_ext.agents.web_surfer._utils import message_content_to_str from autogen_agentchat.task import Console -from autogen_core.components.models import ( +from autogen_core.models import ( AssistantMessage, ChatCompletionClient, FunctionExecutionResult, @@ -93,6 +93,9 @@ def create_aoai_client(): azure_deployment = "gpt-4o-2024-05-13-eval" model = "gpt-4o-2024-05-13" azure_endpoint = "https://agentic1.openai.azure.com/" + # azure_deployment = "gpt-4o-2024-08-06-eval" + # model = "gpt-4o-2024-08-06" + # azure_endpoint = "https://agentic2.openai.azure.com/" client = AzureOpenAIChatCompletionClient( azure_endpoint=azure_endpoint, azure_ad_token_provider=token_provider, @@ -206,7 +209,7 @@ async def assign_task_to_client(task, client, page_log): 4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. 5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. 6. Finish by providing your final response in the particular format requested by the user.""") - user_message = UserMessage(content=task, source="human") + user_message = UserMessage(content=task, source="User") input_messages = [system_message] + [user_message] response = await client.create(input_messages) @@ -325,7 +328,7 @@ async def test_on_task(task_index, task_assignment_callback, page_log, num_trial async def main() -> None: # Create the PageLog. 
(This is optional) - page_log = PageLog("~/pagelogs/", "repro-9") + page_log = PageLog("~/pagelogs/", "repro-14") page = page_log.begin_page( summary="main", details='', diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py index e87a9b0c38c6..d78f012caee1 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py @@ -3,8 +3,8 @@ import time from typing import List -from autogen_core.components import Image -from autogen_core.components.models import ( +from autogen_core import Image +from autogen_core.models import ( AssistantMessage, ChatCompletionClient, LLMMessage, diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 495e212550a2..5d3ff179cc1e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -1,7 +1,7 @@ import time from typing import List -from autogen_core.components.models import ( +from autogen_core.models import ( AssistantMessage, LLMMessage, SystemMessage, @@ -9,7 +9,7 @@ CreateResult, ) -from autogen_core.components import FunctionCall, Image +from autogen_core import FunctionCall, Image from ._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py index 8dc976f07724..7073f0d9c079 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py @@ -1,6 +1,6 @@ from typing import Any, Dict, List, Union -from autogen_core.components import FunctionCall, Image -from autogen_core.components.models import FunctionExecutionResult, LLMMessage +from autogen_core import FunctionCall, Image +from autogen_core.models import FunctionExecutionResult, LLMMessage # Convenience type UserContent = Union[str, List[Union[str, Image]]] From 93a5ca49f7b2226627e5c73d493fbdccc7514457 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 25 Dec 2024 21:05:35 -0800 Subject: [PATCH 07/93] Log model and token counts. --- .../autogen-ext/samples/learning_to_reason.py | 122 ++++++++++++++---- .../agentic_memory/_agentic_memory.py | 8 +- .../autogen_ext/agentic_memory/_page_log.py | 4 + .../autogen_ext/agentic_memory/_prompter.py | 3 +- 4 files changed, 106 insertions(+), 31 deletions(-) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index e65b7a67ecc8..7502c84de309 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -21,36 +21,64 @@ ) from autogen_ext.agentic_memory import AgenticMemory, PageLog +GRADER = 0 + PATH_TO_ARCHIVE_DIR = "~/agentic_memory_archive" def define_tasks_with_answers(): tasks_with_answers = [] - # Task index 0 - tasks_with_answers.append({ - "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? 
+ if GRADER: + # Task index 0 + tasks_with_answers.append({ + "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? + +Be sure to consider the cases where all are lying or all are telling the truth.""", + "expected_answer": "100"}) + + # Task index 1 + tasks_with_answers.append({ + "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. + +You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. + +In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires? + +Be sure to consider the cases where all are lying or all are telling the truth.""", + "expected_answer": "100"}) + + # Task index 2 + tasks_with_answers.append({ + "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? + +Be sure to consider the cases where all are lying or all are telling the truth.""", + "expected_answer": "3"}) + else: + # Task index 0 + tasks_with_answers.append({ + "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "100"}) + "expected_answer": "100"}) - # Task index 1 - tasks_with_answers.append({ - "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. + # Task index 1 + tasks_with_answers.append({ + "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. 
You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires? The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "100"}) + "expected_answer": "100"}) - # Task index 2 - tasks_with_answers.append({ - "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? + # Task index 2 + tasks_with_answers.append({ + "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "3"}) + "expected_answer": "3"}) return tasks_with_answers @@ -64,14 +92,14 @@ def define_tasks_with_answers(): MAX_RETRIES = 65535 -def create_client(): +def create_client(page_log=None): # Choose one. 
- # return create_oai_client() - # return create_aoai_client() - return create_trapi_client() + # return create_oai_client(page_log) + # return create_aoai_client(page_log) + return create_trapi_client(page_log) -def create_oai_client(): +def create_oai_client(page_log): # Create an OpenAI client model_name = "gpt-4o-2024-05-13" client = OpenAIChatCompletionClient( @@ -84,10 +112,14 @@ def create_oai_client(): top_p=TOP_P, max_retries=MAX_RETRIES, ) + if page_log is not None: + page_log.append_entry_line("Client: {}".format(client._resolved_model)) + page_log.append_entry_line(" created through OpenAI directly") + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client -def create_aoai_client(): +def create_aoai_client(page_log): # Create an Azure OpenAI client token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") azure_deployment = "gpt-4o-2024-05-13-eval" @@ -109,10 +141,14 @@ def create_aoai_client(): top_p=TOP_P, max_retries=MAX_RETRIES, ) + if page_log is not None: + page_log.append_entry_line("Client: {}".format(client._resolved_model)) + page_log.append_entry_line(" created through Azure OpenAI") + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client -def create_trapi_client(): +def create_trapi_client(page_log): # Create an Azure OpenAI client through TRAPI token_provider = get_bearer_token_provider(ChainedTokenCredential( AzureCliCredential(), @@ -128,8 +164,10 @@ def create_trapi_client(): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") - model = "gpt-4o-2024-05-13" # This is (for instance) the OpenAI model name, which is used to look up capabilities. - azure_deployment = 'gpt-4o_2024-05-13' # This is DeploymentName in the table at https://aka.ms/trapi/models + # model = "gpt-4o-2024-05-13" # This is (for instance) the OpenAI model name, which is used to look up capabilities. + # azure_deployment = 'gpt-4o_2024-05-13' # This is DeploymentName in the table at https://aka.ms/trapi/models + model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. 
+ azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release @@ -145,7 +183,12 @@ def create_trapi_client(): frequency_penalty=FREQUENCY_PENALTY, top_p=TOP_P, max_retries=MAX_RETRIES, + unknown_param="unknown_param", ) + if page_log is not None: + page_log.append_entry_line("Client: {}".format(client._resolved_model)) + page_log.append_entry_line(" created through TRAPI") + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client @@ -286,7 +329,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass max_test_trials=max_test_trials, task_assignment_callback=task_assignment_callback, reset_memory=True, - client=create_client(), + client=create_client(page_log), page_log=page_log) last_response, num_successes, num_trials = await test( task_with_answer=task_with_answer, @@ -294,7 +337,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=False, - client=create_client(), + client=create_client(page_log), page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) total_num_successes += num_successes @@ -309,7 +352,7 @@ async def test_on_task_with_memory(task_index, task_assignment_callback, page_lo task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=reset_memory, - client=create_client(), + client=create_client(page_log), page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) @@ -321,14 +364,14 @@ async def test_on_task(task_index, task_assignment_callback, page_log, num_trial task_assignment_callback=task_assignment_callback, use_memory=False, reset_memory=False, - client=create_client(), + client=create_client(page_log), page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) async def main() -> None: # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "repro-14") + page_log = PageLog("~/pagelogs/", "repro-16") page = page_log.begin_page( summary="main", details='', @@ -359,5 +402,30 @@ async def main() -> None: page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) + +async def test_grader() -> None: + # Create the PageLog. (This is optional) + page_log = PageLog("~/pagelogs/", "test-grader-1") + page = page_log.begin_page( + summary="test_grader", + details='', + method_call="test_grader") + + # Choose the task from those listed at the top. + task_index = 0 + + # Choose the client, agent or team to assign the task to. + task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one + + # Test, without using memory. 
+ await test_on_task(task_index, task_assignment_callback, page_log, 2) + + page_log.flush(final=True) # Finalize the page log + page_log.finish_page(page) + + if __name__ == "__main__": - asyncio.run(main()) + if GRADER: + asyncio.run(test_grader()) + else: + asyncio.run(main()) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index d111cd6a47c9..e96b1d2d06d6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -58,7 +58,8 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca if len(filtered_insights) > 0: page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) memory_section = self.format_memory_section(filtered_insights) - task = task + '\n\n' + memory_section + if len(memory_section) > 0: + task = task + '\n\n' + memory_section # Attempt to solve the task. page.add_lines("Try to solve the task.\n", flush=True) @@ -193,12 +194,15 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to for trial in range(1, max_train_trials + 1): page.add_lines("----- TRAIN TRIAL {} -----\n".format(trial), flush=True) + task_plus_insights = task + # Add any new insights we've accumulated so far. if last_insight is not None: memory_section = self.format_memory_section(old_insights + [last_insight]) else: memory_section = self.format_memory_section(old_insights) - task_plus_insights = task + '\n\n' + memory_section + if len(memory_section) > 0: + task_plus_insights += '\n\n' + memory_section # Can we find a failure case to learn from? failure_found, response, work_history = await self._test_for_failure( diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py index d78f012caee1..f18e6643412e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py @@ -268,6 +268,10 @@ def add_model_call(self, description, details, input_messages, response, method_call="model call", show_in_overview=False) self.page_stack.write_stack_to_page(page) + if num_input_tokens is not None and num_input_tokens > 0: + page.add_lines("{} prompt tokens from count_tokens".format(num_input_tokens)) + page.add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) + page.add_lines("{} completion tokens".format(response.usage.completion_tokens)) for i, m in enumerate(input_messages): page.add_lines('\n' + self.message_source(m)) page.add_lines(self.message_content(page, message=m)) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 5d3ff179cc1e..f39ae4bf224b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -55,7 +55,6 @@ async def call_model(self, details, user_content: UserContent = None, system_mes # Call the model. 
response = await self.client.create(input_messages) - if response is None: parent_page = self.page_log.add_model_call(description="Ask the model", details=details + " ({:,} TOO MANY INPUT TOKENS)".format(num_input_tokens), @@ -74,7 +73,7 @@ async def call_model(self, details, user_content: UserContent = None, system_mes # Log the model call parent_page = self.page_log.add_model_call(description="Ask the model", - details=details, input_messages=input_messages, response=response_message, + details=details, input_messages=input_messages, response=response, num_input_tokens=num_input_tokens, caller='Orchestrator') # Manage the chat history From 2cb9344f5f94c33530bdb2bb78970f38d3e71f99 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 26 Dec 2024 11:02:00 -0800 Subject: [PATCH 08/93] Only instantiate the client once. --- .../autogen-ext/samples/learning_to_reason.py | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index 7502c84de309..4b07a11a1446 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -84,7 +84,7 @@ def define_tasks_with_answers(): # Default client parameters -TEMPERATURE = 0.1 +TEMPERATURE = 1.0 MAX_TOKENS = 4096 PRESENCE_PENALTY = 0.0 FREQUENCY_PENALTY = 0.0 @@ -183,7 +183,6 @@ def create_trapi_client(page_log): frequency_penalty=FREQUENCY_PENALTY, top_p=TOP_P, max_retries=MAX_RETRIES, - unknown_param="unknown_param", ) if page_log is not None: page_log.append_entry_line("Client: {}".format(client._resolved_model)) @@ -315,7 +314,7 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor return response, num_successes, num_trials -async def train_and_test(task_index, max_train_trials, max_test_trials, task_assignment_callback, page_log): +async def train_and_test(task_index, max_train_trials, max_test_trials, task_assignment_callback, client, page_log): tasklist = define_tasks_with_answers() task_with_answer = tasklist[task_index] @@ -329,7 +328,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass max_test_trials=max_test_trials, task_assignment_callback=task_assignment_callback, reset_memory=True, - client=create_client(page_log), + client=client, page_log=page_log) last_response, num_successes, num_trials = await test( task_with_answer=task_with_answer, @@ -337,7 +336,7 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=False, - client=create_client(page_log), + client=client, page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) total_num_successes += num_successes @@ -345,38 +344,41 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass return total_num_successes, total_num_trials -async def test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials, reset_memory): +async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials, reset_memory): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=reset_memory, - client=create_client(page_log), + client=client, 
page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) -async def test_on_task(task_index, task_assignment_callback, page_log, num_trials): +async def test_on_task(task_index, task_assignment_callback, client, page_log, num_trials): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, task_assignment_callback=task_assignment_callback, use_memory=False, reset_memory=False, - client=create_client(page_log), + client=client, page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) async def main() -> None: # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "repro-16") + page_log = PageLog("~/pagelogs/", "repro-17") page = page_log.begin_page( summary="main", details='', method_call="main") + # Create the client. + client = create_client(page_log) + # Choose the task from those listed at the top. task_index = 0 @@ -384,10 +386,10 @@ async def main() -> None: task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one # Test, without using memory. - # await test_on_task(task_index, task_assignment_callback, page_log, 1) + # await test_on_task(task_index, task_assignment_callback, client, page_log, 1) # Test, using memory. - # await test_on_task_with_memory(task_index, task_assignment_callback, page_log, num_trials=3, reset_memory=True) + # await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials=3, reset_memory=True) # Train and test, using memory. num_successes, num_trials = await train_and_test( @@ -395,6 +397,7 @@ async def main() -> None: 10, # Normally 10 3, # Normally 3 task_assignment_callback, + client, page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) From 878f458d31cefa3586685a6c31c8c428d74fb305 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 26 Dec 2024 11:02:39 -0800 Subject: [PATCH 09/93] Fix bug that was duplicating insights across trials. --- .../src/autogen_ext/agentic_memory/_agentic_memory.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index e96b1d2d06d6..d4f61b7c2678 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -52,6 +52,7 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca for trial in range(num_trials): page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + task_plus_insights = task # Try to retrieve any relevant memories from the DB. filtered_insights = await self.retrieve_relevant_insights(task) @@ -59,11 +60,11 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) memory_section = self.format_memory_section(filtered_insights) if len(memory_section) > 0: - task = task + '\n\n' + memory_section + task_plus_insights = task + '\n\n' + memory_section # Attempt to solve the task. 
page.add_lines("Try to solve the task.\n", flush=True) - response, _ = await task_assignment_callback(task, self.client, self.page_log) + response, _ = await task_assignment_callback(task_plus_insights, self.client, self.page_log) response_is_correct = (response.lower() == expected_answer.lower()) if response_is_correct: From 21562f15a0233e21b4634bd3e0d3886d8f9a9860 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 27 Dec 2024 15:35:28 -0800 Subject: [PATCH 10/93] Add the Grader class. Refactor memory paths. Enrich page logging. --- .../autogen-ext/samples/learning_to_reason.py | 171 +++++++----------- .../autogen_ext/agentic_memory/__init__.py | 3 +- .../agentic_memory/_agentic_memory.py | 69 ++++--- .../src/autogen_ext/agentic_memory/_grader.py | 100 ++++++++++ .../agentic_memory/_knowledge_archive.py | 14 +- 5 files changed, 219 insertions(+), 138 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index 4b07a11a1446..9686cccf74bd 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -4,8 +4,8 @@ from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str +# from autogen_ext.agents.web_surfer import MultimodalWebSurfer +# from autogen_ext.agents.web_surfer._utils import message_content_to_str from autogen_agentchat.task import Console from autogen_core.models import ( AssistantMessage, @@ -19,79 +19,47 @@ from typing import ( Tuple, ) -from autogen_ext.agentic_memory import AgenticMemory, PageLog +from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader -GRADER = 0 -PATH_TO_ARCHIVE_DIR = "~/agentic_memory_archive" +MEMORY_DIR = "~/agentic_memory_archive" +PAGELOG_DIR = "~/pagelogs/" +RUN_SUBDIR = "repro-19-08" + +# Default client parameters +TEMPERATURE = 0.8 +MAX_TOKENS = 4096 +PRESENCE_PENALTY = 0.0 +FREQUENCY_PENALTY = 0.0 +TOP_P = 1.0 +MAX_RETRIES = 65535 def define_tasks_with_answers(): tasks_with_answers = [] - if GRADER: - # Task index 0 - tasks_with_answers.append({ - "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? - -Be sure to consider the cases where all are lying or all are telling the truth.""", - "expected_answer": "100"}) - - # Task index 1 - tasks_with_answers.append({ - "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. - -You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. 
However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. - -In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires? - -Be sure to consider the cases where all are lying or all are telling the truth.""", - "expected_answer": "100"}) - - # Task index 2 - tasks_with_answers.append({ - "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? - -Be sure to consider the cases where all are lying or all are telling the truth.""", - "expected_answer": "3"}) - else: - # Task index 0 - tasks_with_answers.append({ - "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars? + # Task index 0 + tasks_with_answers.append({ + "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars?""", + "expected_answer": "100"}) -The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "100"}) - - # Task index 1 - tasks_with_answers.append({ - "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. + # Task index 1 + tasks_with_answers.append({ + "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. -In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. 
You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires? - -The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "100"}) +In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires?""", + "expected_answer": "100"}) - # Task index 2 - tasks_with_answers.append({ - "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth? - -The final line of your response must contain nothing but the answer as a number.""", - "expected_answer": "3"}) + # Task index 2 + tasks_with_answers.append({ + "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth?""", + "expected_answer": "3"}) return tasks_with_answers -# Default client parameters -TEMPERATURE = 1.0 -MAX_TOKENS = 4096 -PRESENCE_PENALTY = 0.0 -FREQUENCY_PENALTY = 0.0 -TOP_P = 1.0 -MAX_RETRIES = 65535 - - def create_client(page_log=None): # Choose one. # return create_oai_client(page_log) @@ -101,7 +69,7 @@ def create_client(page_log=None): def create_oai_client(page_log): # Create an OpenAI client - model_name = "gpt-4o-2024-05-13" + model_name = "gpt-4o-2024-08-06" client = OpenAIChatCompletionClient( model=model_name, api_key="", @@ -122,12 +90,9 @@ def create_oai_client(page_log): def create_aoai_client(page_log): # Create an Azure OpenAI client token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - azure_deployment = "gpt-4o-2024-05-13-eval" - model = "gpt-4o-2024-05-13" - azure_endpoint = "https://agentic1.openai.azure.com/" - # azure_deployment = "gpt-4o-2024-08-06-eval" - # model = "gpt-4o-2024-08-06" - # azure_endpoint = "https://agentic2.openai.azure.com/" + azure_deployment = "gpt-4o-2024-08-06-eval" + model = "gpt-4o-2024-08-06" + azure_endpoint = "https://agentic2.openai.azure.com/" client = AzureOpenAIChatCompletionClient( azure_endpoint=azure_endpoint, azure_ad_token_provider=token_provider, @@ -164,8 +129,6 @@ def create_trapi_client(page_log): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") - # model = "gpt-4o-2024-05-13" # This is (for instance) the OpenAI model name, which is used to look up capabilities. - # azure_deployment = 'gpt-4o_2024-05-13' # This is DeploymentName in the table at https://aka.ms/trapi/models model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. 
azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models @@ -274,7 +237,12 @@ async def assign_task_to_client(task, client, page_log): async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, client, page_log) -> None: - memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) + page = page_log.begin_page( + summary="train", + details='', + method_call="train") + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, + memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) await memory.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], @@ -282,39 +250,54 @@ async def train(task_with_answer, max_train_trials, max_test_trials, task_assign final_format_instructions="", max_train_trials=max_train_trials, max_test_trials=max_test_trials) + page_log.finish_page(page) async def test(task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, client, page_log) -> Tuple[str, int, int]: + page = page_log.begin_page( + summary="test", + details='', + method_call="test") + + grader = Grader(client, page_log) + if use_memory: - memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, path_to_archive_dir=PATH_TO_ARCHIVE_DIR) + page.add_lines("Testing with memory.\n", flush=True) + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, + memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) response, num_successes, num_trials = await memory.test_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], task_assignment_callback=task_assignment_callback, num_trials=num_trials) else: - page = page_log.begin_page( - summary="test without memory", - details="", - method_call="test without memory") + page.add_lines("Testing without memory.\n", flush=True) response = None num_successes = 0 for trial in range(num_trials): page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) - response_is_correct = (response.lower() == task_with_answer["expected_answer"].lower()) + page.add_lines("Response: {}\n".format(response), flush=True) + + response_is_correct = await grader.response_is_correct( + task_with_answer["task"], response, task_with_answer["expected_answer"]) if response_is_correct: num_successes += 1 - page.add_lines("Response: {}\n".format(response), flush=True) - page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) - page_log.finish_page(page) + page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) + + page_log.finish_page(page) return response, num_successes, num_trials async def train_and_test(task_index, max_train_trials, max_test_trials, task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="train_and_test", + details='', + method_call="train_and_test") + tasklist = define_tasks_with_answers() task_with_answer = tasklist[task_index] @@ -338,9 +321,13 @@ async def train_and_test(task_index, max_train_trials, max_test_trials, task_ass reset_memory=False, client=client, page_log=page_log) + + 
page.add_lines("Success rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) total_num_successes += num_successes total_num_trials += num_trials + + page_log.finish_page(page) return total_num_successes, total_num_trials @@ -370,7 +357,7 @@ async def test_on_task(task_index, task_assignment_callback, client, page_log, n async def main() -> None: # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "repro-17") + page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) page = page_log.begin_page( summary="main", details='', @@ -406,29 +393,5 @@ async def main() -> None: page_log.finish_page(page) -async def test_grader() -> None: - # Create the PageLog. (This is optional) - page_log = PageLog("~/pagelogs/", "test-grader-1") - page = page_log.begin_page( - summary="test_grader", - details='', - method_call="test_grader") - - # Choose the task from those listed at the top. - task_index = 0 - - # Choose the client, agent or team to assign the task to. - task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one - - # Test, without using memory. - await test_on_task(task_index, task_assignment_callback, page_log, 2) - - page_log.flush(final=True) # Finalize the page log - page_log.finish_page(page) - - if __name__ == "__main__": - if GRADER: - asyncio.run(test_grader()) - else: - asyncio.run(main()) + asyncio.run(main()) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 95efac2a718b..26e6c64f98a5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,4 +1,5 @@ from ._agentic_memory import AgenticMemory from ._page_log import PageLog +from ._grader import Grader -__all__ = ["AgenticMemory", "PageLog"] +__all__ = ["AgenticMemory", "PageLog", "Grader"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index d4f61b7c2678..63667fcefbfc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -1,15 +1,17 @@ from typing import Callable, List from ._prompter import Prompter from ._knowledge_archive import KnowledgeArchive +from ._grader import Grader class AgenticMemory: - def __init__(self, reset, client, page_log, path_to_archive_dir): + def __init__(self, reset, client, page_log, memory_dir, run_subdir): self.client = client self.page_log = page_log self.prompter = Prompter(client, page_log) - self.archive = KnowledgeArchive(verbosity=0, reset=reset, path_to_archive_dir=path_to_archive_dir, + self.archive = KnowledgeArchive(verbosity=0, reset=reset, memory_dir=memory_dir, run_subdir=run_subdir, page_log=page_log) + self.grader = Grader(client, page_log) async def train_on_task(self, task: str, # The task to be completed. 
@@ -31,7 +33,9 @@ async def train_on_task(self, page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) _, insight = await self._iterate_on_task(task, expected_answer, task_assignment_callback, final_format_instructions, max_train_trials, max_test_trials) - if insight is not None: + if insight is None: + page.add_lines("No useful insight was discovered.\n", flush=True) + else: page.add_lines("A new insight was created:\n{}".format(insight), flush=True) # Add this insight to memory. await self.add_insight_to_memory(task, insight) @@ -51,7 +55,7 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca num_successes = 0 for trial in range(num_trials): - page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) task_plus_insights = task # Try to retrieve any relevant memories from the DB. @@ -65,12 +69,14 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca # Attempt to solve the task. page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_plus_insights, self.client, self.page_log) + page.add_lines("Response: {}\n".format(response), flush=True) - response_is_correct = (response.lower() == expected_answer.lower()) + response_is_correct = await self.grader.response_is_correct(task, response, expected_answer) if response_is_correct: + page.add_lines("Response is CORRECT.\n", flush=True) num_successes += 1 - - page.add_lines("Response: {}\n".format(response), flush=True) + else: + page.add_lines("Response is INCORRECT.\n", flush=True) # Calculate the success rate as a percentage, rounded to the nearest whole number. page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) @@ -107,6 +113,9 @@ async def retrieve_relevant_insights(self, task: str): details="", method_call="AgenticMemory.retrieve_relevant_insights") + page.add_lines("\nCURRENT TASK:") + page.add_lines(task) + # Generalize the task. generalized_task = await self.prompter.generalize_task(task) @@ -116,22 +125,24 @@ async def retrieve_relevant_insights(self, task: str): page.add_lines("\n".join(topics)) page.add_lines("") - # Retrieve insights from the archive. - unfiltered_insights = self.archive.get_relevant_insights(topics=topics) - filtered_insights = [] - page.add_lines("\nUNFILTERED INSIGHTS") - for insight, relevance in unfiltered_insights.items(): - page.add_lines(" INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) - filtered_insights.append(insight) - page.add_lines("\nFiltered to top {} insights".format(len(filtered_insights))) - - if len(filtered_insights) > 0: - # Apply a final filtering stage to keep only the insights that the LLM believes are relevant. - filtered_insights = await self.prompter.validate_insights(filtered_insights, task) - page.add_lines("\n{} insights were validated".format(len(filtered_insights))) + # Retrieve relevant insights from the archive. 
+ relevant_insights_and_relevances = self.archive.get_relevant_insights(topics=topics) + relevant_insights = [] + page.add_lines("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) + for insight, relevance in relevant_insights_and_relevances.items(): + page.add_lines("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) + relevant_insights.append(insight) + + validated_insights = [] + if len(relevant_insights) > 0: + # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. + validated_insights = await self.prompter.validate_insights(relevant_insights, task) + page.add_lines("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) + for insight in validated_insights: + page.add_lines("\n INSIGHT: {}".format(insight)) self.page_log.finish_page(page) - return filtered_insights + return validated_insights def format_memory_section(self, memories): memory_section = "" @@ -158,16 +169,18 @@ async def _test_for_failure(self, task_plus_insights: str, expected_answer: str, response, work_history = None, None for trial in range(num_trials): - page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) # Attempt to solve the task. - page.add_lines("Try to solve the task.\n", flush=True) + page.add_lines("Try to solve the task.", flush=True) response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log) page.add_lines("Response: {}\n".format(response), flush=True) - response_is_correct = (response.lower() == expected_answer.lower()) - if not response_is_correct: - page.add_lines("\nResponse is INCORRECT. Return the details.\n", flush=True) + response_is_correct = await self.grader.response_is_correct(task_plus_insights, response, expected_answer) + if response_is_correct: + page.add_lines("Response is CORRECT.\n", flush=True) + else: + page.add_lines("Response is INCORRECT.\n Stop testing, and return the details of the failure.\n", flush=True) failure_found = True break @@ -193,7 +206,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to # Loop until success (or timeout) while learning from failures. for trial in range(1, max_train_trials + 1): - page.add_lines("----- TRAIN TRIAL {} -----\n".format(trial), flush=True) + page.add_lines("\n----- TRAIN TRIAL {} -----\n".format(trial), flush=True) task_plus_insights = task @@ -210,7 +223,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to task_plus_insights, expected_answer, assign_task_to_completer, max_test_trials) if not failure_found: # No. Time to exit the loop. - page.add_lines("\nResponse is CORRECT. No learning needed.\n", flush=True) + page.add_lines("\nResponse is CORRECT.\n Stop looking for insights.\n", flush=True) # Was this the first trial? if trial == 1: # Yes. We should return the successful response, and no insight. 
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py
new file mode 100644
index 000000000000..68a3831c63ba
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py
@@ -0,0 +1,100 @@
+from typing import List
+
+from autogen_core.models import (
+    AssistantMessage,
+    LLMMessage,
+    SystemMessage,
+    UserMessage,
+    CreateResult,
+)
+
+from ._utils import UserContent
+
+
+class Grader:
+    def __init__(self, client, page_log):
+        self.client = client
+        self.page_log = page_log
+
+        # Create the chat history
+        self._chat_history: List[LLMMessage] = []
+
+    async def call_model(self, details, user_content: UserContent = None, system_message=None, keep_these_messages=True):
+        # Prepare the input message list
+        user_message = UserMessage(content=user_content, source="User")
+        if system_message is None:
+            system_message = "You are a helpful assistant."
+        system_message = SystemMessage(content=system_message)
+
+        input_messages = [system_message] + self._chat_history + [user_message]
+
+        # Call the model.
+        response = await self.client.create(input_messages)
+        assert isinstance(response, CreateResult)
+        response_string = response.content
+        assert isinstance(response_string, str)
+        response_message = AssistantMessage(content=response_string, source="Assistant")
+        assert isinstance(response_message, AssistantMessage)
+
+        # Log the model call
+        parent_page = self.page_log.add_model_call(description="Ask the model",
+            details=details, input_messages=input_messages, response=response, caller='Grader')
+
+        # Manage the chat history
+        if keep_these_messages:
+            self._chat_history.append(user_message)
+            self._chat_history.append(response_message)
+
+        # Return the response as a string for now
+        return response_string, parent_page
+
+    def remove_last_turn(self):
+        if len(self._chat_history) > 0:
+            self._chat_history.pop()
+
+    def clear_history(self):
+        self._chat_history = []
+
+    async def response_is_correct(self, task_description, response_to_be_graded, correct_answer):
+        # Decides whether the given response contains the correct answer to the task (extract the answer, then grade it).
+
+        sys_message = """You are a helpful and thoughtful assistant."""
+
+        user_message = ["""Your job is to extract a possible answer to the following question from the given text.
+- First review the following task.
+- Then review the response that follows, which may contain reasoning that led to the answer, as well as other comments.
+- Do not attempt to actually solve the task yourself.
+- Don't try to judge whether the reasoning steps were correct.
+- Simply respond by providing a copy of the answer from the text, omitting any other parts of the text.
+- If no answer is present in the text, simply reply "None"."""]
+        user_message.append("\n# Task description")
+        user_message.append(task_description)
+        user_message.append("\n# Text that may contain an answer")
+        user_message.append(response_to_be_graded)
+        self.clear_history()
+        given_answer, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to extract the answer")
+
+        user_message = ["""Your job is to decide whether a given answer to a task is correct or not.
+- You will be given the task description and the correct, gold-standard answer, along with the answer to be graded.
+- In general, an answer is correct if it is equivalent to the correct answer.
+- Specifically, the given answer must contain the important information from the correct answer, and must not in any way contradict the correct answer.
+- Ignore any differences of grammar, spelling mistakes, punctuation, capitalization, formatting, or extra commentary.
+- An answer should still be considered correct even if it omits information that can be clearly inferred.
+   - For instance, if the correct answer is "Paris, France", the answer "Paris" should be considered correct.
+- Respond with a single character: '1' if the answer to be graded is correct, '0' if not."""]
+        user_message.append("\n# Task description")
+        user_message.append(task_description)
+        user_message.append("\n# Correct answer")
+        user_message.append(correct_answer)
+        user_message.append("\n# Answer to be graded")
+        user_message.append(given_answer)
+        self.clear_history()
+        decision, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to check the answer for correctness")
+
+        return decision == "1"
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
index 28b557020590..97b01e9f5a63 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py
@@ -21,17 +21,22 @@ def __init__(
         self,
         verbosity: Optional[int] = 0,
         reset: Optional[bool] = False,
-        path_to_archive_dir: Optional[str] = "tmp/archive",
+        memory_dir: str = "tmp/memory",
+        run_subdir: str = "run1",
         page_log=None,
     ):
         """
         Args:
         - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists.
         - reset (Optional, bool): True to clear the DB before starting. Default False
-        - path_to_archive_dir (Optional, str): path to the directory where the archive is stored.
+        - memory_dir (Optional, str): path to the directory where all memory data is stored.
+        - run_subdir (Optional, str): name of the subdirectory for this run's memory data.
+        - page_log (Optional, PageLog): the PageLog object to use for logging.
         """
-        self.path_to_archive_dir = path_to_archive_dir
-        path_to_db_dir = os.path.join(path_to_archive_dir, "memo_store")
+        memory_dir = os.path.expanduser(memory_dir)
+        path_to_db_dir = os.path.join(memory_dir, run_subdir, "memo_store")
+        self.path_to_dict = os.path.join(memory_dir, run_subdir, "uid_insight_dict.pkl")
+
         self.page_log = page_log
         parent_page = self.page_log.last_page()
         parent_page.add_lines("Creating KnowedgeArchive object", flush=True)
@@ -39,7 +44,6 @@ def __init__(
         self.memo_store = MemoStore(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir)
 
         # Load or create the associated memo dict on disk.
-        self.path_to_dict = os.path.join(path_to_archive_dir, "uid_insight_dict.pkl")
         self.uid_insight_dict = {}
         self.last_insight_id = 0
         if (not reset) and os.path.exists(self.path_to_dict):
From 3a40b3027800f762b8219df977b46d407a560302 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Fri, 27 Dec 2024 17:57:07 -0800
Subject: [PATCH 11/93] Adjustments for comparison tests.
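
Grader.response_is_correct now returns a (response_is_correct, extracted_answer)
pair, so callers can log the extracted answer separately from the pass/fail
decision. A minimal usage sketch, assuming the create_client() helper from the
sample script; the run name and strings are illustrative, not part of this commit:

    import asyncio
    from autogen_ext.agentic_memory import Grader, PageLog

    async def demo() -> None:
        page_log = PageLog("~/pagelogs/", "grader_demo")  # illustrative run name
        client = create_client(page_log)  # helper defined in learning_to_reason.py
        grader = Grader(client, page_log)
        response_is_correct, extracted_answer = await grader.response_is_correct(
            task_description="You ask ten people 'How many of you are liars?' ...",
            response_to_be_graded="Reasoning step by step... so all ten must be liars.",
            correct_answer="All of them are liars.")
        print(extracted_answer, response_is_correct)

    asyncio.run(demo())
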
--- .../autogen-ext/samples/learning_to_reason.py | 65 ++++++++++--------- .../agentic_memory/_agentic_memory.py | 24 +++---- .../src/autogen_ext/agentic_memory/_grader.py | 6 +- 3 files changed, 49 insertions(+), 46 deletions(-) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index 9686cccf74bd..2b3138c78c9a 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -4,9 +4,9 @@ from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat -# from autogen_ext.agents.web_surfer import MultimodalWebSurfer -# from autogen_ext.agents.web_surfer._utils import message_content_to_str -from autogen_agentchat.task import Console +from autogen_ext.agents.web_surfer import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str +from autogen_agentchat.ui._console import Console from autogen_core.models import ( AssistantMessage, ChatCompletionClient, @@ -24,7 +24,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "repro-19-08" +RUN_SUBDIR = "run_21_cl_base" # Default client parameters TEMPERATURE = 0.8 @@ -181,21 +181,17 @@ async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str max_turns=20, ) - # user_input = await asyncio.get_event_loop().run_in_executor(None, input, ">: ") + # Get the team's text response to the task. stream = team.run_stream(task=task) task_result = await Console(stream) + response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) + page.add_lines("----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - # Use the entire task_result (with images removed) as the work history. - work_history = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - - # Extract the final response as the last line of the last message. - # This assumes that the task statement specified that the answer should be on the last line. - final_message_string = task_result.messages[-1].content - final_message_lines = final_message_string.split("\n") - final_response = final_message_lines[-1] + # MagenticOne's response is the chat history, which we use here as the work history. + work_history = response_str page_log.finish_page(page) - return final_response, work_history + return response_str, work_history async def assign_task_to_client(task, client, page_log): @@ -218,21 +214,20 @@ async def assign_task_to_client(task, client, page_log): input_messages = [system_message] + [user_message] response = await client.create(input_messages) + response_str = response.content # Log the model call page_log.add_model_call(description="Ask the model", details="to complete the task", input_messages=input_messages, response=response, num_input_tokens=0, caller='assign_task_to_client') + page.add_lines("----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - # Split the response into lines. - response_lines = response.content.split("\n") - - # The final line contains the answer. Extract it. - answer = response_lines[-1] + # Use the response as the work history as well. 
+ work_history = response_str page_log.finish_page(page) - return answer, response.content + return response_str, work_history async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, @@ -276,15 +271,18 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor response = None num_successes = 0 for trial in range(num_trials): - page.add_lines("----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) - page.add_lines("Response: {}\n".format(response), flush=True) - response_is_correct = await grader.response_is_correct( + response_is_correct, extracted_answer = await grader.response_is_correct( task_with_answer["task"], response, task_with_answer["expected_answer"]) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) num_successes += 1 + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) @@ -341,6 +339,7 @@ async def test_on_task_with_memory(task_index, task_assignment_callback, client, client=client, page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) + return num_successes, num_trials async def test_on_task(task_index, task_assignment_callback, client, page_log, num_trials): @@ -353,6 +352,7 @@ async def test_on_task(task_index, task_assignment_callback, client, page_log, n client=client, page_log=page_log) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) + return num_successes, num_trials async def main() -> None: @@ -373,19 +373,20 @@ async def main() -> None: task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one # Test, without using memory. - # await test_on_task(task_index, task_assignment_callback, client, page_log, 1) + num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, 50) # Test, using memory. - # await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials=3, reset_memory=True) + # num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials=3, reset_memory=True) # Train and test, using memory. 
- num_successes, num_trials = await train_and_test( - task_index, - 10, # Normally 10 - 3, # Normally 3 - task_assignment_callback, - client, - page_log) + # num_successes, num_trials = await train_and_test( + # task_index, + # 10, # Normally 10 + # 3, # Normally 3 + # task_assignment_callback, + # client, + # page_log) + success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 63667fcefbfc..c2703438b632 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -69,14 +69,15 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca # Attempt to solve the task. page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_plus_insights, self.client, self.page_log) - page.add_lines("Response: {}\n".format(response), flush=True) - response_is_correct = await self.grader.response_is_correct(task, response, expected_answer) + response_is_correct, extracted_answer = await self.grader.response_is_correct( + task, response, expected_answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: - page.add_lines("Response is CORRECT.\n", flush=True) + page.add_lines("Answer is CORRECT.\n", flush=True) num_successes += 1 else: - page.add_lines("Response is INCORRECT.\n", flush=True) + page.add_lines("Answer is INCORRECT.\n", flush=True) # Calculate the success rate as a percentage, rounded to the nearest whole number. page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) @@ -152,8 +153,8 @@ def format_memory_section(self, memories): memory_section += ('- ' + mem + '\n') return memory_section - async def _test_for_failure(self, task_plus_insights: str, expected_answer: str, assign_task_to_completer: Callable, - num_trials: int): + async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, + assign_task_to_completer: Callable, num_trials: int): """ Attempts to solve the given task multiple times to find a failure case to learn from. """ @@ -174,13 +175,14 @@ async def _test_for_failure(self, task_plus_insights: str, expected_answer: str, # Attempt to solve the task. 
page.add_lines("Try to solve the task.", flush=True) response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log) - page.add_lines("Response: {}\n".format(response), flush=True) - response_is_correct = await self.grader.response_is_correct(task_plus_insights, response, expected_answer) + response_is_correct, extracted_answer = await self.grader.response_is_correct( + task, response, expected_answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: - page.add_lines("Response is CORRECT.\n", flush=True) + page.add_lines("Answer is CORRECT.\n", flush=True) else: - page.add_lines("Response is INCORRECT.\n Stop testing, and return the details of the failure.\n", flush=True) + page.add_lines("Answer is INCORRECT.\n Stop testing, and return the details of the failure.\n", flush=True) failure_found = True break @@ -220,7 +222,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to # Can we find a failure case to learn from? failure_found, response, work_history = await self._test_for_failure( - task_plus_insights, expected_answer, assign_task_to_completer, max_test_trials) + task, task_plus_insights, expected_answer, assign_task_to_completer, max_test_trials) if not failure_found: # No. Time to exit the loop. page.add_lines("\nResponse is CORRECT.\n Stop looking for insights.\n", flush=True) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py index 68a3831c63ba..da14a6cf6bf0 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py @@ -72,7 +72,7 @@ async def response_is_correct(self, task_description, response_to_be_graded, cor user_message.append("\n# Text that may contain an answer") user_message.append(response_to_be_graded) self.clear_history() - given_answer, page = await self.call_model( + extracted_answer, page = await self.call_model( system_message=sys_message, user_content=user_message, details="to extract the answer") @@ -90,11 +90,11 @@ async def response_is_correct(self, task_description, response_to_be_graded, cor user_message.append("\n# Correct answer") user_message.append(correct_answer) user_message.append("\n# Answer to be graded") - user_message.append(given_answer) + user_message.append(extracted_answer) self.clear_history() decision, page = await self.call_model( system_message=sys_message, user_content=user_message, details="to check the answer for correctness") - return decision == "1" + return decision == "1", extracted_answer From 8622c5ee76099aae2870c65df8a850132b29cd9a Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 30 Dec 2024 15:31:39 -0800 Subject: [PATCH 12/93] Test generalization over multiple tasks. 
--- .../autogen-ext/samples/learning_to_reason.py | 133 +++++++++--------- .../agentic_memory/_agentic_memory.py | 8 +- 2 files changed, 76 insertions(+), 65 deletions(-) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/learning_to_reason.py index 2b3138c78c9a..d1e3a8785ce5 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/learning_to_reason.py @@ -8,11 +8,6 @@ from autogen_ext.agents.web_surfer._utils import message_content_to_str from autogen_agentchat.ui._console import Console from autogen_core.models import ( - AssistantMessage, - ChatCompletionClient, - FunctionExecutionResult, - FunctionExecutionResultMessage, - LLMMessage, SystemMessage, UserMessage, ) @@ -24,7 +19,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_21_cl_base" +RUN_SUBDIR = "run_27_3_g1_cl" # Default client parameters TEMPERATURE = 0.8 @@ -54,8 +49,13 @@ def define_tasks_with_answers(): # Task index 2 tasks_with_answers.append({ - "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know that one of them is not. You ask each one 'How many guards here tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards tell the truth?""", - "expected_answer": "3"}) + "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know for a fact that at least one of them never tells the truth. You ask each one 'How many guards here always tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards always tell the truth?""", + "expected_answer": "None of them do"}) + + # Task index 3 + tasks_with_answers.append({ + "task": """You ask ten people 'How many of you are liars?' They all answer 'At least one of us is not a liar.' You happen to know that at least one of them IS a liar. How many of them are liars in total?""", + "expected_answer": "All of them are liars."}) return tasks_with_answers @@ -185,7 +185,7 @@ async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str stream = team.run_stream(task=task) task_result = await Console(stream) response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - page.add_lines("----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) # MagenticOne's response is the chat history, which we use here as the work history. work_history = response_str @@ -221,7 +221,7 @@ async def assign_task_to_client(task, client, page_log): details="to complete the task", input_messages=input_messages, response=response, num_input_tokens=0, caller='assign_task_to_client') - page.add_lines("----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) # Use the response as the work history as well. 
work_history = response_str @@ -290,45 +290,6 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor return response, num_successes, num_trials -async def train_and_test(task_index, max_train_trials, max_test_trials, task_assignment_callback, client, page_log): - page = page_log.begin_page( - summary="train_and_test", - details='', - method_call="train_and_test") - - tasklist = define_tasks_with_answers() - task_with_answer = tasklist[task_index] - - num_loops = 10 # Normally 10 - total_num_successes = 0 - total_num_trials = 0 - for i in range(num_loops): - await train( - task_with_answer=task_with_answer, - max_train_trials=max_train_trials, - max_test_trials=max_test_trials, - task_assignment_callback=task_assignment_callback, - reset_memory=True, - client=client, - page_log=page_log) - last_response, num_successes, num_trials = await test( - task_with_answer=task_with_answer, - num_trials=max_test_trials, - task_assignment_callback=task_assignment_callback, - use_memory=True, - reset_memory=False, - client=client, - page_log=page_log) - - page.add_lines("Success rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) - print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) - total_num_successes += num_successes - total_num_trials += num_trials - - page_log.finish_page(page) - return total_num_successes, total_num_trials - - async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials, reset_memory): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], @@ -355,8 +316,49 @@ async def test_on_task(task_index, task_assignment_callback, client, page_log, n return num_successes, num_trials +async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="train_and_test", + details='', + method_call="train_and_test") + + tasklist = define_tasks_with_answers() + task_with_answer_list = [tasklist[task_index] for task_index in task_index_list] + + total_num_successes_list = [0 for _ in task_index_list] + for i in range(num_loops): + # Always train on the first task. + await train( + task_with_answer=task_with_answer_list[0], + max_train_trials=max_train_trials, + max_test_trials=max_test_trials, + task_assignment_callback=task_assignment_callback, + reset_memory=True, + client=client, + page_log=page_log) + + # Test on all tasks. + for j, task_with_answer in enumerate(task_with_answer_list): + last_response, num_successes, num_trials = await test( + task_with_answer=task_with_answer, + num_trials=max_test_trials, + task_assignment_callback=task_assignment_callback, + use_memory=True, + reset_memory=False, + client=client, + page_log=page_log) + page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) + print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) + total_num_successes_list[j] += num_successes + + page.add_lines("") + + page_log.finish_page(page) + return total_num_successes_list + + async def main() -> None: - # Create the PageLog. (This is optional) + # Create the PageLog. page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) page = page_log.begin_page( summary="main", @@ -366,29 +368,32 @@ async def main() -> None: # Create the client. client = create_client(page_log) - # Choose the task from those listed at the top. 
- task_index = 0 + # Choose the tasks from those listed at the top. + task_index_list = [3, 1] # Choose the client, agent or team to assign the task to. task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one # Test, without using memory. - num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, 50) + # num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, 50) # Test, using memory. # num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials=3, reset_memory=True) - # Train and test, using memory. - # num_successes, num_trials = await train_and_test( - # task_index, - # 10, # Normally 10 - # 3, # Normally 3 - # task_assignment_callback, - # client, - # page_log) - - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) + # Train and test on any number of tasks using memory. + num_loops = 10 # Normally 10 + total_num_successes_list = await train_and_test( + task_index_list, + num_loops, + 10, # Normally 10 + 3, # Normally 3 + task_assignment_callback, + client, + page_log) + + for i, total_num_successes in enumerate(total_num_successes_list): + success_rate = round((total_num_successes / num_loops) * 100) + page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index c2703438b632..31e8ea4af169 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -92,13 +92,19 @@ async def add_insight_to_memory(self, task: str, insight: str): details="", method_call="AgenticMemory.add_insight_to_memory") + page.add_lines("\nGIVEN TASK:") + page.add_lines(task) + + page.add_lines("\nGIVEN INSIGHT:") + page.add_lines(insight) + # Generalize the task. generalized_task = await self.prompter.generalize_task(task) # Get a combined list of topics from the task and insight. task_plus_insight = generalized_task.strip() + "\n(Hint: " + insight + ")" topics = await self.prompter.find_index_topics(task_plus_insight) - page.add_lines("\nTOPICS EXTRACTED FROM TASK AND INSIGHT:") + page.add_lines("\nTOPICS EXTRACTED FROM TASK AND INSIGHT COMBINED:") page.add_lines("\n".join(topics)) page.add_lines("") From 20b26c10481258c45bc74a6e7e80bc18c2c8ab2c Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 30 Dec 2024 18:17:35 -0800 Subject: [PATCH 13/93] Add teachability and a test for it. 
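
Teachability here means that advice given in one chat message is stored as an
insight (via add_insight_without_task_to_memory) and retrieved later for a
related question, without any train/test loop. A condensed sketch of the test
added below, using the send_message_to_agent helper from amt.py (the task and
advice strings are shortened here for illustration):

    # Ask once with memory cleared; expect a generic answer.
    response = await send_message_to_agent(
        task, assign_task_to_client, client, page_log, reset_memory=True)

    # Teach the agent, keeping memory intact.
    advice = "Put the extension in its own repo, then add the autogen-extension topic."
    await send_message_to_agent(
        advice, assign_task_to_client, client, page_log, reset_memory=False)

    # Ask again; the stored insight should now be retrieved and applied.
    response = await send_message_to_agent(
        task, assign_task_to_client, client, page_log, reset_memory=False)
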
--- .../samples/{learning_to_reason.py => amt.py} | 156 +++++++++++++++--- .../agentic_memory/_agentic_memory.py | 89 ++++++++++ .../autogen_ext/agentic_memory/_prompter.py | 31 ++++ 3 files changed, 253 insertions(+), 23 deletions(-) rename python/packages/autogen-ext/samples/{learning_to_reason.py => amt.py} (75%) diff --git a/python/packages/autogen-ext/samples/learning_to_reason.py b/python/packages/autogen-ext/samples/amt.py similarity index 75% rename from python/packages/autogen-ext/samples/learning_to_reason.py rename to python/packages/autogen-ext/samples/amt.py index d1e3a8785ce5..b7c6d31346af 100644 --- a/python/packages/autogen-ext/samples/learning_to_reason.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -19,7 +19,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_27_3_g1_cl" +RUN_SUBDIR = "run_28_teach" # Default client parameters TEMPERATURE = 0.8 @@ -57,6 +57,11 @@ def define_tasks_with_answers(): "task": """You ask ten people 'How many of you are liars?' They all answer 'At least one of us is not a liar.' You happen to know that at least one of them IS a liar. How many of them are liars in total?""", "expected_answer": "All of them are liars."}) + # Task index 4 + tasks_with_answers.append({ + "task": "As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?", + "expected_answer": "It's best to have your agent in its own repo, then add the autogen-extension topic to that repo."}) + return tasks_with_answers @@ -357,44 +362,149 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ return total_num_successes_list -async def main() -> None: - # Create the PageLog. - page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) +async def test_without_memory(task_assignment_callback, client, page_log): page = page_log.begin_page( - summary="main", + summary="test_without_memory", details='', - method_call="main") + method_call="test_without_memory") - # Create the client. - client = create_client(page_log) + task_index = 3 + num_trials = 1 - # Choose the tasks from those listed at the top. - task_index_list = [3, 1] + num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, num_trials) - # Choose the client, agent or team to assign the task to. - task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) + + page_log.finish_page(page) + + +async def test_with_memory(task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="test_with_memory", + details='', + method_call="test_with_memory") + + task_index = 3 + + num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, + num_trials=3, reset_memory=False) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) + + page_log.finish_page(page) - # Test, without using memory. - # num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, 50) - # Test, using memory. 
- # num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials=3, reset_memory=True) +async def test_self_teaching(task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="test_self_teaching", + details='', + method_call="test_self_teaching") + + # Choose the tasks from those listed at the top. + task_index_list = [3, 1] # Train and test on any number of tasks using memory. num_loops = 10 # Normally 10 total_num_successes_list = await train_and_test( - task_index_list, - num_loops, - 10, # Normally 10 - 3, # Normally 3 - task_assignment_callback, - client, - page_log) + task_index_list=task_index_list, + num_loops=num_loops, + max_train_trials=10, # Normally 10 + max_test_trials=3, # Normally 3 + task_assignment_callback=task_assignment_callback, + client=client, + page_log=page_log) for i, total_num_successes in enumerate(total_num_successes_list): success_rate = round((total_num_successes / num_loops) * 100) page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) + page_log.finish_page(page) + + +async def send_message_to_agent(text, task_assignment_callback, client, page_log, reset_memory) -> None: + page = page_log.begin_page( + summary="send_message_to_agent", + details="", + method_call="send_message_to_agent") + + memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, + memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + response = await memory.handle_user_message(text, task_assignment_callback) + + page.add_lines("Response: {}\n".format(response), flush=True) + + page_log.finish_page(page) + return response + + +async def test_teachability(task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="test_teachability", + details='', + method_call="test_teachability") + + tasklist = define_tasks_with_answers() + task_index = 4 + task_with_answer = tasklist[task_index] + task = task_with_answer["task"] + answer = task_with_answer["expected_answer"] + grader = Grader(client, page_log) + + # First test without memory. + page.add_lines("Clearing memory, then asking the question.") + # task = "As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?" + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=True) + + # Check the response. + response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + # Give the advice. + page.add_lines("Giving the advice.") + insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" + await send_message_to_agent(insight, task_assignment_callback, client, page_log, reset_memory=False) + + # Now ask the question again to see if the advice is retrieved from memory. 
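# (What "retrieved from memory" means here: handle_user_message finds no new
# advice in the question itself, so execute_task calls
# retrieve_relevant_insights, and any validated insights are appended to the
# task text as a memory section before the task reaches the agent.)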
+ page.add_lines("Asking the question again to see if the advice is retrieved from memory.") + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=False) + + # Check the response. + response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + page_log.finish_page(page) + + + +async def main() -> None: + # Create the PageLog. + page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) + page = page_log.begin_page( + summary="main", + details='', + method_call="main") + + # Create the client. + client = create_client(page_log) + + # Choose the client, agent or team to assign the task to. + task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one + + # SELECT ONE TEST TO RUN + # await test_without_memory(task_assignment_callback, client, page_log) + # await test_with_memory(task_assignment_callback, client, page_log) + # await test_self_teaching(task_assignment_callback, client, page_log) + await test_teachability(task_assignment_callback, client, page_log) + page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 31e8ea4af169..77f193f50747 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -113,6 +113,27 @@ async def add_insight_to_memory(self, task: str, insight: str): self.page_log.finish_page(page) + async def add_insight_without_task_to_memory(self, insight: str): + # Adds an insight to the DB. + page = self.page_log.begin_page( + summary="AgenticMemory.add_insight_without_task_to_memory", + details="", + method_call="AgenticMemory.add_insight_without_task_to_memory") + + page.add_lines("\nGIVEN INSIGHT:") + page.add_lines(insight) + + # Get a list of topics from the insight. + topics = await self.prompter.find_index_topics(insight) + page.add_lines("\nTOPICS EXTRACTED FROM INSIGHT:") + page.add_lines("\n".join(topics)) + page.add_lines("") + + # Add the insight to the archive. + self.archive.add_insight(insight, None, topics) + + self.page_log.finish_page(page) + async def retrieve_relevant_insights(self, task: str): # Retrieve insights from the DB that are relevant to the task. 
        page = self.page_log.begin_page(
@@ -259,3 +280,71 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to
         page.add_lines("\n{}\n".format(final_response), flush=True)
         self.page_log.finish_page(page)
         return final_response, successful_insight
+
+    async def _handle_task_or_advice(self, text):
+        page = self.page_log.begin_page(
+            summary="AgenticMemory._handle_task_or_advice",
+            details="",
+            method_call="AgenticMemory._handle_task_or_advice")
+
+        task = await self.prompter.extract_task(text)
+        page.add_lines("Task: {}".format(task), flush=True)
+
+        advice = await self.prompter.extract_advice(text)
+        page.add_lines("Advice: {}".format(advice), flush=True)
+
+        self.page_log.finish_page(page)
+        return task, advice
+
+    async def execute_task(self, task: str, task_assignment_callback: Callable, should_await: bool,
+                           should_retrieve_insights: bool = True):
+        """
+        Assigns a task to the completion agent, along with any relevant insights/memories.
+        """
+        page = self.page_log.begin_page(
+            summary="AgenticMemory.execute_task",
+            details="",
+            method_call="AgenticMemory.execute_task")
+
+        if should_retrieve_insights:
+            # Try to retrieve any relevant memories from the DB.
+            filtered_insights = await self.retrieve_relevant_insights(task)
+            if len(filtered_insights) > 0:
+                page.add_lines("Relevant insights were retrieved from memory.\n", flush=True)
+                memory_section = self.format_memory_section(filtered_insights)
+                task = task + '\n\n' + memory_section
+
+        # Attempt to solve the task.
+        page.add_lines("Try to solve the task.\n", flush=True)
+        if should_await:
+            response, _ = await task_assignment_callback(task, self.client, self.page_log)
+        else:
+            response, _ = task_assignment_callback(task, self.client, self.page_log)
+
+        # page.add_lines("Response: {}\n".format(response), flush=True)
+
+        self.page_log.finish_page(page)
+        return response
+
+    async def handle_user_message(self, text, task_assignment_callback, should_await=True):
+        page = self.page_log.begin_page(
+            summary="AgenticMemory.handle_user_message",
+            details="",
+            method_call="AgenticMemory.handle_user_message")
+
+        # task = await self.prompter.extract_task(text)
+        # page.add_lines("Task: {}".format(task), flush=True)
+
+        advice = await self.prompter.extract_advice(text)
+        page.add_lines("Advice: {}".format(advice), flush=True)
+
+        if advice is not None:
+            print("Adding advice to memory.")
+            await self.add_insight_without_task_to_memory(advice)
+
+        print("Passing task to completion agent.")
+        response = await self.execute_task(text, task_assignment_callback, should_await,
+                                           should_retrieve_insights=(advice is None))
+
+        self.page_log.finish_page(page)
+        return response
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
index f39ae4bf224b..80b975e5a8b2 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py
@@ -242,3 +242,34 @@ async def validate_insights(self, insights, task_description):
         return [validated_insights] if validated_insights != "None" else []
+
+    async def extract_task(self, text):
+        # Returns a task from the given text, or None if none is found.
+        sys_message = """You are a helpful and thoughtful assistant."""
+        user_message = ["""Does the following text contain a question or some task we are being asked to perform?
+- If so, please reply with the full question or task description, along with any supporting information, but without adding extra commentary or formatting.
+- If the task is just to remember something, that doesn't count as a task, so don't include it.
+- If there is no question or task in the text, simply write "None" with no punctuation."""]
+        user_message.append("\n# Text to analyze")
+        user_message.append(text)
+        self.clear_history()
+        response, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to extract a task")
+        return response if response != "None" else None
+
+    async def extract_advice(self, text):
+        # Returns advice from the given text, or None if none is found.
+        sys_message = """You are a helpful and thoughtful assistant."""
+        user_message = ["""Does the following text contain any information or advice that might be useful later?
+- If so, please copy the information or advice, adding no extra commentary or formatting.
+- If there is no potentially useful information or advice at all, simply write "None" with no punctuation."""]
+        user_message.append("\n# Text to analyze")
+        user_message.append(text)
+        self.clear_history()
+        response, page = await self.call_model(
+            system_message=sys_message,
+            user_content=user_message,
+            details="to extract advice")
+        return response if response != "None" else None

From 9d4722767403663a8a5778da1dc7f1ad1a2b0726 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 31 Dec 2024 17:05:30 -0800
Subject: [PATCH 14/93] Learning from demonstration, in-progress.

---
 python/packages/autogen-ext/samples/amt.py    | 69 ++++++++++++++++++-
 .../agentic_memory/_agentic_memory.py         | 38 ++++++----
 .../agentic_memory/_knowledge_archive.py      | 12 ++++
 3 files changed, 102 insertions(+), 17 deletions(-)

diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py
index b7c6d31346af..66db467d7591 100644
--- a/python/packages/autogen-ext/samples/amt.py
+++ b/python/packages/autogen-ext/samples/amt.py
@@ -19,7 +19,7 @@
 MEMORY_DIR = "~/agentic_memory_archive"
 PAGELOG_DIR = "~/pagelogs/"
-RUN_SUBDIR = "run_28_teach"
+RUN_SUBDIR = "run_29_demonstration"
 
 # Default client parameters
 TEMPERATURE = 0.8
@@ -62,6 +62,11 @@ def define_tasks_with_answers():
         "task": "As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?",
         "expected_answer": "It's best to have your agent in its own repo, then add the autogen-extension topic to that repo."})
 
+    # Task index 5
+    tasks_with_answers.append({
+        "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 18, 11, 8, 9, 5, 2. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. 
Your answer should be a positive numerical integer value.", + "expected_answer": "2"}) + return tasks_with_answers @@ -485,6 +490,65 @@ async def test_teachability(task_assignment_callback, client, page_log): +async def give_demonstration_to_agent(task, demonstration, client, page_log) -> None: + page = page_log.begin_page( + summary="give_demonstration_to_agent", + details="", + method_call="give_demonstration_to_agent") + + memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + await memory.learn_from_demonstration(task, demonstration) + + page_log.finish_page(page) + + +async def test_learning_from_demonstration(task_assignment_callback, client, page_log): + page = page_log.begin_page( + summary="test_learning_from_demonstration", + details='', + method_call="test_learning_from_demonstration") + + tasklist = define_tasks_with_answers() + task_index = 5 + task_with_answer = tasklist[task_index] + task = task_with_answer["task"] + answer = task_with_answer["expected_answer"] + grader = Grader(client, page_log) + + # First test without memory. + page.add_lines("Clearing memory, then assigning the task.") + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=True) + + # Check the response. + response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + # Provide the demonstration. + page.add_lines("Demonstrating a solution to a similar task.") + demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 10, 12, 17, 18, 19, 20, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." + demonstration = "First I sort the houses by location: 3, 6, 10, 12, 17, 18, 19, 20. Then I start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so I place a tower at 7. How far would that tower at 7 reach? Radius means in both directions, so while it reaches the house at 3, it also reaches up to mile 11. And that would cover the house at 10. The next uncovered house would be at 12, so a second tower is required. It could go at 16 (16 + 4) and it would reach further, up to mile 20 (16 + 4) where the last house is located. So 2 towers are enough." + await give_demonstration_to_agent(demo_task, demonstration, client, page_log) + + # Now assign the task again to see if the demonstration helps. + page.add_lines("Assigning the task again to see if the demonstration is useful.") + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=False) + + # Check the response. 
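# (Editor's aside: the expected answer for this task, and for the
# demonstration task above, can be double-checked with a short greedy routine
# for this covering problem. The sketch below is independent of the agent
# code; min_towers is not part of the package.)
def min_towers(houses, radius=4):
    towers, covered_up_to = 0, None
    for h in sorted(houses):
        if covered_up_to is None or h > covered_up_to:
            towers += 1                      # place a tower at h + radius,
            covered_up_to = h + 2 * radius   # which reaches up to h + 2*radius
    return towers

assert min_towers([16, 18, 11, 8, 9, 5, 2]) == 2          # task index 5
assert min_towers([10, 12, 17, 18, 19, 20, 3, 6]) == 2    # demonstration task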
+ response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + page_log.finish_page(page) + + + async def main() -> None: # Create the PageLog. page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) @@ -503,7 +567,8 @@ async def main() -> None: # await test_without_memory(task_assignment_callback, client, page_log) # await test_with_memory(task_assignment_callback, client, page_log) # await test_self_teaching(task_assignment_callback, client, page_log) - await test_teachability(task_assignment_callback, client, page_log) + # await test_teachability(task_assignment_callback, client, page_log) + await test_learning_from_demonstration(task_assignment_callback, client, page_log) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 77f193f50747..0981a2768bc3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -281,21 +281,6 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to self.page_log.finish_page(page) return final_response, successful_insight - async def _handle_task_or_advice(self, text): - page = self.page_log.begin_page( - summary="AgenticMemory._handle_task_or_advice", - details="", - method_call="AgenticMemory._handle_task_or_advice") - - task = await self.prompter.extract_task(text) - page.add_lines("Task: {}".format(task), flush=True) - - advice = await self.prompter.extract_advice(text) - page.add_lines("Advice: {}".format(advice), flush=True) - - self.page_log.finish_page(page) - return task, advice - async def execute_task(self, task: str, task_assignment_callback: Callable, should_await: bool, should_retrieve_insights: bool = True): """ @@ -348,3 +333,26 @@ async def handle_user_message(self, text, task_assignment_callback, should_await self.page_log.finish_page(page) return response + + async def learn_from_demonstration(self, task, demonstration): + page = self.page_log.begin_page( + summary="AgenticMemory.learn_from_demonstration", + details="", + method_call="AgenticMemory.learn_from_demonstration") + + page.add_lines("\nEXAMPLE TASK:") + page.add_lines(task) + + page.add_lines("\nEXAMPLE DEMONSTRATION:") + page.add_lines(demonstration) + + # Get a list of topics from the task. + topics = await self.prompter.find_index_topics(task.strip()) + page.add_lines("\nTOPICS EXTRACTED FROM TASK:") + page.add_lines("\n".join(topics)) + page.add_lines("") + + # Add the insight to the archive. 
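# (Storage detail: add_demonstration, defined in _knowledge_archive.py below,
# folds the pair into a single "Example task: ... / Example solution: ..."
# insight string and maps each topic extracted from the task to that insight
# in the vector DB, so demonstrations are retrieved the same way ordinary
# insights are.)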
+ self.archive.add_demonstration(task, demonstration, topics) + + self.page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py index 97b01e9f5a63..3f62039408ed 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py @@ -101,3 +101,15 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional del insight_relevance_dict[insight] return insight_relevance_dict + + def add_demonstration(self, task: str, demonstration: str, topics: List[str]): + """Adds a task-demonstration pair (as a single insight) to the knowledge archive.""" + self.last_insight_id += 1 + id_str = str(self.last_insight_id) + insight_str = "Example task:\n\n{}\nExample solution:\n\n{}".format(task, demonstration) + insight = Insight(id=id_str, insight_str=insight_str, task_str=task, topics=topics) + for topic in topics: + # Add a mapping in the vec DB from each topic to the insight. + self.memo_store.add_input_output_pair(topic, id_str) + self.uid_insight_dict[str(id_str)] = insight + self.save_archive() From 52d4e00643d366d45b84c61b8a73c8a0126ff402 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 1 Jan 2025 10:21:39 -0800 Subject: [PATCH 15/93] In memory retrieval, validate insights separately rather than together. --- python/packages/autogen-ext/samples/amt.py | 26 ++++++++------- .../agentic_memory/_agentic_memory.py | 8 +++-- .../autogen_ext/agentic_memory/_prompter.py | 33 +++++++------------ 3 files changed, 31 insertions(+), 36 deletions(-) diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py index 66db467d7591..b1017999fe99 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -19,7 +19,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_29_demonstration" +RUN_SUBDIR = "run_31_3_g1_m1" # Default client parameters TEMPERATURE = 0.8 @@ -64,7 +64,7 @@ def define_tasks_with_answers(): # Task index 5 tasks_with_answers.append({ - "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 18, 11, 8, 9, 5, 2. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value.", + "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 17, 19, 9, 10, 11, 2, 4, 5. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. 
Your answer should be a positive numerical integer value.", "expected_answer": "2"}) return tasks_with_answers @@ -326,7 +326,8 @@ async def test_on_task(task_index, task_assignment_callback, client, page_log, n return num_successes, num_trials -async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, task_assignment_callback, client, page_log): +async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, num_final_test_trials, + task_assignment_callback, client, page_log): page = page_log.begin_page( summary="train_and_test", details='', @@ -336,6 +337,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ task_with_answer_list = [tasklist[task_index] for task_index in task_index_list] total_num_successes_list = [0 for _ in task_index_list] + total_num_trials = 0 for i in range(num_loops): # Always train on the first task. await train( @@ -351,7 +353,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ for j, task_with_answer in enumerate(task_with_answer_list): last_response, num_successes, num_trials = await test( task_with_answer=task_with_answer, - num_trials=max_test_trials, + num_trials=num_final_test_trials, task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=False, @@ -360,11 +362,12 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) total_num_successes_list[j] += num_successes + total_num_trials += num_final_test_trials page.add_lines("") page_log.finish_page(page) - return total_num_successes_list + return total_num_successes_list, total_num_trials async def test_without_memory(task_assignment_callback, client, page_log): @@ -373,8 +376,8 @@ async def test_without_memory(task_assignment_callback, client, page_log): details='', method_call="test_without_memory") - task_index = 3 - num_trials = 1 + task_index = 5 + num_trials = 20 num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, num_trials) @@ -411,17 +414,18 @@ async def test_self_teaching(task_assignment_callback, client, page_log): # Train and test on any number of tasks using memory. 
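# (Bookkeeping note: train_and_test increments total_num_trials once per task
# per loop, so with task_index_list=[3, 1], num_loops=10, and
# num_final_test_trials=3 it ends at 60, while each entry of
# total_num_successes_list can reach at most 30; the per-task rates computed
# from these numbers below are therefore scaled down by the number of tasks.)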
num_loops = 10 # Normally 10 - total_num_successes_list = await train_and_test( + total_num_successes_list, total_num_trials = await train_and_test( task_index_list=task_index_list, num_loops=num_loops, max_train_trials=10, # Normally 10 max_test_trials=3, # Normally 3 + num_final_test_trials=3, # Normally 3 task_assignment_callback=task_assignment_callback, client=client, page_log=page_log) for i, total_num_successes in enumerate(total_num_successes_list): - success_rate = round((total_num_successes / num_loops) * 100) + success_rate = round((total_num_successes / total_num_trials) * 100) page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) page_log.finish_page(page) @@ -566,9 +570,9 @@ async def main() -> None: # SELECT ONE TEST TO RUN # await test_without_memory(task_assignment_callback, client, page_log) # await test_with_memory(task_assignment_callback, client, page_log) - # await test_self_teaching(task_assignment_callback, client, page_log) + await test_self_teaching(task_assignment_callback, client, page_log) # await test_teachability(task_assignment_callback, client, page_log) - await test_learning_from_demonstration(task_assignment_callback, client, page_log) + # await test_learning_from_demonstration(task_assignment_callback, client, page_log) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 0981a2768bc3..8e347dbcb76c 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -161,10 +161,12 @@ async def retrieve_relevant_insights(self, task: str): page.add_lines("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) relevant_insights.append(insight) + # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. validated_insights = [] - if len(relevant_insights) > 0: - # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. - validated_insights = await self.prompter.validate_insights(relevant_insights, task) + for insight in relevant_insights: + if await self.prompter.validate_insight(insight, task): + validated_insights.append(insight) + page.add_lines("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) for insight in validated_insights: page.add_lines("\n INSIGHT: {}".format(insight)) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 80b975e5a8b2..e9f6aa1b51f5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -210,38 +210,27 @@ async def generalize_task(self, task_description): return generalized_task - async def validate_insights(self, insights, task_description): - # Returns only the insights that the client verifies are relevant to the task. + async def validate_insight(self, insight, task_description): + # Determines whether the insight could help solve the task. sys_message = """You are a helpful and thoughtful assistant.""" - user_message = ["""We have been given a list of insights that may or may not be useful for solving the given task. 
+ user_message = ["""We have been given a potential insight that may or may not be useful for solving a given task. - First review the following task. -- Then review the list of insights that follow, and discuss which ones could be useful in solving the given task. -- Do not attempt to actually solve the task. That will come later."""] +- Then review the insight that follows, and consider whether it might help solve the given task. +- Do not attempt to actually solve the task. +- Reply with a single character, '1' if the insight may be useful, or '0' if it is not."""] user_message.append("\n# Task description") user_message.append(task_description) - user_message.append("\n# Possibly useful insights") - user_message.extend(insights) + user_message.append("\n# Possibly useful insight") + user_message.append(insight) self.clear_history() - response1, page = await self.call_model( - system_message=sys_message, - user_content=user_message, - details="to review the task and insights") - - user_message = ["""Now output a verbatim copy the insights that you decided are relevant to the task. -- The original list of insights is provided below for reference. -- If an insight is not relevant to the task, simply omit it from your response. -- Do not add any additional commentary either before or after the relevant tasks. -- If none of the tasks are relevant, simply write "None"."""] - user_message.append("\n# Original list of possibly useful insights") - user_message.extend(insights) - validated_insights, page = await self.call_model( + response, page = await self.call_model( system_message=sys_message, user_content=user_message, - details="to list the relevant insights") + details="to validate the insight") - return [validated_insights] if validated_insights != "None" else [] + return response == "1" async def extract_task(self, text): # Returns a task from the given text, or None if none is found. From 6b15777c317ad246ec33f61eca531139a58c1314 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 1 Jan 2025 16:57:39 -0800 Subject: [PATCH 16/93] Finish learning from demonstration. Seed messages with random int for variability. --- python/packages/autogen-ext/samples/amt.py | 69 +++++++++---------- .../agentic_memory/_agentic_memory.py | 62 +++++++++-------- .../agentic_memory/_knowledge_archive.py | 5 +- .../autogen_ext/agentic_memory/_prompter.py | 37 ++++++---- 4 files changed, 92 insertions(+), 81 deletions(-) diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py index b1017999fe99..5fb331a6e90a 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -1,3 +1,5 @@ +import time +import random import asyncio from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.models.openai import AzureOpenAIChatCompletionClient @@ -19,7 +21,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_31_3_g1_m1" +RUN_SUBDIR = "run_32" # Default client parameters TEMPERATURE = 0.8 @@ -64,7 +66,7 @@ def define_tasks_with_answers(): # Task index 5 tasks_with_answers.append({ - "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 17, 19, 9, 10, 11, 2, 4, 5. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. 
Your answer should be a positive numerical integer value.", + "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value.", "expected_answer": "2"}) return tasks_with_answers @@ -212,14 +214,22 @@ async def assign_task_to_client(task, client, page_log): page.add_lines(task) - system_message = SystemMessage(content="""You are a helpful and thoughtful assistant. + system_message_content = """You are a helpful and thoughtful assistant. In responding to every user message, you follow the same multi-step process given here: 1. Explain your understanding of the user message in detail, covering all the important points. 2. List as many possible responses as you can think of. 3. Carefully list and weigh the pros and cons (if any) of each possible response. 4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. 5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. -6. Finish by providing your final response in the particular format requested by the user.""") +6. Finish by providing your final response in the particular format requested by the user.""" + + # Randomize the system message content to add variability. + random.seed(int(time.time() * 1000)) + rand = random.Random() + random_str = "({})\n\n".format(rand.randint(0, 1000000)) # Inject a random int for variability. + system_message_content = random_str + system_message_content + + system_message = SystemMessage(content=system_message_content) user_message = UserMessage(content=task, source="User") input_messages = [system_message] + [user_message] @@ -512,47 +522,32 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag details='', method_call="test_learning_from_demonstration") - tasklist = define_tasks_with_answers() task_index = 5 - task_with_answer = tasklist[task_index] - task = task_with_answer["task"] - answer = task_with_answer["expected_answer"] - grader = Grader(client, page_log) - - # First test without memory. - page.add_lines("Clearing memory, then assigning the task.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=True) + num_trials = 10 - # Check the response. - response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + # First test after clearing memory. + page.add_lines("To get a baseline, clear memory, then assign the task.") + num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, + num_trials=num_trials, reset_memory=True) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) # Provide the demonstration. - page.add_lines("Demonstrating a solution to a similar task.") - demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. 
Houses are located at mile markers 10, 12, 17, 18, 19, 20, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." - demonstration = "First I sort the houses by location: 3, 6, 10, 12, 17, 18, 19, 20. Then I start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so I place a tower at 7. How far would that tower at 7 reach? Radius means in both directions, so while it reaches the house at 3, it also reaches up to mile 11. And that would cover the house at 10. The next uncovered house would be at 12, so a second tower is required. It could go at 16 (16 + 4) and it would reach further, up to mile 20 (16 + 4) where the last house is located. So 2 towers are enough." + page.add_lines("Demonstrate a solution to a similar task.") + demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." + demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." await give_demonstration_to_agent(demo_task, demonstration, client, page_log) - # Now assign the task again to see if the demonstration helps. - page.add_lines("Assigning the task again to see if the demonstration is useful.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=False) - - # Check the response. - response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + # Now test again to see if the demonstration (retrieved from memory) helps. + page.add_lines("Assign the task again to see if the demonstration helps.") + num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, + num_trials=num_trials, reset_memory=False) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) page_log.finish_page(page) - async def main() -> None: # Create the PageLog. 
page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) @@ -570,9 +565,9 @@ async def main() -> None: # SELECT ONE TEST TO RUN # await test_without_memory(task_assignment_callback, client, page_log) # await test_with_memory(task_assignment_callback, client, page_log) - await test_self_teaching(task_assignment_callback, client, page_log) + # await test_self_teaching(task_assignment_callback, client, page_log) # await test_teachability(task_assignment_callback, client, page_log) - # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + await test_learning_from_demonstration(task_assignment_callback, client, page_log) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 8e347dbcb76c..49fbe0d78f81 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -141,35 +141,39 @@ async def retrieve_relevant_insights(self, task: str): details="", method_call="AgenticMemory.retrieve_relevant_insights") - page.add_lines("\nCURRENT TASK:") - page.add_lines(task) - - # Generalize the task. - generalized_task = await self.prompter.generalize_task(task) - - # Get a list of topics from the task. - topics = await self.prompter.find_index_topics(generalized_task) - page.add_lines("\nTOPICS EXTRACTED FROM TASK:") - page.add_lines("\n".join(topics)) - page.add_lines("") - - # Retrieve relevant insights from the archive. - relevant_insights_and_relevances = self.archive.get_relevant_insights(topics=topics) - relevant_insights = [] - page.add_lines("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) - for insight, relevance in relevant_insights_and_relevances.items(): - page.add_lines("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) - relevant_insights.append(insight) - - # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. - validated_insights = [] - for insight in relevant_insights: - if await self.prompter.validate_insight(insight, task): - validated_insights.append(insight) - - page.add_lines("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) - for insight in validated_insights: - page.add_lines("\n INSIGHT: {}".format(insight)) + if self.archive.contains_insights(): + page.add_lines("\nCURRENT TASK:") + page.add_lines(task) + + # Generalize the task. + generalized_task = await self.prompter.generalize_task(task) + + # Get a list of topics from the task. + topics = await self.prompter.find_index_topics(generalized_task) + page.add_lines("\nTOPICS EXTRACTED FROM TASK:") + page.add_lines("\n".join(topics)) + page.add_lines("") + + # Retrieve relevant insights from the archive. + relevant_insights_and_relevances = self.archive.get_relevant_insights(topics=topics) + relevant_insights = [] + page.add_lines("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) + for insight, relevance in relevant_insights_and_relevances.items(): + page.add_lines("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) + relevant_insights.append(insight) + + # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. 
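# (Design note: validation here is one yes/no model call per candidate
# insight rather than a single batch call over the whole list; a bare '1'/'0'
# answer per insight is presumably easier to parse reliably than the
# verbatim-copy-the-relevant-subset output the earlier batch prompt asked
# for.)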
+ validated_insights = [] + for insight in relevant_insights: + if await self.prompter.validate_insight(insight, task): + validated_insights.append(insight) + + page.add_lines("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) + for insight in validated_insights: + page.add_lines("\n INSIGHT: {}".format(insight)) + else: + page.add_lines("\nNO INSIGHTS WERE FOUND IN MEMORY") + validated_insights = [] self.page_log.finish_page(page) return validated_insights diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py index 3f62039408ed..24b67509637b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py @@ -54,6 +54,9 @@ def __init__( self.last_insight_id = len(self.uid_insight_dict) parent_page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict))) + def contains_insights(self): + return len(self.uid_insight_dict) > 0 + def save_archive(self): self.memo_store.save_memos() parent_page = self.page_log.last_page() @@ -106,7 +109,7 @@ def add_demonstration(self, task: str, demonstration: str, topics: List[str]): """Adds a task-demonstration pair (as a single insight) to the knowledge archive.""" self.last_insight_id += 1 id_str = str(self.last_insight_id) - insight_str = "Example task:\n\n{}\nExample solution:\n\n{}".format(task, demonstration) + insight_str = "Example task:\n\n{}\n\nExample solution:\n\n{}".format(task, demonstration) insight = Insight(id=id_str, insight_str=insight_str, task_str=task, topics=topics) for topic in topics: # Add a mapping in the vec DB from each topic to the insight. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index e9f6aa1b51f5..f3ac6dab0220 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -1,4 +1,5 @@ import time +import random from typing import List from autogen_core.models import ( @@ -18,19 +19,27 @@ class Prompter: def __init__(self, client, page_log): self.client = client self.page_log = page_log + self.default_system_message_content = "You are a helpful assistant." self.time_spent_in_model_calls = 0. self.num_model_calls = 0 self.start_time = time.time() + # Instantiate a random number generator, seeded from the current time. + random.seed(int(time.time() * 1000)) + self.rand = random.Random() + # Create the chat history self._chat_history: List[LLMMessage] = [] - async def call_model(self, details, user_content: UserContent = None, system_message=None, keep_these_messages=True): + async def call_model(self, details, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): # Prepare the input message list user_message = UserMessage(content=user_content, source="User") - if system_message is None: - system_message = self.default_system_message - system_message = SystemMessage(content=system_message) + + if system_message_content is None: + system_message_content = self.default_system_message_content + random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) # Inject a random int for variability. 
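# (A caution on the randomization above: random.Random() called with no
# argument seeds the new generator from the OS, so the module-level
# random.seed(int(time.time() * 1000)) call in __init__ never affects
# self.rand. If a time-based seed is actually intended, it can be passed
# straight to the constructor: self.rand = random.Random(int(time.time() * 1000)).)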
+ system_message_content = random_str + system_message_content + system_message = SystemMessage(content=system_message_content) input_messages = [system_message] + self._chat_history + [user_message] @@ -125,14 +134,14 @@ async def learn_from_failure(self, task_description, memory_section, final_respo self.clear_history() response1, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to learn from this failure") user_message = [ "Now put yourself in the mind of the students. What misconception led them to their incorrect answer?"] response2, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to state the misconception") @@ -144,7 +153,7 @@ async def learn_from_failure(self, task_description, memory_section, final_respo # user_message.append(memory_section) insight, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to formulate a concise insight") @@ -169,7 +178,7 @@ async def find_index_topics(self, input_string): self.clear_history() topics, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to extract topics") @@ -192,19 +201,19 @@ async def generalize_task(self, task_description): self.clear_history() response1, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to rephrase the task in a list of important points") user_message = ["Do you see any parts of this list that are irrelevant to actually solving the task? If so, explain which items are irrelevant."] response2, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to identify irrelevant points") user_message = ["Revise your original list to include only the most general terms, those that are critical to solving the task, removing any themes or descriptions that are not essential to the solution. Your final list may be shorter, but do not leave out any part of the task that is needed for solving the task. 
Do not add any additional commentary either before or after the list."] generalized_task, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to make a final list of general terms") @@ -226,7 +235,7 @@ async def validate_insight(self, insight, task_description): user_message.append(insight) self.clear_history() response, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to validate the insight") @@ -243,7 +252,7 @@ async def extract_task(self, text): user_message.append(text) self.clear_history() response, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to extract a task") return response if response != "None" else None @@ -258,7 +267,7 @@ async def extract_advice(self, text): user_message.append(text) self.clear_history() response, page = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to extract advice") return response if response != "None" else None From a18674c30b5900a9090318c101367966df635ee7 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 2 Jan 2025 18:05:10 -0800 Subject: [PATCH 17/93] Added RecordableChatCompletionClient as a guardrail during refactoring. --- python/packages/autogen-ext/samples/amt.py | 62 ++++++++++-- .../autogen_ext/agentic_memory/__init__.py | 3 +- .../recordable_chat_completion_client.py | 94 +++++++++++++++++++ 3 files changed, 152 insertions(+), 7 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py index 5fb331a6e90a..0dc30c4d15e6 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -16,12 +16,12 @@ from typing import ( Tuple, ) -from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader +from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader, RecordableChatCompletionClient MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_32" +RUN_SUBDIR = "run_33" # Default client parameters TEMPERATURE = 0.8 @@ -95,7 +95,7 @@ def create_oai_client(page_log): if page_log is not None: page_log.append_entry_line("Client: {}".format(client._resolved_model)) page_log.append_entry_line(" created through OpenAI directly") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client @@ -121,7 +121,7 @@ def create_aoai_client(page_log): if page_log is not None: page_log.append_entry_line("Client: {}".format(client._resolved_model)) page_log.append_entry_line(" created through Azure OpenAI") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client @@ -162,7 +162,7 @@ def create_trapi_client(page_log): if page_log is not None: page_log.append_entry_line("Client: {}".format(client._resolved_model)) page_log.append_entry_line(" created through TRAPI") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) + page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) return client @@ -548,6 +548,55 @@ async def 
test_learning_from_demonstration(task_assignment_callback, client, pag page_log.finish_page(page) +async def call_client(task, client, page_log): + page = page_log.begin_page( + summary="call_client", + details='', + method_call="call_client") + + page.add_lines(task) + + system_message_content = """You are a helpful and thoughtful assistant.""" + + system_message = SystemMessage(content=system_message_content) + user_message = UserMessage(content=task, source="User") + + input_messages = [system_message] + [user_message] + response = await client.create(input_messages) + response_str = response.content + + # Log the model call + page_log.add_model_call(description="Ask the model", + details="to complete the task", input_messages=input_messages, + response=response, + num_input_tokens=0, caller='assign_task_to_client') + page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + + page_log.finish_page(page) + return response_str + + +async def test_recordable_client(client, page_log): + page = page_log.begin_page( + summary="test_recordable_client", + details='', + method_call="test_recordable_client") + + # Define a simple task. + task = "What is 4^4?" + + # Use a client wrapper to record a session. + client1 = RecordableChatCompletionClient(client, "record", page_log) + await call_client(task, client1, page_log) + client1.save() + + # Use a second client wrapper to check and replay the session. + client2 = RecordableChatCompletionClient(client, "check-replay", page_log) + await call_client(task, client2, page_log) + + page_log.finish_page(page) + + async def main() -> None: # Create the PageLog. page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) @@ -567,7 +616,8 @@ async def main() -> None: # await test_with_memory(task_assignment_callback, client, page_log) # await test_self_teaching(task_assignment_callback, client, page_log) # await test_teachability(task_assignment_callback, client, page_log) - await test_learning_from_demonstration(task_assignment_callback, client, page_log) + # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + await test_recordable_client(client, page_log) page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 26e6c64f98a5..02acd03d2937 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,5 +1,6 @@ from ._agentic_memory import AgenticMemory from ._page_log import PageLog from ._grader import Grader +from .recordable_chat_completion_client import RecordableChatCompletionClient -__all__ = ["AgenticMemory", "PageLog", "Grader"] +__all__ = ["AgenticMemory", "PageLog", "Grader", "RecordableChatCompletionClient"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py new file mode 100644 index 000000000000..a13017fbc034 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py @@ -0,0 +1,94 @@ +import os +import json +from typing import Any, List, Dict, Mapping, Optional, Sequence +from autogen_ext.models.openai import AzureOpenAIChatCompletionClient +from autogen_core import CancellationToken +from autogen_core.models 
import ( + CreateResult, + LLMMessage, + RequestUsage, +) +from autogen_core.tools import Tool, ToolSchema +from autogen_ext.agentic_memory import PageLog + + +class RecordableChatCompletionClient: + """ + Wraps a client object to record messages and responses (in record mode) + or check the messages and replay the responses (in check-replay mode). + """ + def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, page_log: PageLog) -> None: + self.base_client = base_client + self.mode = mode + self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), "session.json") + if page_log is not None: + page_log.append_entry_line("Wrapped the base client in a RecordableChatCompletionClient.") + if self.mode == "record": + # Prepare to record the messages and responses. + page_log.append_entry_line("Recording mode enabled.") + self.recorded_turns = [] + elif self.mode == "check-replay": + # Load the recorded messages and responses from disk. + page_log.append_entry_line("Replay-check mode enabled.") + self.recorded_turns = self.load() + self.next_turn = 0 + + async def create( + self, + messages: Sequence[LLMMessage], + tools: Sequence[Tool | ToolSchema] = [], + json_output: Optional[bool] = None, + extra_create_args: Mapping[str, Any] = {}, + cancellation_token: Optional[CancellationToken] = None, + ) -> CreateResult: + if self.mode == "pass-through": + return await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) + elif self.mode == "record": + response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) + self.record(messages, response) + return response + elif self.mode == "check-replay": + recorded_response = self.replay_and_check(messages) + return recorded_response + else: + raise ValueError(f"Invalid mode: {self.mode}") + + def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str]]: + converted_messages = [] + for message in messages: + turn = {"content": message.content, "source": 'System' if message.type == "SystemMessage" else message.source} + converted_messages.append(turn) + return converted_messages + + def record(self, messages: Sequence[LLMMessage], response: CreateResult) -> None: + # Record the messages and response. + converted_messages = self.convert_messages(messages) + turn = {"messages": converted_messages, "response": response.content} + self.recorded_turns.append(turn) + + def replay_and_check(self, messages): + # Compare the messages to the recorded messages, and return the recorded response. + assert self.next_turn < len(self.recorded_turns) + recorded_turn = self.recorded_turns[self.next_turn] + self.next_turn += 1 + recorded_messages = recorded_turn["messages"] + converted_messages = self.convert_messages(messages) + assert converted_messages == recorded_messages + response = recorded_turn["response"] + cur_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) + result = CreateResult(finish_reason="stop", content=response, usage=cur_usage, cached=True) + return result + + def save(self) -> None: + # Save the recorded messages and responses to disk. + session = {"turns": self.recorded_turns} + with open(self.path_to_output_file, "w", encoding="utf-8") as file: + json.dump(session, file, ensure_ascii=False, indent=4, sort_keys=True) + + def load(self): + # Load the recorded messages and responses from disk. 
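# (For reference, an illustrative example -- not from a real run -- of the
# structure save() writes: a single "turns" list, each turn holding the
# converted messages and the recorded response string.)
example_session = {
    "turns": [
        {
            "messages": [
                {"content": "You are a helpful and thoughtful assistant.", "source": "System"},
                {"content": "What is 4^4?", "source": "User"},
            ],
            "response": "4^4 = 256.",
        }
    ]
}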
+ recorded_turns = [] + with open(self.path_to_output_file, "r", encoding="utf-8") as file: + session = json.load(file) + recorded_turns = session["turns"] + return recorded_turns From 52e213efc56def2c657f05fda598841c8f7974e8 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 5 Jan 2025 13:30:35 -0800 Subject: [PATCH 18/93] Ran 3 evals with session recording and replay. --- python/packages/autogen-ext/samples/amt.py | 77 +++++++++--------- .../autogen_ext/agentic_memory/__init__.py | 4 +- .../agentic_memory/_agentic_memory.py | 4 +- .../src/autogen_ext/agentic_memory/_grader.py | 19 +++-- .../autogen_ext/agentic_memory/_page_log.py | 7 +- .../autogen_ext/agentic_memory/_prompter.py | 12 +-- ...completion_client.py => client_wrapper.py} | 78 +++++++++++++++---- 7 files changed, 125 insertions(+), 76 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{recordable_chat_completion_client.py => client_wrapper.py} (53%) diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py index 0dc30c4d15e6..6204a8bba7e2 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -1,5 +1,3 @@ -import time -import random import asyncio from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.models.openai import AzureOpenAIChatCompletionClient @@ -16,12 +14,12 @@ from typing import ( Tuple, ) -from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader, RecordableChatCompletionClient +from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader, ClientWrapper MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_33" +RUN_SUBDIR = "run_44" # Default client parameters TEMPERATURE = 0.8 @@ -69,6 +67,16 @@ def define_tasks_with_answers(): "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value.", "expected_answer": "2"}) + # Task index 6 + tasks_with_answers.append({ + "task": "What is 4^4?", + "expected_answer": "256"}) + + # Task index 7 + tasks_with_answers.append({ + "task": "What is 3^3?", + "expected_answer": "27"}) + return tasks_with_answers @@ -223,12 +231,6 @@ async def assign_task_to_client(task, client, page_log): 5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. 6. Finish by providing your final response in the particular format requested by the user.""" - # Randomize the system message content to add variability. - random.seed(int(time.time() * 1000)) - rand = random.Random() - random_str = "({})\n\n".format(rand.randint(0, 1000000)) # Inject a random int for variability. 
- system_message_content = random_str + system_message_content - system_message = SystemMessage(content=system_message_content) user_message = UserMessage(content=task, source="User") @@ -295,7 +297,7 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) - response_is_correct, extracted_answer = await grader.response_is_correct( + response_is_correct, extracted_answer = await grader.is_response_correct( task_with_answer["task"], response, task_with_answer["expected_answer"]) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: @@ -471,12 +473,11 @@ async def test_teachability(task_assignment_callback, client, page_log): grader = Grader(client, page_log) # First test without memory. - page.add_lines("Clearing memory, then asking the question.") - # task = "As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?" + page.add_lines("\nClear memory, then ask the question.") response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=True) # Check the response. - response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: page.add_lines("Answer is CORRECT.\n", flush=True) @@ -484,16 +485,16 @@ async def test_teachability(task_assignment_callback, client, page_log): page.add_lines("Answer is INCORRECT.\n", flush=True) # Give the advice. - page.add_lines("Giving the advice.") + page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" await send_message_to_agent(insight, task_assignment_callback, client, page_log, reset_memory=False) # Now ask the question again to see if the advice is retrieved from memory. - page.add_lines("Asking the question again to see if the advice is retrieved from memory.") + page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=False) # Check the response. - response_is_correct, extracted_answer = await grader.response_is_correct(task, response, answer) + response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: page.add_lines("Answer is CORRECT.\n", flush=True) @@ -576,27 +577,6 @@ async def call_client(task, client, page_log): return response_str -async def test_recordable_client(client, page_log): - page = page_log.begin_page( - summary="test_recordable_client", - details='', - method_call="test_recordable_client") - - # Define a simple task. - task = "What is 4^4?" - - # Use a client wrapper to record a session. 
- client1 = RecordableChatCompletionClient(client, "record", page_log) - await call_client(task, client1, page_log) - client1.save() - - # Use a second client wrapper to check and replay the session. - client2 = RecordableChatCompletionClient(client, "check-replay", page_log) - await call_client(task, client2, page_log) - - page_log.finish_page(page) - - async def main() -> None: # Create the PageLog. page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) @@ -617,7 +597,26 @@ async def main() -> None: # await test_self_teaching(task_assignment_callback, client, page_log) # await test_teachability(task_assignment_callback, client, page_log) # await test_learning_from_demonstration(task_assignment_callback, client, page_log) - await test_recordable_client(client, page_log) + + + # WRAPPED-CLIENT TESTS + + # Wrap the client in a ClientWrapper to record or check-replay a session. + session_name = "teach-1" + + # Record + # client = ClientWrapper(client, "record", session_name, page_log) + # # await test_teachability(task_assignment_callback, client, page_log) + # # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + # # await test_self_teaching(task_assignment_callback, client, page_log) + # client.save() + + # Check-replay + client = ClientWrapper(client, "check-replay", session_name, page_log) + await test_teachability(task_assignment_callback, client, page_log) + # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + # await test_self_teaching(task_assignment_callback, client, page_log) + page_log.flush(final=True) # Finalize the page log page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 02acd03d2937..1e4df35e5b76 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,6 +1,6 @@ from ._agentic_memory import AgenticMemory from ._page_log import PageLog from ._grader import Grader -from .recordable_chat_completion_client import RecordableChatCompletionClient +from .client_wrapper import ClientWrapper -__all__ = ["AgenticMemory", "PageLog", "Grader", "RecordableChatCompletionClient"] +__all__ = ["AgenticMemory", "PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 49fbe0d78f81..1ea277c5db3e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -70,7 +70,7 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca page.add_lines("Try to solve the task.\n", flush=True) response, _ = await task_assignment_callback(task_plus_insights, self.client, self.page_log) - response_is_correct, extracted_answer = await self.grader.response_is_correct( + response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: @@ -209,7 +209,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a page.add_lines("Try to solve the task.", flush=True) response, work_history = await assign_task_to_completer(task_plus_insights, self.client, 
self.page_log)
-        response_is_correct, extracted_answer = await self.grader.response_is_correct(
+        response_is_correct, extracted_answer = await self.grader.is_response_correct(
             task, response, expected_answer)
         page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True)
         if response_is_correct:
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py
index da14a6cf6bf0..2ac41f04e08d 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py
@@ -55,27 +55,32 @@ def remove_last_turn(self):
     def clear_history(self):
         self._chat_history = []
 
-    async def response_is_correct(self, task_description, response_to_be_graded, correct_answer):
+    async def is_response_correct(self, task_description, response_to_be_graded, correct_answer):
         # Returns only the insights that the client verifies are relevant to the task.
+        page = self.page_log.begin_page(
+            summary="Grader.is_response_correct",
+            details="",
+            method_call="Grader.is_response_correct")
 
         sys_message = """You are a helpful and thoughtful assistant."""
 
         user_message = ["""Your job is to extract a possible answer to the following question from the given text.
 - First review the following task.
-- Then review the response that follows, which may contain reasoning that led to the answer, as well as other comments.
+- Then review the text that follows, which may contain an answer, plus reasoning that led to the answer.
 - Do not attempt to actually solve the task yourself.
 - Don't try to judge whether the reasoning steps were correct.
-- Simply respond by providing a copy of the answer from the text, omitting any other parts of the text.
-- If no answer is present in the text, simply reply "None"."""]
+- Simply respond by summarizing the answer described in the text, omitting any other parts of the text.
+- If no answer can be extracted from the text, simply reply "None"."""]
         user_message.append("\n# Task description")
         user_message.append(task_description)
         user_message.append("\n# Text that may contain an answer")
         user_message.append(response_to_be_graded)
 
         self.clear_history()
-        extracted_answer, page = await self.call_model(
+        extracted_answer, _ = await self.call_model(
             system_message=sys_message,
             user_content=user_message,
             details="to extract the answer")
+        page.add_lines("Extracted answer: " + extracted_answer)
 
         user_message = ["""Your job is to decide whether a given answer to a task is correct or not.
 - You will be given the task description and the correct, gold-standard answer, along with the answer to be graded. 
@@ -92,9 +97,11 @@ async def response_is_correct(self, task_description, response_to_be_graded, cor user_message.append("\n# Answer to be graded") user_message.append(extracted_answer) self.clear_history() - decision, page = await self.call_model( + decision, _ = await self.call_model( system_message=sys_message, user_content=user_message, details="to check the answer for correctness") + page.add_lines("Decision: " + decision) + self.page_log.finish_page(page) return decision == "1", extracted_answer diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py index f18e6643412e..cb630060b049 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py @@ -1,7 +1,8 @@ import os import shutil import time -from typing import List +import json +from typing import List, Dict from autogen_core import Image from autogen_core.models import ( @@ -228,6 +229,10 @@ def message_content(self, page, message=None, message_content=None): item.image.save(image_path) # Add a link to the image. content_list.append(page.link_to_image(image_filename, "message_image")) + elif isinstance(item, Dict): + # Add a dictionary to the log. + json_str = json.dumps(item, indent=4) + content_list.append(json_str) else: content_list.append(str(item).rstrip()) else: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index f3ac6dab0220..6e90998f59bc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -1,5 +1,4 @@ import time -import random from typing import List from autogen_core.models import ( @@ -24,10 +23,6 @@ def __init__(self, client, page_log): self.num_model_calls = 0 self.start_time = time.time() - # Instantiate a random number generator, seeded from the current time. - random.seed(int(time.time() * 1000)) - self.rand = random.Random() - # Create the chat history self._chat_history: List[LLMMessage] = [] @@ -37,8 +32,6 @@ async def call_model(self, details, user_content: UserContent = None, system_mes if system_message_content is None: system_message_content = self.default_system_message_content - random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) # Inject a random int for variability. - system_message_content = random_str + system_message_content system_message = SystemMessage(content=system_message_content) input_messages = [system_message] + self._chat_history + [user_message] @@ -54,8 +47,9 @@ async def call_model(self, details, user_content: UserContent = None, system_mes # Call the model start_time = time.time() - # create_result, num_input_tokens = self.core.call_model(input_messages) - num_input_tokens = self.client.count_tokens(input_messages) + # Optional code to pre-count tokens. + # num_input_tokens = self.client.count_tokens(input_messages) + num_input_tokens = 0 max_input_tokens_per_call = None # This is a placeholder value. if (max_input_tokens_per_call is not None) and (num_input_tokens > max_input_tokens_per_call): # The input is too large. 
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py similarity index 53% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py index a13017fbc034..e5e5287f5c25 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/recordable_chat_completion_client.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py @@ -12,27 +12,35 @@ from autogen_ext.agentic_memory import PageLog -class RecordableChatCompletionClient: +class ClientWrapper: """ Wraps a client object to record messages and responses (in record mode) or check the messages and replay the responses (in check-replay mode). """ - def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, page_log: PageLog) -> None: + def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, page_log: PageLog) -> None: + page = page_log.begin_page( + summary="ClientWrapper.__init__", + details='', + method_call="ClientWrapper.__init__") + self.base_client = base_client self.mode = mode - self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), "session.json") + self.page_log = page_log + self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".json") if page_log is not None: - page_log.append_entry_line("Wrapped the base client in a RecordableChatCompletionClient.") + page.add_lines("Wrapping the base client in a ClientWrapper.") if self.mode == "record": # Prepare to record the messages and responses. - page_log.append_entry_line("Recording mode enabled.") + page.add_lines("Recording mode enabled.\nRecording session to: " + self.path_to_output_file) self.recorded_turns = [] elif self.mode == "check-replay": # Load the recorded messages and responses from disk. 
- page_log.append_entry_line("Replay-check mode enabled.") + page.add_lines("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) self.recorded_turns = self.load() self.next_turn = 0 + self.page_log.finish_page(page) + async def create( self, messages: Sequence[LLMMessage], @@ -41,18 +49,29 @@ async def create( extra_create_args: Mapping[str, Any] = {}, cancellation_token: Optional[CancellationToken] = None, ) -> CreateResult: + page = self.page_log.begin_page( + summary="ClientWrapper.create", + details='', + method_call="ClientWrapper.create") + + response = None + if self.mode == "pass-through": - return await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) + page.add_lines("Passing through to the base client.") + response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) elif self.mode == "record": + page.add_lines("Recording the messages and response.") response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) - self.record(messages, response) - return response + self.record_one_turn(messages, response) elif self.mode == "check-replay": - recorded_response = self.replay_and_check(messages) - return recorded_response + page.add_lines("Comparing the messages to the recorded messages.") + response = self.check_and_replay_one_turn(messages) else: raise ValueError(f"Invalid mode: {self.mode}") + self.page_log.finish_page(page) + return response + def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str]]: converted_messages = [] for message in messages: @@ -60,23 +79,48 @@ def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str converted_messages.append(turn) return converted_messages - def record(self, messages: Sequence[LLMMessage], response: CreateResult) -> None: + def record_one_turn(self, messages: Sequence[LLMMessage], response: CreateResult) -> None: # Record the messages and response. + page = self.page_log.begin_page( + summary="ClientWrapper.record_one_turn", + details='', + method_call="ClientWrapper.record_one_turn") + converted_messages = self.convert_messages(messages) turn = {"messages": converted_messages, "response": response.content} self.recorded_turns.append(turn) + self.page_log.finish_page(page) - def replay_and_check(self, messages): + def check_and_replay_one_turn(self, messages): # Compare the messages to the recorded messages, and return the recorded response. + page = self.page_log.begin_page( + summary="ClientWrapper.check_and_replay_one_turn", + details='', + method_call="ClientWrapper.check_and_replay_one_turn") + + # Get the next recorded turn. assert self.next_turn < len(self.recorded_turns) recorded_turn = self.recorded_turns[self.next_turn] self.next_turn += 1 + + # Check the current message list against the recorded message list. recorded_messages = recorded_turn["messages"] - converted_messages = self.convert_messages(messages) - assert converted_messages == recorded_messages - response = recorded_turn["response"] + current_messages = self.convert_messages(messages) + if current_messages != recorded_messages: + error_str = "Current message list doesn't match the recorded message list." 
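+            # Log both message lists before raising, so the point of divergence is easy to inspect in the page log.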
+ page.add_lines(error_str) + page.page_log.add_message_content(recorded_messages, "recorded message list") + page.page_log.add_message_content(current_messages, "current message list") + self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) # Finalize the page log + self.page_log.finish_page(page) + raise ValueError(error_str) + assert current_messages == recorded_messages + + # Return the recorded response. cur_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) - result = CreateResult(finish_reason="stop", content=response, usage=cur_usage, cached=True) + result = CreateResult(finish_reason="stop", content=recorded_turn["response"], usage=cur_usage, cached=True) + self.page_log.finish_page(page) return result def save(self) -> None: From a440b0a300e1a0ee4f7201f0c7fb02b14cd1f970 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 5 Jan 2025 15:34:43 -0800 Subject: [PATCH 19/93] Add results to recorded sessions, including session length. Save sessions as yaml for readability. --- python/packages/autogen-ext/samples/amt.py | 17 +-- .../src/autogen_ext/agentic_memory/_grader.py | 5 + .../agentic_memory/client_wrapper.py | 105 +++++++++++------- 3 files changed, 81 insertions(+), 46 deletions(-) diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/amt.py index 6204a8bba7e2..1f8569a38c52 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/amt.py @@ -19,7 +19,7 @@ MEMORY_DIR = "~/agentic_memory_archive" PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "run_44" +RUN_SUBDIR = "replay-short-teach" # Default client parameters TEMPERATURE = 0.8 @@ -602,20 +602,21 @@ async def main() -> None: # WRAPPED-CLIENT TESTS # Wrap the client in a ClientWrapper to record or check-replay a session. - session_name = "teach-1" + session_name = "short-teach" # Record # client = ClientWrapper(client, "record", session_name, page_log) - # # await test_teachability(task_assignment_callback, client, page_log) + # await test_teachability(task_assignment_callback, client, page_log) # # await test_learning_from_demonstration(task_assignment_callback, client, page_log) # # await test_self_teaching(task_assignment_callback, client, page_log) - # client.save() + # client.finalize() # Check-replay - client = ClientWrapper(client, "check-replay", session_name, page_log) - await test_teachability(task_assignment_callback, client, page_log) - # await test_learning_from_demonstration(task_assignment_callback, client, page_log) - # await test_self_teaching(task_assignment_callback, client, page_log) + # client = ClientWrapper(client, "check-replay", session_name, page_log) + # await test_teachability(task_assignment_callback, client, page_log) + # # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + # # await test_self_teaching(task_assignment_callback, client, page_log) + # client.finalize() page_log.flush(final=True) # Finalize the page log diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py index 2ac41f04e08d..55f70c651ee9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py @@ -16,6 +16,9 @@ def __init__(self, client, page_log): self.client = client self.page_log = page_log + # Check whether to report results to the client. 
+ self.report_results = hasattr(self.client, 'report_result') + # Create the chat history self._chat_history: List[LLMMessage] = [] @@ -104,4 +107,6 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor page.add_lines("Decision: " + decision) self.page_log.finish_page(page) + if self.report_results: + self.client.report_result(decision) return decision == "1", extracted_answer diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py index e5e5287f5c25..a0b5e9c4bb4f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py @@ -1,5 +1,5 @@ import os -import json +import yaml from typing import Any, List, Dict, Mapping, Optional, Sequence from autogen_ext.models.openai import AzureOpenAIChatCompletionClient from autogen_core import CancellationToken @@ -26,18 +26,18 @@ def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, sess self.base_client = base_client self.mode = mode self.page_log = page_log - self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".json") + self.next_item_index = 0 + self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".yaml") if page_log is not None: page.add_lines("Wrapping the base client in a ClientWrapper.") if self.mode == "record": # Prepare to record the messages and responses. page.add_lines("Recording mode enabled.\nRecording session to: " + self.path_to_output_file) - self.recorded_turns = [] + self.recorded_items = [] elif self.mode == "check-replay": # Load the recorded messages and responses from disk. page.add_lines("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) - self.recorded_turns = self.load() - self.next_turn = 0 + self.recorded_items = self.load() self.page_log.finish_page(page) @@ -49,27 +49,18 @@ async def create( extra_create_args: Mapping[str, Any] = {}, cancellation_token: Optional[CancellationToken] = None, ) -> CreateResult: - page = self.page_log.begin_page( - summary="ClientWrapper.create", - details='', - method_call="ClientWrapper.create") - response = None if self.mode == "pass-through": - page.add_lines("Passing through to the base client.") response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) elif self.mode == "record": - page.add_lines("Recording the messages and response.") response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) self.record_one_turn(messages, response) elif self.mode == "check-replay": - page.add_lines("Comparing the messages to the recorded messages.") response = self.check_and_replay_one_turn(messages) else: raise ValueError(f"Invalid mode: {self.mode}") - self.page_log.finish_page(page) return response def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str]]: @@ -81,58 +72,96 @@ def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str def record_one_turn(self, messages: Sequence[LLMMessage], response: CreateResult) -> None: # Record the messages and response. 
- page = self.page_log.begin_page( - summary="ClientWrapper.record_one_turn", - details='', - method_call="ClientWrapper.record_one_turn") - converted_messages = self.convert_messages(messages) turn = {"messages": converted_messages, "response": response.content} - self.recorded_turns.append(turn) - self.page_log.finish_page(page) + self.recorded_items.append(turn) + self.next_item_index += 1 def check_and_replay_one_turn(self, messages): # Compare the messages to the recorded messages, and return the recorded response. - page = self.page_log.begin_page( - summary="ClientWrapper.check_and_replay_one_turn", - details='', - method_call="ClientWrapper.check_and_replay_one_turn") - # Get the next recorded turn. - assert self.next_turn < len(self.recorded_turns) - recorded_turn = self.recorded_turns[self.next_turn] - self.next_turn += 1 + if self.next_item_index >= len(self.recorded_items): + error_str = "No more recorded items to check." + self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) + raise ValueError(error_str) + recorded_turn = self.recorded_items[self.next_item_index] + self.next_item_index += 1 # Check the current message list against the recorded message list. + if "messages" not in recorded_turn: + error_str = "Recorded turn doesn't contain a messages field. Perhaps a result was recorded instead." + self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) + raise ValueError(error_str) recorded_messages = recorded_turn["messages"] current_messages = self.convert_messages(messages) if current_messages != recorded_messages: error_str = "Current message list doesn't match the recorded message list." - page.add_lines(error_str) - page.page_log.add_message_content(recorded_messages, "recorded message list") - page.page_log.add_message_content(current_messages, "current message list") + self.page_log.add_message_content(recorded_messages, "recorded message list") + self.page_log.add_message_content(current_messages, "current message list") self.page_log.append_exit_line(error_str) self.page_log.flush(final=True) # Finalize the page log - self.page_log.finish_page(page) raise ValueError(error_str) assert current_messages == recorded_messages # Return the recorded response. cur_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) result = CreateResult(finish_reason="stop", content=recorded_turn["response"], usage=cur_usage, cached=True) - self.page_log.finish_page(page) return result + def report_result(self, result: Any) -> None: + if self.mode == "pass-through": + return + elif self.mode == "record": + self.record_result(result) + elif self.mode == "check-replay": + self.check_result(result) + + def record_result(self, result: Any) -> None: + # Record a result. + self.recorded_items.append({"result": result}) + self.next_item_index += 1 + + def check_result(self, result: Any) -> None: + # Check a result. + if self.next_item_index >= len(self.recorded_items): + error_str = "No more recorded items to check." + self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) + raise ValueError(error_str) + recorded_result = self.recorded_items[self.next_item_index] + self.next_item_index += 1 + + if "result" not in recorded_result: + error_str = "Recorded turn doesn't contain a result field. Perhaps a turn was recorded instead." 
+ self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) + raise ValueError(error_str) + if result != recorded_result["result"]: + error_str = "Recorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) + self.page_log.append_exit_line(error_str) + self.page_log.flush(final=True) + raise ValueError(error_str) + + def finalize(self) -> None: + self.report_result("Total items = " + str(self.next_item_index)) + if self.mode == "record": + self.save() + self.page_log.append_exit_line("Recorded session was saved to: " + self.path_to_output_file) + elif self.mode == "check-replay": + self.page_log.append_exit_line("Recorded session was fully replayed and checked.") + def save(self) -> None: # Save the recorded messages and responses to disk. - session = {"turns": self.recorded_turns} + session = {"turns_and_results": self.recorded_items} with open(self.path_to_output_file, "w", encoding="utf-8") as file: - json.dump(session, file, ensure_ascii=False, indent=4, sort_keys=True) + yaml.dump(session, file, sort_keys=False) def load(self): # Load the recorded messages and responses from disk. recorded_turns = [] with open(self.path_to_output_file, "r", encoding="utf-8") as file: - session = json.load(file) - recorded_turns = session["turns"] + session = yaml.load(file, Loader=yaml.FullLoader) + recorded_turns = session["turns_and_results"] return recorded_turns From cab51f10949a046176249c876f44c51ccec62704 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 6 Jan 2025 18:13:38 -0800 Subject: [PATCH 20/93] Use yaml file for eval settings. --- .../autogen-ext/samples/{amt.py => eval.py} | 574 +++++++++--------- .../packages/autogen-ext/samples/short.yaml | 28 + 2 files changed, 310 insertions(+), 292 deletions(-) rename python/packages/autogen-ext/samples/{amt.py => eval.py} (58%) create mode 100644 python/packages/autogen-ext/samples/short.yaml diff --git a/python/packages/autogen-ext/samples/amt.py b/python/packages/autogen-ext/samples/eval.py similarity index 58% rename from python/packages/autogen-ext/samples/amt.py rename to python/packages/autogen-ext/samples/eval.py index 1f8569a38c52..c2659b14d35f 100644 --- a/python/packages/autogen-ext/samples/amt.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -1,3 +1,5 @@ +import sys +import yaml import asyncio from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.models.openai import AzureOpenAIChatCompletionClient @@ -17,19 +19,6 @@ from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader, ClientWrapper -MEMORY_DIR = "~/agentic_memory_archive" -PAGELOG_DIR = "~/pagelogs/" -RUN_SUBDIR = "replay-short-teach" - -# Default client parameters -TEMPERATURE = 0.8 -MAX_TOKENS = 4096 -PRESENCE_PENALTY = 0.0 -FREQUENCY_PENALTY = 0.0 -TOP_P = 1.0 -MAX_RETRIES = 65535 - - def define_tasks_with_answers(): tasks_with_answers = [] @@ -80,186 +69,14 @@ def define_tasks_with_answers(): return tasks_with_answers -def create_client(page_log=None): - # Choose one. 
- # return create_oai_client(page_log) - # return create_aoai_client(page_log) - return create_trapi_client(page_log) - - -def create_oai_client(page_log): - # Create an OpenAI client - model_name = "gpt-4o-2024-08-06" - client = OpenAIChatCompletionClient( - model=model_name, - api_key="", - temperature=TEMPERATURE, - max_tokens=MAX_TOKENS, - presence_penalty=PRESENCE_PENALTY, - frequency_penalty=FREQUENCY_PENALTY, - top_p=TOP_P, - max_retries=MAX_RETRIES, - ) - if page_log is not None: - page_log.append_entry_line("Client: {}".format(client._resolved_model)) - page_log.append_entry_line(" created through OpenAI directly") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) - return client - - -def create_aoai_client(page_log): - # Create an Azure OpenAI client - token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - azure_deployment = "gpt-4o-2024-08-06-eval" - model = "gpt-4o-2024-08-06" - azure_endpoint = "https://agentic2.openai.azure.com/" - client = AzureOpenAIChatCompletionClient( - azure_endpoint=azure_endpoint, - azure_ad_token_provider=token_provider, - azure_deployment=azure_deployment, - api_version="2024-06-01", - model=model, - temperature=TEMPERATURE, - max_tokens=MAX_TOKENS, - presence_penalty=PRESENCE_PENALTY, - frequency_penalty=FREQUENCY_PENALTY, - top_p=TOP_P, - max_retries=MAX_RETRIES, - ) - if page_log is not None: - page_log.append_entry_line("Client: {}".format(client._resolved_model)) - page_log.append_entry_line(" created through Azure OpenAI") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) - return client - - -def create_trapi_client(page_log): - # Create an Azure OpenAI client through TRAPI - token_provider = get_bearer_token_provider(ChainedTokenCredential( - AzureCliCredential(), - DefaultAzureCredential( - exclude_cli_credential=True, - # Exclude other credentials we are not interested in. - exclude_environment_credential=True, - exclude_shared_token_cache_credential=True, - exclude_developer_cli_credential=True, - exclude_powershell_credential=True, - exclude_interactive_browser_credential=True, - exclude_visual_studio_code_credentials=True, - # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs - ) - ), "api://trapi/.default") - model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. 
- azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models - trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models - endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' - api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - client = AzureOpenAIChatCompletionClient( - azure_ad_token_provider=token_provider, - model=model, - azure_deployment=azure_deployment, - azure_endpoint=endpoint, - api_version=api_version, - temperature=TEMPERATURE, - max_tokens=MAX_TOKENS, - presence_penalty=PRESENCE_PENALTY, - frequency_penalty=FREQUENCY_PENALTY, - top_p=TOP_P, - max_retries=MAX_RETRIES, - ) - if page_log is not None: - page_log.append_entry_line("Client: {}".format(client._resolved_model)) - page_log.append_entry_line(" created through TRAPI") - page_log.append_entry_line(" temperature: {}".format(TEMPERATURE)) - return client - - -async def assign_task_to_magentic_one(task, model_client, page_log) -> Tuple[str, str]: - page = page_log.begin_page( - summary="assign_task_to_magentic_one", - details='', - method_call="assign_task_to_magentic_one") - - page.add_lines(task) - - general_agent = AssistantAgent( - "general_agent", - model_client, - description="A general GPT-4o AI assistant capable of performing a variety of tasks.", ) - - web_surfer = MultimodalWebSurfer( - name="web_surfer", - model_client=model_client, - downloads_folder="logs", - debug_dir="logs", - to_save_screenshots=True, - ) - - team = MagenticOneGroupChat( - [general_agent, web_surfer], - model_client=model_client, - max_turns=20, - ) - - # Get the team's text response to the task. - stream = team.run_stream(task=task) - task_result = await Console(stream) - response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - - # MagenticOne's response is the chat history, which we use here as the work history. - work_history = response_str - - page_log.finish_page(page) - return response_str, work_history - - -async def assign_task_to_client(task, client, page_log): - page = page_log.begin_page( - summary="assign_task_to_client", - details='', - method_call="assign_task_to_client") - - page.add_lines(task) - - system_message_content = """You are a helpful and thoughtful assistant. -In responding to every user message, you follow the same multi-step process given here: -1. Explain your understanding of the user message in detail, covering all the important points. -2. List as many possible responses as you can think of. -3. Carefully list and weigh the pros and cons (if any) of each possible response. -4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. -5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. -6. 
Finish by providing your final response in the particular format requested by the user.""" - - system_message = SystemMessage(content=system_message_content) - user_message = UserMessage(content=task, source="User") - - input_messages = [system_message] + [user_message] - response = await client.create(input_messages) - response_str = response.content - - # Log the model call - page_log.add_model_call(description="Ask the model", - details="to complete the task", input_messages=input_messages, - response=response, - num_input_tokens=0, caller='assign_task_to_client') - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - - # Use the response as the work history as well. - work_history = response_str - - page_log.finish_page(page) - return response_str, work_history - - async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, - client, page_log) -> None: + client, page_log, memory_dir, run_subdir) -> None: page = page_log.begin_page( summary="train", details='', method_call="train") memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + memory_dir=memory_dir, run_subdir=run_subdir) await memory.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], @@ -271,7 +88,7 @@ async def train(task_with_answer, max_train_trials, max_test_trials, task_assign async def test(task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, - client, page_log) -> Tuple[str, int, int]: + client, page_log, memory_dir, run_subdir) -> Tuple[str, int, int]: page = page_log.begin_page( summary="test", details='', @@ -282,7 +99,7 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor if use_memory: page.add_lines("Testing with memory.\n", flush=True) memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + memory_dir=memory_dir, run_subdir=run_subdir) response, num_successes, num_trials = await memory.test_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], @@ -312,7 +129,8 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor return response, num_successes, num_trials -async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, num_trials, reset_memory): +async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, memory_dir, run_subdir, + num_trials, reset_memory): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, @@ -320,12 +138,14 @@ async def test_on_task_with_memory(task_index, task_assignment_callback, client, use_memory=True, reset_memory=reset_memory, client=client, - page_log=page_log) + page_log=page_log, + memory_dir=memory_dir, + run_subdir=run_subdir) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) return num_successes, num_trials -async def test_on_task(task_index, task_assignment_callback, client, page_log, num_trials): +async def test_on_task(task_index, task_assignment_callback, client, page_log, memory_dir, run_subdir, num_trials): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, @@ -333,13 +153,15 @@ async def test_on_task(task_index, 
task_assignment_callback, client, page_log, n use_memory=False, reset_memory=False, client=client, - page_log=page_log) + page_log=page_log, + memory_dir=memory_dir, + run_subdir=run_subdir) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) return num_successes, num_trials async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, num_final_test_trials, - task_assignment_callback, client, page_log): + task_assignment_callback, client, page_log, memory_dir, run_subdir): page = page_log.begin_page( summary="train_and_test", details='', @@ -359,7 +181,9 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ task_assignment_callback=task_assignment_callback, reset_memory=True, client=client, - page_log=page_log) + page_log=page_log, + memory_dir=memory_dir, + run_subdir=run_subdir) # Test on all tasks. for j, task_with_answer in enumerate(task_with_answer_list): @@ -370,7 +194,9 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ use_memory=True, reset_memory=False, client=client, - page_log=page_log) + page_log=page_log, + memory_dir=memory_dir, + run_subdir=run_subdir) page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) total_num_successes_list[j] += num_successes @@ -382,7 +208,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ return total_num_successes_list, total_num_trials -async def test_without_memory(task_assignment_callback, client, page_log): +async def test_without_memory(task_assignment_callback, client, page_log, memory_dir, run_subdir): page = page_log.begin_page( summary="test_without_memory", details='', @@ -391,7 +217,8 @@ async def test_without_memory(task_assignment_callback, client, page_log): task_index = 5 num_trials = 20 - num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, num_trials) + num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, + memory_dir, run_subdir, num_trials) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) @@ -399,7 +226,7 @@ async def test_without_memory(task_assignment_callback, client, page_log): page_log.finish_page(page) -async def test_with_memory(task_assignment_callback, client, page_log): +async def test_with_memory(task_assignment_callback, client, page_log, memory_dir, run_subdir): page = page_log.begin_page( summary="test_with_memory", details='', @@ -408,6 +235,7 @@ async def test_with_memory(task_assignment_callback, client, page_log): task_index = 3 num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, + memory_dir=memory_dir, run_subdir=run_subdir, num_trials=3, reset_memory=False) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) @@ -415,42 +243,15 @@ async def test_with_memory(task_assignment_callback, client, page_log): page_log.finish_page(page) -async def test_self_teaching(task_assignment_callback, client, page_log): - page = page_log.begin_page( - summary="test_self_teaching", - details='', - method_call="test_self_teaching") - - # Choose the tasks from those listed at the top. 
- task_index_list = [3, 1] - - # Train and test on any number of tasks using memory. - num_loops = 10 # Normally 10 - total_num_successes_list, total_num_trials = await train_and_test( - task_index_list=task_index_list, - num_loops=num_loops, - max_train_trials=10, # Normally 10 - max_test_trials=3, # Normally 3 - num_final_test_trials=3, # Normally 3 - task_assignment_callback=task_assignment_callback, - client=client, - page_log=page_log) - - for i, total_num_successes in enumerate(total_num_successes_list): - success_rate = round((total_num_successes / total_num_trials) * 100) - page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) - - page_log.finish_page(page) - - -async def send_message_to_agent(text, task_assignment_callback, client, page_log, reset_memory) -> None: +async def send_message_to_agent(text, task_assignment_callback, client, page_log, memory_dir, run_subdir, + reset_memory) -> None: page = page_log.begin_page( summary="send_message_to_agent", details="", method_call="send_message_to_agent") memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + memory_dir=memory_dir, run_subdir=run_subdir) response = await memory.handle_user_message(text, task_assignment_callback) page.add_lines("Response: {}\n".format(response), flush=True) @@ -459,7 +260,7 @@ async def send_message_to_agent(text, task_assignment_callback, client, page_log return response -async def test_teachability(task_assignment_callback, client, page_log): +async def test_teachability(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings): page = page_log.begin_page( summary="test_teachability", details='', @@ -474,7 +275,8 @@ async def test_teachability(task_assignment_callback, client, page_log): # First test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=True) + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, run_subdir, + reset_memory=True) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -487,11 +289,13 @@ async def test_teachability(task_assignment_callback, client, page_log): # Give the advice. page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await send_message_to_agent(insight, task_assignment_callback, client, page_log, reset_memory=False) + await send_message_to_agent(insight, task_assignment_callback, client, page_log, memory_dir, run_subdir, + reset_memory=False) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, reset_memory=False) + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, run_subdir, + reset_memory=False) # Check the response. 
response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -504,31 +308,31 @@ async def test_teachability(task_assignment_callback, client, page_log): page_log.finish_page(page) - -async def give_demonstration_to_agent(task, demonstration, client, page_log) -> None: +async def give_demonstration_to_agent(task, demonstration, client, page_log, memory_dir, run_subdir) -> None: page = page_log.begin_page( summary="give_demonstration_to_agent", details="", method_call="give_demonstration_to_agent") - memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=MEMORY_DIR, run_subdir=RUN_SUBDIR) + memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=memory_dir, run_subdir=run_subdir) await memory.learn_from_demonstration(task, demonstration) page_log.finish_page(page) -async def test_learning_from_demonstration(task_assignment_callback, client, page_log): +async def test_learning_from_demonstration(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings): page = page_log.begin_page( summary="test_learning_from_demonstration", details='', method_call="test_learning_from_demonstration") task_index = 5 - num_trials = 10 + num_trials = settings["num_trials"] # First test after clearing memory. page.add_lines("To get a baseline, clear memory, then assign the task.") num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, + memory_dir=memory_dir, run_subdir=run_subdir, num_trials=num_trials, reset_memory=True) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -537,11 +341,12 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag page.add_lines("Demonstrate a solution to a similar task.") demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - await give_demonstration_to_agent(demo_task, demonstration, client, page_log) + await give_demonstration_to_agent(demo_task, demonstration, client, page_log, memory_dir, run_subdir) # Now test again to see if the demonstration (retrieved from memory) helps. 
page.add_lines("Assign the task again to see if the demonstration helps.")
     num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log,
+                                                               memory_dir=memory_dir, run_subdir=run_subdir,
                                                                num_trials=num_trials, reset_memory=False)
     success_rate = round((num_successes / num_trials) * 100)
     page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True)
 
     page_log.finish_page(page)
 
 
-async def call_client(task, client, page_log):
+async def test_self_teaching(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings):
     page = page_log.begin_page(
-        summary="call_client",
+        summary="test_self_teaching",
         details='',
-        method_call="call_client")
+        method_call="test_self_teaching")
 
-    page.add_lines(task)
+    # Choose the tasks from those listed at the top.
+    task_index_list = [3, 1]
 
-    system_message_content = """You are a helpful and thoughtful assistant."""
+    # Train and test on any number of tasks using memory.
+    total_num_successes_list, total_num_trials = await train_and_test(
+        task_index_list=task_index_list,
+        num_loops=settings["num_loops"],
+        max_train_trials=settings["max_train_trials"],
+        max_test_trials=settings["max_test_trials"],
+        num_final_test_trials=settings["num_final_test_trials"],
+        task_assignment_callback=task_assignment_callback,
+        client=client,
+        page_log=page_log,
+        memory_dir=memory_dir,
+        run_subdir=run_subdir)
 
-    system_message = SystemMessage(content=system_message_content)
-    user_message = UserMessage(content=task, source="User")
+    for i, total_num_successes in enumerate(total_num_successes_list):
+        success_rate = round((total_num_successes / total_num_trials) * 100)
+        page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True)
 
-    input_messages = [system_message] + [user_message]
-    response = await client.create(input_messages)
-    response_str = response.content
+    page_log.finish_page(page)
 
-    # Log the model call
-    page_log.add_model_call(description="Ask the model",
-                            details="to complete the task", input_messages=input_messages,
-                            response=response,
-                            num_input_tokens=0, caller='call_client')
-    page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True)
 
-    page_log.finish_page(page)
-    return response_str
+class Evaluator:
+    def __init__(self, settings_filepath):
+        # Load the .yaml settings file.
+        with open(settings_filepath, "r") as file:
+            self.settings = yaml.load(file, Loader=yaml.FullLoader)
+            self.pagelog_dir = self.settings["pagelog_dir"]
+            self.memory_dir = self.settings["memory_dir"]
+            self.run_subdir = self.settings["run_subdir"]
+            self.base_agent = self.settings["base_agent"]
+            self.client_settings = self.settings["client"]
+        self.page_log = None
+
+    def create_client(self):
+        client = None
+        provider = self.client_settings["provider"]
+        if provider == "openai":
+            client = self.create_oai_client()
+        elif provider == "azure_openai":
+            client = self.create_aoai_client()
+        elif provider == "trapi":
+            client = self.create_trapi_client()
+        else:
+            assert False, "Invalid client provider"
+
+        # Check if the client should be wrapped.
+        if "wrapper" in self.client_settings:
+            wrapper_settings = self.client_settings["wrapper"]
+            if wrapper_settings["enabled"]:
+                # Wrap the client. 
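+                # In "record" mode the wrapper saves every model call; in "check-replay" mode it
+                # verifies each call against the saved session and replays the recorded response.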
+                client = ClientWrapper(
+                    client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log)
+
+        return client
+
+    def create_oai_client(self):
+        # Create an OpenAI client
+        model_name = "gpt-4o-2024-08-06"
+        client = OpenAIChatCompletionClient(
+            model=model_name,
+            api_key=self.client_settings["api_key"],
+            temperature=self.client_settings["temperature"],
+            max_tokens=self.client_settings["max_tokens"],
+            presence_penalty=self.client_settings["presence_penalty"],
+            frequency_penalty=self.client_settings["frequency_penalty"],
+            top_p=self.client_settings["top_p"],
+            max_retries=self.client_settings["max_retries"],
+        )
+        self.page_log.append_entry_line("Client:  {}".format(client._resolved_model))
+        self.page_log.append_entry_line("  created through OpenAI directly")
+        self.page_log.append_entry_line("  temperature:  {}".format(self.client_settings["temperature"]))
+        return client
+
+    def create_aoai_client(self):
+        # Create an Azure OpenAI client
+        token_provider = get_bearer_token_provider(DefaultAzureCredential(),
+                                                   "https://cognitiveservices.azure.com/.default")
+        azure_deployment = "gpt-4o-2024-08-06-eval"
+        model = "gpt-4o-2024-08-06"
+        azure_endpoint = "https://agentic2.openai.azure.com/"
+        client = AzureOpenAIChatCompletionClient(
+            azure_endpoint=azure_endpoint,
+            azure_ad_token_provider=token_provider,
+            azure_deployment=azure_deployment,
+            api_version="2024-06-01",
+            model=model,
+            temperature=self.client_settings["temperature"],
+            max_tokens=self.client_settings["max_tokens"],
+            presence_penalty=self.client_settings["presence_penalty"],
+            frequency_penalty=self.client_settings["frequency_penalty"],
+            top_p=self.client_settings["top_p"],
+            max_retries=self.client_settings["max_retries"],
+        )
+        self.page_log.append_entry_line("Client:  {}".format(client._resolved_model))
+        self.page_log.append_entry_line("  created through Azure OpenAI")
+        self.page_log.append_entry_line("  temperature:  {}".format(self.client_settings["temperature"]))
+        return client
+
+    def create_trapi_client(self):
+        # Create an Azure OpenAI client through TRAPI
+        token_provider = get_bearer_token_provider(ChainedTokenCredential(
+            AzureCliCredential(),
+            DefaultAzureCredential(
+                exclude_cli_credential=True,
+                # Exclude other credentials we are not interested in.
+                exclude_environment_credential=True,
+                exclude_shared_token_cache_credential=True,
+                exclude_developer_cli_credential=True,
+                exclude_powershell_credential=True,
+                exclude_interactive_browser_credential=True,
+                exclude_visual_studio_code_credential=True,
+                # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"),  # See the TRAPI docs
+            )
+        ), "api://trapi/.default")
+        model = "gpt-4o-2024-08-06"  # This is (for instance) the OpenAI model name, which is used to look up capabilities.
+ azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models + endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' + api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release + client = AzureOpenAIChatCompletionClient( + azure_ad_token_provider=token_provider, + model=model, + azure_deployment=azure_deployment, + azure_endpoint=endpoint, + api_version=api_version, + temperature=self.client_settings["temperature"], + max_tokens=self.client_settings["max_tokens"], + presence_penalty=self.client_settings["presence_penalty"], + frequency_penalty=self.client_settings["frequency_penalty"], + top_p=self.client_settings["top_p"], + max_retries=self.client_settings["max_retries"], + ) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) + self.page_log.append_entry_line(" created through TRAPI") + self.page_log.append_entry_line(" temperature: {}".format(self.client_settings["temperature"])) + return client + + async def assign_task_to_magentic_one(self, task, model_client, page_log) -> Tuple[str, str]: + page = page_log.begin_page( + summary="assign_task_to_magentic_one", + details='', + method_call="assign_task_to_magentic_one") + + page.add_lines(task) + + general_agent = AssistantAgent( + "general_agent", + model_client, + description="A general GPT-4o AI assistant capable of performing a variety of tasks.", ) + + web_surfer = MultimodalWebSurfer( + name="web_surfer", + model_client=model_client, + downloads_folder="logs", + debug_dir="logs", + to_save_screenshots=True, + ) + team = MagenticOneGroupChat( + [general_agent, web_surfer], + model_client=model_client, + max_turns=20, + ) -async def main() -> None: - # Create the PageLog. - page_log = PageLog(PAGELOG_DIR, RUN_SUBDIR) - page = page_log.begin_page( - summary="main", - details='', - method_call="main") + # Get the team's text response to the task. + stream = team.run_stream(task=task) + task_result = await Console(stream) + response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) + page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - # Create the client. - client = create_client(page_log) + # MagenticOne's response is the chat history, which we use here as the work history. + work_history = response_str - # Choose the client, agent or team to assign the task to. - task_assignment_callback = assign_task_to_client # assign_task_to_client or assign_task_to_magentic_one + page_log.finish_page(page) + return response_str, work_history - # SELECT ONE TEST TO RUN - # await test_without_memory(task_assignment_callback, client, page_log) - # await test_with_memory(task_assignment_callback, client, page_log) - # await test_self_teaching(task_assignment_callback, client, page_log) - # await test_teachability(task_assignment_callback, client, page_log) - # await test_learning_from_demonstration(task_assignment_callback, client, page_log) + async def assign_task_to_client(self, task, client, page_log): + page = page_log.begin_page( + summary="assign_task_to_client", + details='', + method_call="assign_task_to_client") + page.add_lines(task) - # WRAPPED-CLIENT TESTS + system_message_content = """You are a helpful and thoughtful assistant. 
+In responding to every user message, you follow the same multi-step process given here: +1. Explain your understanding of the user message in detail, covering all the important points. +2. List as many possible responses as you can think of. +3. Carefully list and weigh the pros and cons (if any) of each possible response. +4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. +5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. +6. Finish by providing your final response in the particular format requested by the user.""" - # Wrap the client in a ClientWrapper to record or check-replay a session. - session_name = "short-teach" + system_message = SystemMessage(content=system_message_content) + user_message = UserMessage(content=task, source="User") - # Record - # client = ClientWrapper(client, "record", session_name, page_log) - # await test_teachability(task_assignment_callback, client, page_log) - # # await test_learning_from_demonstration(task_assignment_callback, client, page_log) - # # await test_self_teaching(task_assignment_callback, client, page_log) - # client.finalize() + input_messages = [system_message] + [user_message] + response = await client.create(input_messages) + response_str = response.content - # Check-replay - # client = ClientWrapper(client, "check-replay", session_name, page_log) - # await test_teachability(task_assignment_callback, client, page_log) - # # await test_learning_from_demonstration(task_assignment_callback, client, page_log) - # # await test_self_teaching(task_assignment_callback, client, page_log) - # client.finalize() + # Log the model call + page_log.add_model_call(description="Ask the model", + details="to complete the task", input_messages=input_messages, + response=response, + num_input_tokens=0, caller='assign_task_to_client') + page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + # Use the response as the work history as well. + work_history = response_str - page_log.flush(final=True) # Finalize the page log - page_log.finish_page(page) + page_log.finish_page(page) + return response_str, work_history + + async def run(self): + # Create the PageLog. + self.page_log = PageLog(self.settings["pagelog_dir"], self.settings["run_subdir"]) + page = self.page_log.begin_page( + summary="main", + details='', + method_call="main") + + # Create the client. + client = self.create_client() + + # Choose the base agent. + if self.base_agent == "magentic_one": + task_assignment_callback = self.assign_task_to_magentic_one + elif self.base_agent == "thin_agent": + task_assignment_callback = self.assign_task_to_client + else: + assert False, "Invalid base agent" + + # Execute each eval. + evals = self.settings["evals"] + for ev in evals: + eval_function = globals()[ev["name"]] + await eval_function(task_assignment_callback, client, self.page_log, self.memory_dir, self.run_subdir, ev) + + if hasattr(client, "finalize"): + # The client wrapper needs to be finalized. 
+ client.finalize() + + self.page_log.flush(final=True) # Finalize the page log + self.page_log.finish_page(page) if __name__ == "__main__": - asyncio.run(main()) + args = sys.argv[1:] + if len(args) != 1: + print("Usage: amt.py ") + else: + evaluator = Evaluator(settings_filepath=args[0]) + asyncio.run(evaluator.run()) diff --git a/python/packages/autogen-ext/samples/short.yaml b/python/packages/autogen-ext/samples/short.yaml new file mode 100644 index 000000000000..770ce8c382e8 --- /dev/null +++ b/python/packages/autogen-ext/samples/short.yaml @@ -0,0 +1,28 @@ +pagelog_dir: ~/pagelogs/ +memory_dir: ~/agentic_memory_archive +run_subdir: temp +base_agent: thin_agent # thin_agent or magentic_one + +client: + provider: trapi # openai, azure_openai, or trapi + api_key: sk-xx # only for openai + temperature: 0.8 + max_tokens: 4096 + presence_penalty: 0.0 + frequency_penalty: 0.0 + top_p: 1.0 + max_retries: 65535 + wrapper: + enabled: 1 # Only available for thin_agent + mode: check-replay # pass-through, record, or check-replay + session_name: short-3 + +evals: + - name: test_teachability + - name: test_learning_from_demonstration + num_trials: 1 # 1-10 + - name: test_self_teaching + num_loops: 1 # 1-10 + max_train_trials: 2 # 2-10 + max_test_trials: 1 # 1-3 + num_final_test_trials: 1 # 1-3 From d91e58c8750490ddba310791322cb4137e5fe382 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 7 Jan 2025 14:33:19 -0800 Subject: [PATCH 21/93] Simplify paths and other settings. --- python/packages/autogen-ext/samples/eval.py | 213 +++++++++--------- .../packages/autogen-ext/samples/short.yaml | 28 ++- .../agentic_memory/_agentic_memory.py | 5 +- .../agentic_memory/_knowledge_archive.py | 8 +- .../autogen_ext/agentic_memory/_page_log.py | 29 +-- 5 files changed, 143 insertions(+), 140 deletions(-) diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index c2659b14d35f..9ece2f6790a5 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -70,13 +70,13 @@ def define_tasks_with_answers(): async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, - client, page_log, memory_dir, run_subdir) -> None: + client, page_log, memory_dir) -> None: page = page_log.begin_page( summary="train", details='', method_call="train") memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir, run_subdir=run_subdir) + memory_dir=memory_dir) await memory.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], @@ -88,7 +88,7 @@ async def train(task_with_answer, max_train_trials, max_test_trials, task_assign async def test(task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, - client, page_log, memory_dir, run_subdir) -> Tuple[str, int, int]: + client, page_log, memory_dir) -> Tuple[str, int, int]: page = page_log.begin_page( summary="test", details='', @@ -99,7 +99,7 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor if use_memory: page.add_lines("Testing with memory.\n", flush=True) memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir, run_subdir=run_subdir) + memory_dir=memory_dir) response, num_successes, num_trials = await memory.test_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], @@ -129,7 +129,7 @@ async def test(task_with_answer, 
num_trials, task_assignment_callback, use_memor return response, num_successes, num_trials -async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, memory_dir, run_subdir, +async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, memory_dir, num_trials, reset_memory): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], @@ -139,13 +139,12 @@ async def test_on_task_with_memory(task_index, task_assignment_callback, client, reset_memory=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir, - run_subdir=run_subdir) + memory_dir=memory_dir) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) return num_successes, num_trials -async def test_on_task(task_index, task_assignment_callback, client, page_log, memory_dir, run_subdir, num_trials): +async def test_on_task(task_index, task_assignment_callback, client, page_log, memory_dir, num_trials): last_response, num_successes, num_trials = await test( task_with_answer=define_tasks_with_answers()[task_index], num_trials=num_trials, @@ -154,14 +153,13 @@ async def test_on_task(task_index, task_assignment_callback, client, page_log, m reset_memory=False, client=client, page_log=page_log, - memory_dir=memory_dir, - run_subdir=run_subdir) + memory_dir=memory_dir) print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) return num_successes, num_trials async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, num_final_test_trials, - task_assignment_callback, client, page_log, memory_dir, run_subdir): + task_assignment_callback, client, page_log, memory_dir): page = page_log.begin_page( summary="train_and_test", details='', @@ -182,8 +180,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ reset_memory=True, client=client, page_log=page_log, - memory_dir=memory_dir, - run_subdir=run_subdir) + memory_dir=memory_dir) # Test on all tasks. 
for j, task_with_answer in enumerate(task_with_answer_list): @@ -195,8 +192,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ reset_memory=False, client=client, page_log=page_log, - memory_dir=memory_dir, - run_subdir=run_subdir) + memory_dir=memory_dir) page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) total_num_successes_list[j] += num_successes @@ -208,7 +204,7 @@ async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_ return total_num_successes_list, total_num_trials -async def test_without_memory(task_assignment_callback, client, page_log, memory_dir, run_subdir): +async def test_without_memory(task_assignment_callback, client, page_log, memory_dir): page = page_log.begin_page( summary="test_without_memory", details='', @@ -218,7 +214,7 @@ async def test_without_memory(task_assignment_callback, client, page_log, memory num_trials = 20 num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, - memory_dir, run_subdir, num_trials) + memory_dir, num_trials) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) @@ -226,7 +222,7 @@ async def test_without_memory(task_assignment_callback, client, page_log, memory page_log.finish_page(page) -async def test_with_memory(task_assignment_callback, client, page_log, memory_dir, run_subdir): +async def test_with_memory(task_assignment_callback, client, page_log, memory_dir): page = page_log.begin_page( summary="test_with_memory", details='', @@ -235,7 +231,7 @@ async def test_with_memory(task_assignment_callback, client, page_log, memory_di task_index = 3 num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, run_subdir=run_subdir, + memory_dir=memory_dir, num_trials=3, reset_memory=False) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) @@ -243,7 +239,7 @@ async def test_with_memory(task_assignment_callback, client, page_log, memory_di page_log.finish_page(page) -async def send_message_to_agent(text, task_assignment_callback, client, page_log, memory_dir, run_subdir, +async def send_message_to_agent(text, task_assignment_callback, client, page_log, memory_dir, reset_memory) -> None: page = page_log.begin_page( summary="send_message_to_agent", @@ -251,7 +247,7 @@ async def send_message_to_agent(text, task_assignment_callback, client, page_log method_call="send_message_to_agent") memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir, run_subdir=run_subdir) + memory_dir=memory_dir) response = await memory.handle_user_message(text, task_assignment_callback) page.add_lines("Response: {}\n".format(response), flush=True) @@ -260,7 +256,7 @@ async def send_message_to_agent(text, task_assignment_callback, client, page_log return response -async def test_teachability(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings): +async def test_teachability(task_assignment_callback, client, page_log, memory_dir, settings): page = page_log.begin_page( summary="test_teachability", details='', @@ -275,7 +271,7 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d # First 
test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, run_subdir, + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, reset_memory=True) # Check the response. @@ -289,12 +285,12 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d # Give the advice. page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await send_message_to_agent(insight, task_assignment_callback, client, page_log, memory_dir, run_subdir, + await send_message_to_agent(insight, task_assignment_callback, client, page_log, memory_dir, reset_memory=False) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, run_subdir, + response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, reset_memory=False) # Check the response. @@ -308,19 +304,19 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d page_log.finish_page(page) -async def give_demonstration_to_agent(task, demonstration, client, page_log, memory_dir, run_subdir) -> None: +async def give_demonstration_to_agent(task, demonstration, client, page_log, memory_dir) -> None: page = page_log.begin_page( summary="give_demonstration_to_agent", details="", method_call="give_demonstration_to_agent") - memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=memory_dir, run_subdir=run_subdir) + memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=memory_dir) await memory.learn_from_demonstration(task, demonstration) page_log.finish_page(page) -async def test_learning_from_demonstration(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings): +async def test_learning_from_demonstration(task_assignment_callback, client, page_log, memory_dir, settings): page = page_log.begin_page( summary="test_learning_from_demonstration", details='', @@ -332,7 +328,7 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag # First test after clearing memory. page.add_lines("To get a baseline, clear memory, then assign the task.") num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, run_subdir=run_subdir, + memory_dir=memory_dir, num_trials=num_trials, reset_memory=True) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -341,12 +337,12 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag page.add_lines("Demonstrate a solution to a similar task.") demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. 
Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - await give_demonstration_to_agent(demo_task, demonstration, client, page_log, memory_dir, run_subdir) + await give_demonstration_to_agent(demo_task, demonstration, client, page_log, memory_dir) # Now test again to see if the demonstration (retrieved from memory) helps. page.add_lines("Assign the task again to see if the demonstration helps.") num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, run_subdir=run_subdir, + memory_dir=memory_dir, num_trials=num_trials, reset_memory=False) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -354,7 +350,7 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag page_log.finish_page(page) -async def test_self_teaching(task_assignment_callback, client, page_log, memory_dir, run_subdir, settings): +async def test_self_teaching(task_assignment_callback, client, page_log, memory_dir, settings): page = page_log.begin_page( summary="test_self_teaching", details='', @@ -373,8 +369,7 @@ async def test_self_teaching(task_assignment_callback, client, page_log, memory_ task_assignment_callback=task_assignment_callback, client=client, page_log=page_log, - memory_dir=memory_dir, - run_subdir=run_subdir) + memory_dir=memory_dir) for i, total_num_successes in enumerate(total_num_successes_list): success_rate = round((total_num_successes / total_num_trials) * 100) @@ -384,32 +379,24 @@ async def test_self_teaching(task_assignment_callback, client, page_log, memory_ class Evaluator: - def __init__(self, settings_filepath): - # Load the .yaml settings file. 
- with open(settings_filepath, "r") as file: - self.settings = yaml.load(file, Loader=yaml.FullLoader) - self.pagelog_dir = self.settings["pagelog_dir"] - self.memory_dir = self.settings["memory_dir"] - self.run_subdir = self.settings["run_subdir"] - self.base_agent = self.settings["base_agent"] - self.client_settings = self.settings["client"] + def __init__(self): self.page_log = None - def create_client(self): + def create_client(self, settings): client = None - provider = self.client_settings["provider"] + provider = settings["provider"] if provider == "openai": - client = self.create_oai_client() + client = self.create_oai_client(settings) elif provider == "azure_openai": - client = self.create_aoai_client() + client = self.create_aoai_client(settings) elif provider == "trapi": - client = self.create_trapi_client() + client = self.create_trapi_client(settings) else: assert False, "Invalid client provider" # Check if the client should be wrapped. - if "wrapper" in self.client_settings: - wrapper_settings = self.client_settings["wrapper"] + if "wrapper" in settings: + wrapper_settings = settings["wrapper"] if wrapper_settings["enabled"]: # Wrap the client. client = ClientWrapper( @@ -417,25 +404,25 @@ def create_client(self): return client - def create_oai_client(self): + def create_oai_client(self, settings): # Create an OpenAI client model_name = "gpt-4o-2024-08-06" client = OpenAIChatCompletionClient( model=model_name, - api_key=self.client_settings["api_key"], - temperature=self.client_settings["temperature"], - max_tokens=self.client_settings["max_tokens"], - presence_penalty=self.client_settings["presence_penalty"], - frequency_penalty=self.client_settings["frequency_penalty"], - top_p=self.client_settings["top_p"], - max_retries=self.client_settings["max_retries"], + api_key=settings["api_key"], + temperature=settings["temperature"], + max_tokens=settings["max_tokens"], + presence_penalty=settings["presence_penalty"], + frequency_penalty=settings["frequency_penalty"], + top_p=settings["top_p"], + max_retries=settings["max_retries"], ) self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through OpenAI directly") - self.page_log.append_entry_line(" temperature: {}".format(self.client_settings["temperature"])) + self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client - def create_aoai_client(self): + def create_aoai_client(self, settings): # Create an Azure OpenAI client token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") @@ -448,19 +435,19 @@ def create_aoai_client(self): azure_deployment=azure_deployment, api_version="2024-06-01", model=model, - temperature=self.client_settings["temperature"], - max_tokens=self.client_settings["max_tokens"], - presence_penalty=self.client_settings["presence_penalty"], - frequency_penalty=self.client_settings["frequency_penalty"], - top_p=self.client_settings["top_p"], - max_retries=self.client_settings["max_retries"], + temperature=settings["temperature"], + max_tokens=settings["max_tokens"], + presence_penalty=settings["presence_penalty"], + frequency_penalty=settings["frequency_penalty"], + top_p=settings["top_p"], + max_retries=settings["max_retries"], ) self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through Azure OpenAI") - self.page_log.append_entry_line(" temperature: 
{}".format(self.client_settings["temperature"])) + self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client - def create_trapi_client(self): + def create_trapi_client(self, settings): # Create an Azure OpenAI client through TRAPI token_provider = get_bearer_token_provider(ChainedTokenCredential( AzureCliCredential(), @@ -487,16 +474,16 @@ def create_trapi_client(self): azure_deployment=azure_deployment, azure_endpoint=endpoint, api_version=api_version, - temperature=self.client_settings["temperature"], - max_tokens=self.client_settings["max_tokens"], - presence_penalty=self.client_settings["presence_penalty"], - frequency_penalty=self.client_settings["frequency_penalty"], - top_p=self.client_settings["top_p"], - max_retries=self.client_settings["max_retries"], + temperature=settings["temperature"], + max_tokens=settings["max_tokens"], + presence_penalty=settings["presence_penalty"], + frequency_penalty=settings["frequency_penalty"], + top_p=settings["top_p"], + max_retries=settings["max_retries"], ) self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through TRAPI") - self.page_log.append_entry_line(" temperature: {}".format(self.client_settings["temperature"])) + self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client async def assign_task_to_magentic_one(self, task, model_client, page_log) -> Tuple[str, str]: @@ -575,37 +562,53 @@ async def assign_task_to_client(self, task, client, page_log): page_log.finish_page(page) return response_str, work_history - async def run(self): - # Create the PageLog. - self.page_log = PageLog(self.settings["pagelog_dir"], self.settings["run_subdir"]) - page = self.page_log.begin_page( - summary="main", - details='', - method_call="main") - - # Create the client. - client = self.create_client() - - # Choose the base agent. - if self.base_agent == "magentic_one": - task_assignment_callback = self.assign_task_to_magentic_one - elif self.base_agent == "thin_agent": - task_assignment_callback = self.assign_task_to_client - else: - assert False, "Invalid base agent" + async def run(self, settings_filepath): + # Load the settings from yaml. + with open(settings_filepath, "r") as file: + settings = yaml.load(file, Loader=yaml.FullLoader) + evaluator_settings = settings["evaluator"] + + # Create the PageLog. + self.page_log = PageLog(evaluator_settings["pagelog"]) + page = self.page_log.begin_page( + summary="main", + details='', + method_call="main") + + # Create the client, which is used by both the apprentice and the evaluator. + client = self.create_client(settings["client"]) + + # Configure the apprentice. + apprentice_settings = settings["apprentice"] + + # Configure the agentic memory controller. + agentic_memory_controller_settings = apprentice_settings["agentic_memory_controller"] + agentic_memory_bank_settings = agentic_memory_controller_settings["agentic_memory_bank"] + + # Configure the agent wrapper. + agent_wrapper_settings = apprentice_settings["agent_wrapper"] + + # Configure the base agent. + base_agent = agent_wrapper_settings["base_agent"] + if base_agent == "magentic_one": + task_assignment_callback = self.assign_task_to_magentic_one + elif base_agent == "thin_agent": + task_assignment_callback = self.assign_task_to_client + else: + assert False, "Invalid base agent" - # Execute each eval. 
- evals = self.settings["evals"] - for ev in evals: - eval_function = globals()[ev["name"]] - await eval_function(task_assignment_callback, client, self.page_log, self.memory_dir, self.run_subdir, ev) + # Execute each eval. + memory_path = agentic_memory_bank_settings["path"] + for ev in settings["evaluations"]: + eval_function = globals()[ev["name"]] + await eval_function(task_assignment_callback, client, self.page_log, memory_path, ev) - if hasattr(client, "finalize"): - # The client wrapper needs to be finalized. - client.finalize() + if hasattr(client, "finalize"): + # If this is a client wrapper, it needs to be finalized. + client.finalize() - self.page_log.flush(final=True) # Finalize the page log - self.page_log.finish_page(page) + self.page_log.flush(final=True) # Finalize the page log + self.page_log.finish_page(page) if __name__ == "__main__": @@ -613,5 +616,5 @@ async def run(self): if len(args) != 1: print("Usage: amt.py ") else: - evaluator = Evaluator(settings_filepath=args[0]) - asyncio.run(evaluator.run()) + evaluator = Evaluator() + asyncio.run(evaluator.run(settings_filepath=args[0])) diff --git a/python/packages/autogen-ext/samples/short.yaml b/python/packages/autogen-ext/samples/short.yaml index 770ce8c382e8..d9085cd69462 100644 --- a/python/packages/autogen-ext/samples/short.yaml +++ b/python/packages/autogen-ext/samples/short.yaml @@ -1,28 +1,38 @@ -pagelog_dir: ~/pagelogs/ -memory_dir: ~/agentic_memory_archive -run_subdir: temp -base_agent: thin_agent # thin_agent or magentic_one + +evaluator: + pagelog: + path: ~/pagelogs/temp client: provider: trapi # openai, azure_openai, or trapi api_key: sk-xx # only for openai + # Add the model name here. temperature: 0.8 max_tokens: 4096 presence_penalty: 0.0 frequency_penalty: 0.0 top_p: 1.0 max_retries: 65535 - wrapper: - enabled: 1 # Only available for thin_agent + wrapper: # Provides record & replay functionality + enabled: 1 # Only works for thin_agent currently mode: check-replay # pass-through, record, or check-replay session_name: short-3 -evals: +apprentice: + agentic_memory_controller: + agentic_memory_bank: + path: ~/agentic_memory_bank/temp + agent_wrapper: + base_agent: thin_agent # thin_agent or magentic_one + +evaluations: - name: test_teachability + - name: test_learning_from_demonstration num_trials: 1 # 1-10 + - name: test_self_teaching num_loops: 1 # 1-10 - max_train_trials: 2 # 2-10 - max_test_trials: 1 # 1-3 + max_train_trials: 2 # 2-10 Move to AMC + max_test_trials: 1 # 1-3 Move to AMC num_final_test_trials: 1 # 1-3 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py index 1ea277c5db3e..9d16b340553f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py @@ -5,12 +5,11 @@ class AgenticMemory: - def __init__(self, reset, client, page_log, memory_dir, run_subdir): + def __init__(self, reset, client, page_log, memory_dir): self.client = client self.page_log = page_log self.prompter = Prompter(client, page_log) - self.archive = KnowledgeArchive(verbosity=0, reset=reset, memory_dir=memory_dir, run_subdir=run_subdir, - page_log=page_log) + self.archive = KnowledgeArchive(verbosity=0, reset=reset, memory_dir=memory_dir, page_log=page_log) self.grader = Grader(client, page_log) async def train_on_task(self, diff --git 
a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py index 24b67509637b..b223c4dfd0b2 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py @@ -22,20 +22,18 @@ def __init__( verbosity: Optional[int] = 0, reset: Optional[bool] = False, memory_dir: str = "tmp/memory", - run_subdir: str = "run1", page_log=None, ): """ Args: - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. - reset (Optional, bool): True to clear the DB before starting. Default False - - memory_dir (Optional, str): path to the directory where all memory data is stored. - - run_subdir (Optional, str): name of the subdirectory for this run's memory data. + - memory_dir (Optional, str): path to the directory where this run's memory data is stored. - page_log (Optional, PageLog): the PageLog object to use for logging. """ memory_dir = os.path.expanduser(memory_dir) - path_to_db_dir = os.path.join(memory_dir, run_subdir, "memo_store") - self.path_to_dict = os.path.join(memory_dir, run_subdir, "uid_insight_dict.pkl") + path_to_db_dir = os.path.join(memory_dir, "memo_store") + self.path_to_dict = os.path.join(memory_dir, "uid_insight_dict.pkl") self.page_log = page_log parent_page = self.page_log.last_page() diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py index cb630060b049..a7a371764ee9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py @@ -80,7 +80,7 @@ def add_link_to_image(self, description, source_image_path): # Remove every character from the string 'description' that is not alphanumeric or a space. description = ''.join(e for e in description if e.isalnum() or e.isspace()) target_image_filename = (str(self.page_log.get_next_page_id()) + ' - ' + description) - local_image_path = os.path.join(self.page_log.run_dir_path, target_image_filename) + local_image_path = os.path.join(self.page_log.log_dir, target_image_filename) shutil.copyfile(source_image_path, local_image_path) self.add_lines('\n' + description) self.add_lines(self.link_to_image(target_image_filename, description), flush=True) @@ -90,7 +90,7 @@ def delete_last_line(self): self.lines.pop() def flush(self): - page_path = os.path.join(self.page_log.run_dir_path, self.index_str + ".html") + page_path = os.path.join(self.page_log.log_dir, self.index_str + ".html") with open(page_path, "w") as f: f.write(self.page_log.html_opening(self.file_title, final=self.final)) f.write(f"
<h1>{self.file_title}</h1>
\n") @@ -106,15 +106,13 @@ def flush(self): class PageLog: - def __init__(self, path, run_id): - self.log_dir = os.path.expanduser(path) - self.run_id = run_id + def __init__(self, settings): + self.log_dir = os.path.expanduser(settings["path"]) self.page_stack = PageStack() self.pages = [] self.last_page_id = 0 self.entry_lines = [] self.exit_lines = [] - self.run_dir_path = None self.name = "0 Overview" self.create_run_dir() self.token_counts_path = self.create_token_counts_file() @@ -125,14 +123,13 @@ def get_next_page_id(self): return self.last_page_id def create_run_dir(self): - # Create a fresh run directory. - self.run_dir_path = os.path.join(self.log_dir, f"{self.run_id}") - if os.path.exists(self.run_dir_path): - shutil.rmtree(self.run_dir_path) - os.makedirs(self.run_dir_path) + # Create a fresh log directory. + if os.path.exists(self.log_dir): + shutil.rmtree(self.log_dir) + os.makedirs(self.log_dir) def create_token_counts_file(self): - token_counts_path = os.path.join(self.run_dir_path, "token_counts.csv") + token_counts_path = os.path.join(self.log_dir, "token_counts.csv") f = open(token_counts_path, "w") f.close() # The file starts empty and will be appended to later. return token_counts_path @@ -142,10 +139,6 @@ def write_token_count(self, num_input_tokens, caller, details_path=None): with open(self.token_counts_path, "a") as f: f.write(f"{num_input_tokens},{caller},{details_path}\n") - def num_subdirectories(self): - # Return the number of subdirectories in the log directory. - return len([name for name in os.listdir(self.log_dir) if os.path.isdir(os.path.join(self.log_dir, name))]) - def html_opening(self, file_title, final=False): # Return the opening text of a simple HTML file. refresh_tag = '' if not final else "" @@ -225,7 +218,7 @@ def message_content(self, page, message=None, message_content=None): elif isinstance(item, Image): # Save the image to disk. image_filename = str(self.get_next_page_id()) + " image.jpg" - image_path = os.path.join(self.run_dir_path, image_filename) + image_path = os.path.join(self.log_dir, image_filename) item.image.save(image_path) # Add a link to the image. content_list.append(page.link_to_image(image_filename, "message_image")) @@ -316,7 +309,7 @@ def last_page(self): def flush(self, final=False): # Create an overview of the log. - overview_path = os.path.join(self.run_dir_path, self.name + ".html") + overview_path = os.path.join(self.log_dir, self.name + ".html") with open(overview_path, "w") as f: f.write(self.html_opening("0 Overview", final=final)) f.write(f"
<h1>{self.name}</h1>
\n")

From f1d7a2f391e24cf991c2ae1541ed9143a391fc2a Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 7 Jan 2025 15:29:08 -0800
Subject: [PATCH 22/93] Renamed the memory classes.

---
 python/packages/autogen-ext/samples/eval.py   | 12 ++--
 .../autogen_ext/agentic_memory/__init__.py    |  4 +-
 ...dge_archive.py => _agentic_memory_bank.py} | 43 +++++++-----
 ...emory.py => _agentic_memory_controller.py} | 64 ++++++++---------
 ...emo_store.py => _string_similarity_map.py} | 70 +++++++++----------
 5 files changed, 101 insertions(+), 92 deletions(-)
 rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{_knowledge_archive.py => _agentic_memory_bank.py} (78%)
 rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{_agentic_memory.py => _agentic_memory_controller.py} (87%)
 rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{_memo_store.py => _string_similarity_map.py} (63%)

diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py
index 9ece2f6790a5..6c1aa6ae9afe 100644
--- a/python/packages/autogen-ext/samples/eval.py
+++ b/python/packages/autogen-ext/samples/eval.py
@@ -16,7 +16,7 @@
 from typing import (
     Tuple,
 )
-from autogen_ext.agentic_memory import AgenticMemory, PageLog, Grader, ClientWrapper
+from autogen_ext.agentic_memory import AgenticMemoryController, PageLog, Grader, ClientWrapper
 
 
 def define_tasks_with_answers():
@@ -75,7 +75,7 @@ async def train(task_with_answer, max_train_trials, max_test_trials, task_assign
         summary="train",
         details='',
         method_call="train")
-    memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log,
+    memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log,
                            memory_dir=memory_dir)
     await memory.train_on_task(
         task=task_with_answer["task"],
@@ -98,7 +98,7 @@ async def test(task_with_answer, num_trials, task_assignment_callback, use_memor
 
     if use_memory:
         page.add_lines("Testing with memory.\n", flush=True)
-        memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log,
+        memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log,
                                memory_dir=memory_dir)
         response, num_successes, num_trials = await memory.test_on_task(
             task=task_with_answer["task"],
@@ -246,7 +246,7 @@ async def send_message_to_agent(text, task_assignment_callback, client, page_log
         details="",
         method_call="send_message_to_agent")
 
-    memory = AgenticMemory(reset=reset_memory, client=client, page_log=page_log,
+    memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log,
                            memory_dir=memory_dir)
     response = await memory.handle_user_message(text, task_assignment_callback)
 
@@ -310,7 +310,7 @@ async def give_demonstration_to_agent(task, demonstration, client, page_log, mem
         details="",
         method_call="give_demonstration_to_agent")
 
-    memory = AgenticMemory(reset=False, client=client, page_log=page_log, memory_dir=memory_dir)
+    memory = AgenticMemoryController(reset=False, client=client, page_log=page_log, memory_dir=memory_dir)
     await memory.learn_from_demonstration(task, demonstration)
 
     page_log.finish_page(page)
@@ -597,7 +597,7 @@ async def run(self, settings_filepath):
         else:
             assert False, "Invalid base agent"
 
-        # Execute each eval.
+        # Execute each evaluation.
memory_path = agentic_memory_bank_settings["path"] for ev in settings["evaluations"]: eval_function = globals()[ev["name"]] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 1e4df35e5b76..c2445e11aa79 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,6 +1,6 @@ -from ._agentic_memory import AgenticMemory +from ._agentic_memory_controller import AgenticMemoryController from ._page_log import PageLog from ._grader import Grader from .client_wrapper import ClientWrapper -__all__ = ["AgenticMemory", "PageLog", "Grader", "ClientWrapper"] +__all__ = ["AgenticMemoryController", "PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py similarity index 78% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index b223c4dfd0b2..08cff9ef1af7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_knowledge_archive.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -2,7 +2,7 @@ from dataclasses import dataclass import pickle from typing import Dict, Optional, Union, List -from ._memo_store import MemoStore +from ._string_similarity_map import StringSimilarityMap @dataclass @@ -13,7 +13,7 @@ class Insight: topics: List[str] -class KnowledgeArchive: +class AgenticMemoryBank: """ Stores task-completion insights in a vector DB for later retrieval. """ @@ -26,22 +26,22 @@ def __init__( ): """ Args: - - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. + - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists. - reset (Optional, bool): True to clear the DB before starting. Default False - memory_dir (Optional, str): path to the directory where this run's memory data is stored. - page_log (Optional, PageLog): the PageLog object to use for logging. """ memory_dir = os.path.expanduser(memory_dir) - path_to_db_dir = os.path.join(memory_dir, "memo_store") + path_to_db_dir = os.path.join(memory_dir, "string_map") self.path_to_dict = os.path.join(memory_dir, "uid_insight_dict.pkl") self.page_log = page_log parent_page = self.page_log.last_page() - parent_page.add_lines("Creating KnowedgeArchive object", flush=True) + parent_page.add_lines("Creating AgenticMemoryBank", flush=True) - self.memo_store = MemoStore(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir) + self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir) - # Load or create the associated memo dict on disk. + # Load or create the associated insight dict on disk. self.uid_insight_dict = {} self.last_insight_id = 0 if (not reset) and os.path.exists(self.path_to_dict): @@ -52,30 +52,39 @@ def __init__( self.last_insight_id = len(self.uid_insight_dict) parent_page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict))) + # Clear the DB if requested. 
+ if reset: + self.reset_insights() + + def reset_insights(self): + """Forces immediate deletion of the insights, in memory and on disk.""" + self.uid_insight_dict = {} + self.save_insights() + def contains_insights(self): return len(self.uid_insight_dict) > 0 - def save_archive(self): - self.memo_store.save_memos() + def save_insights(self): + self.string_map.save_string_pairs() parent_page = self.page_log.last_page() parent_page.add_lines("\nSAVING INSIGHTS TO DISK {}".format(self.path_to_dict)) with open(self.path_to_dict, "wb") as file: pickle.dump(self.uid_insight_dict, file) def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics: Optional[List[str]] = None): - """Adds an insight to the knowledge archive.""" + """Adds an insight to the memory bank.""" assert topics is not None, "For now, the topics list must be provided." self.last_insight_id += 1 id_str = str(self.last_insight_id) insight = Insight(id=id_str, insight_str=insight_str, task_str=task_str, topics=topics) for topic in topics: # Add a mapping in the vec DB from each topic to the insight. - self.memo_store.add_input_output_pair(topic, id_str) + self.string_map.add_input_output_pair(topic, id_str) self.uid_insight_dict[str(id_str)] = insight - self.save_archive() + self.save_insights() def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional[List[str]] = None): - """Returns any insights from the knowledge archive that are relevant to the given task or topics.""" + """Returns any insights from the memory bank that are relevant to the given task or topics.""" assert (task_str is not None) or (topics is not None), "Either the task string or the topics list must be provided." assert topics is not None, "For now, the topics list is always required, because it won't be generated." @@ -86,7 +95,7 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional # Process the matching topics. matches = [] # Each match is a tuple: (topic, insight, distance) for topic in topics: - matches.extend(self.memo_store.get_related_memos(topic, 25, 100)) + matches.extend(self.string_map.get_related_string_pairs(topic, 25, 100)) for match in matches: relevance = relevance_conversion_threshold - match[2] insight_id = match[1] @@ -104,13 +113,13 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional return insight_relevance_dict def add_demonstration(self, task: str, demonstration: str, topics: List[str]): - """Adds a task-demonstration pair (as a single insight) to the knowledge archive.""" + """Adds a task-demonstration pair (as a single insight) to the memory bank.""" self.last_insight_id += 1 id_str = str(self.last_insight_id) insight_str = "Example task:\n\n{}\n\nExample solution:\n\n{}".format(task, demonstration) insight = Insight(id=id_str, insight_str=insight_str, task_str=task, topics=topics) for topic in topics: # Add a mapping in the vec DB from each topic to the insight. 
- self.memo_store.add_input_output_pair(topic, id_str) + self.string_map.add_input_output_pair(topic, id_str) self.uid_insight_dict[str(id_str)] = insight - self.save_archive() + self.save_insights() diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py similarity index 87% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py index 9d16b340553f..ab9ece64a0ec 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py @@ -1,15 +1,15 @@ from typing import Callable, List from ._prompter import Prompter -from ._knowledge_archive import KnowledgeArchive +from ._agentic_memory_bank import AgenticMemoryBank from ._grader import Grader -class AgenticMemory: +class AgenticMemoryController: def __init__(self, reset, client, page_log, memory_dir): self.client = client self.page_log = page_log self.prompter = Prompter(client, page_log) - self.archive = KnowledgeArchive(verbosity=0, reset=reset, memory_dir=memory_dir, page_log=page_log) + self.memory_bank = AgenticMemoryBank(verbosity=0, reset=reset, memory_dir=memory_dir, page_log=page_log) self.grader = Grader(client, page_log) async def train_on_task(self, @@ -24,9 +24,9 @@ async def train_on_task(self, Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ page = self.page_log.begin_page( - summary="AgenticMemory.train_on_task", + summary="AgenticMemoryController.train_on_task", details="", - method_call="AgenticMemory.train_on_task") + method_call="AgenticMemoryController.train_on_task") # Attempt to create useful new memories. page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) @@ -46,9 +46,9 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca Assigns a task to the completion agent, along with any relevant insights/memories. """ page = self.page_log.begin_page( - summary="AgenticMemory.test_on_task", + summary="AgenticMemoryController.test_on_task", details="", - method_call="AgenticMemory.test_on_task") + method_call="AgenticMemoryController.test_on_task") response = None num_successes = 0 @@ -87,9 +87,9 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca async def add_insight_to_memory(self, task: str, insight: str): # Adds an insight to the DB. page = self.page_log.begin_page( - summary="AgenticMemory.add_insight_to_memory", + summary="AgenticMemoryController.add_insight_to_memory", details="", - method_call="AgenticMemory.add_insight_to_memory") + method_call="AgenticMemoryController.add_insight_to_memory") page.add_lines("\nGIVEN TASK:") page.add_lines(task) @@ -107,17 +107,17 @@ async def add_insight_to_memory(self, task: str, insight: str): page.add_lines("\n".join(topics)) page.add_lines("") - # Add the insight to the archive. - self.archive.add_insight(insight, generalized_task, topics) + # Add the insight to the memory bank. + self.memory_bank.add_insight(insight, generalized_task, topics) self.page_log.finish_page(page) async def add_insight_without_task_to_memory(self, insight: str): # Adds an insight to the DB. 
page = self.page_log.begin_page( - summary="AgenticMemory.add_insight_without_task_to_memory", + summary="AgenticMemoryController.add_insight_without_task_to_memory", details="", - method_call="AgenticMemory.add_insight_without_task_to_memory") + method_call="AgenticMemoryController.add_insight_without_task_to_memory") page.add_lines("\nGIVEN INSIGHT:") page.add_lines(insight) @@ -128,19 +128,19 @@ async def add_insight_without_task_to_memory(self, insight: str): page.add_lines("\n".join(topics)) page.add_lines("") - # Add the insight to the archive. - self.archive.add_insight(insight, None, topics) + # Add the insight to the memory bank. + self.memory_bank.add_insight(insight, None, topics) self.page_log.finish_page(page) async def retrieve_relevant_insights(self, task: str): # Retrieve insights from the DB that are relevant to the task. page = self.page_log.begin_page( - summary="AgenticMemory.retrieve_relevant_insights", + summary="AgenticMemoryController.retrieve_relevant_insights", details="", - method_call="AgenticMemory.retrieve_relevant_insights") + method_call="AgenticMemoryController.retrieve_relevant_insights") - if self.archive.contains_insights(): + if self.memory_bank.contains_insights(): page.add_lines("\nCURRENT TASK:") page.add_lines(task) @@ -153,8 +153,8 @@ async def retrieve_relevant_insights(self, task: str): page.add_lines("\n".join(topics)) page.add_lines("") - # Retrieve relevant insights from the archive. - relevant_insights_and_relevances = self.archive.get_relevant_insights(topics=topics) + # Retrieve relevant insights from the memory bank. + relevant_insights_and_relevances = self.memory_bank.get_relevant_insights(topics=topics) relevant_insights = [] page.add_lines("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) for insight, relevance in relevant_insights_and_relevances.items(): @@ -191,9 +191,9 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a Attempts to solve the given task multiple times to find a failure case to learn from. """ page = self.page_log.begin_page( - summary="AgenticMemory._test_for_failure", + summary="AgenticMemoryController._test_for_failure", details="", - method_call="AgenticMemory._test_for_failure") + method_call="AgenticMemoryController._test_for_failure") page.add_lines("\nTask description, including any insights: {}".format(task_plus_insights)) page.add_lines("\nExpected answer: {}\n".format(expected_answer)) @@ -224,9 +224,9 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to_completer: Callable, final_format_instructions: str, max_train_trials: int, max_test_trials: int): page = self.page_log.begin_page( - summary="AgenticMemory._iterate_on_task", + summary="AgenticMemoryController._iterate_on_task", details="", - method_call="AgenticMemory._iterate_on_task") + method_call="AgenticMemoryController._iterate_on_task") page.add_lines("\nTask description: {}".format(task)) page.add_lines("\nExpected answer: {}\n".format(expected_answer)) @@ -292,9 +292,9 @@ async def execute_task(self, task: str, task_assignment_callback: Callable, shou Assigns a task to the completion agent, along with any relevant insights/memories. 
""" page = self.page_log.begin_page( - summary="AgenticMemory.execute_task", + summary="AgenticMemoryController.execute_task", details="", - method_call="AgenticMemory.execute_task") + method_call="AgenticMemoryController.execute_task") if should_retrieve_insights: # Try to retrieve any relevant memories from the DB. @@ -318,9 +318,9 @@ async def execute_task(self, task: str, task_assignment_callback: Callable, shou async def handle_user_message(self, text, task_assignment_callback, should_await=True): page = self.page_log.begin_page( - summary="AgenticMemory.handle_user_message", + summary="AgenticMemoryController.handle_user_message", details="", - method_call="AgenticMemory.handle_user_message") + method_call="AgenticMemoryController.handle_user_message") # task = await self.prompter.extract_task(text) # page.add_lines("Task: {}".format(task), flush=True) @@ -341,9 +341,9 @@ async def handle_user_message(self, text, task_assignment_callback, should_await async def learn_from_demonstration(self, task, demonstration): page = self.page_log.begin_page( - summary="AgenticMemory.learn_from_demonstration", + summary="AgenticMemoryController.learn_from_demonstration", details="", - method_call="AgenticMemory.learn_from_demonstration") + method_call="AgenticMemoryController.learn_from_demonstration") page.add_lines("\nEXAMPLE TASK:") page.add_lines(task) @@ -357,7 +357,7 @@ async def learn_from_demonstration(self, task, demonstration): page.add_lines("\n".join(topics)) page.add_lines("") - # Add the insight to the archive. - self.archive.add_demonstration(task, demonstration, topics) + # Add the insight to the memory bank. + self.memory_bank.add_demonstration(task, demonstration, topics) self.page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py similarity index 63% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py index 8eed6d45c52e..25853decc120 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_memo_store.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py @@ -5,12 +5,12 @@ from typing import Optional, Union -class MemoStore: +class StringSimilarityMap: """ - Provides memory storage and retrieval using a vector database. - Each DB entry (called a memo) is a pair of strings: an input text and an output text. - The input text is embedded and used as the retrieval key. - The output text can be anything, but it's typically used as a dict key. + Provides string-pair storage and retrieval using a vector database. + Each DB entry is a pair of strings: an input string and an output string. + The input string is embedded and used as the retrieval key. + The output string can be anything, but it's typically used as a dict key. Vector embeddings are currently supplied by Chroma's default Sentence Transformers. """ @@ -22,7 +22,7 @@ def __init__( ): """ Args: - - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print memo lists. + - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists. - reset (Optional, bool): True to clear the DB before starting. Default False. - path_to_db_dir (Optional, str): path to the directory where the DB is stored. 
""" @@ -34,73 +34,73 @@ def __init__( anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir ) self.db_client = chromadb.Client(settings) - self.vec_db = self.db_client.create_collection("memos", get_or_create=True) # The collection is the DB. + self.vec_db = self.db_client.create_collection("string-pairs", get_or_create=True) # The collection is the DB. - # Load or create the associated memo dict on disk. + # Load or create the associated string-pair dict on disk. self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") self.uid_text_dict = {} - self.last_memo_id = 0 + self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - print("\nLOADING MEMORY FROM DISK {}".format(self.path_to_dict)) + print("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) print(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) - self.last_memo_id = len(self.uid_text_dict) - print("\n{} MEMOS LOADED".format(len(self.uid_text_dict))) + self.last_string_pair_id = len(self.uid_text_dict) + print("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) if self.verbosity >= 3: - self.list_memos() + self.list_string_pairs() # Clear the DB if requested. if reset: self.reset_db() - def list_memos(self): - """Prints the contents of MemoStore.""" - print("LIST OF MEMOS") + def list_string_pairs(self): + """Prints the string-pair contents.""" + print("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) - def save_memos_to_text_files(self): - """Saves the contents of MemoStore to text files.""" + def save_string_pairs_to_text_files(self): + """Saves the contents to text files.""" # Delete all files in mem_text dir. for file in os.listdir("mem_text"): os.remove(os.path.join("mem_text", file)) - print("LIST OF MEMOS") + print("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) - # Save the input text to a file with the same name as the memo ID in the mem_text dir, which is a subdir of the dir containing this file. + # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. 
with open("mem_text/{}.txt".format(uid), "w") as file:
                 file.write(" ID: {}\n   INPUT TEXT: {}\n   OUTPUT TEXT: {}".format(uid, input_text, output_text))
 
-    def save_memos(self):
+    def save_string_pairs(self):
         """Saves self.uid_text_dict to disk."""
         with open(self.path_to_dict, "wb") as file:
             pickle.dump(self.uid_text_dict, file)
 
     def reset_db(self):
         """Forces immediate deletion of the DB's contents, in memory and on disk."""
-        print("\nCLEARING MEMORY")
-        self.db_client.delete_collection("memos")
-        self.vec_db = self.db_client.create_collection("memos")
+        print("\nCLEARING STRING-PAIR MAP")
+        self.db_client.delete_collection("string-pairs")
+        self.vec_db = self.db_client.create_collection("string-pairs")
         self.uid_text_dict = {}
-        self.save_memos()
+        self.save_string_pairs()
 
     def add_input_output_pair(self, input_text: str, output_text: str):
         """Adds an input-output pair to the vector DB."""
-        self.last_memo_id += 1
-        self.vec_db.add(documents=[input_text], ids=[str(self.last_memo_id)])
-        self.uid_text_dict[str(self.last_memo_id)] = input_text, output_text
+        self.last_string_pair_id += 1
+        self.vec_db.add(documents=[input_text], ids=[str(self.last_string_pair_id)])
+        self.uid_text_dict[str(self.last_string_pair_id)] = input_text, output_text
         if self.verbosity >= 1:
             print("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n  ID\n    {}\n  INPUT\n    {}\n  OUTPUT\n    {}\n".format(
-                self.last_memo_id, input_text, output_text))
+                self.last_string_pair_id, input_text, output_text))
         if self.verbosity >= 3:
-            self.list_memos()
+            self.list_string_pairs()
 
-    def get_related_memos(self, query_text: str, n_results: int, threshold: Union[int, float]):
-        """Retrieves memos that are related to the given query text within the specified distance threshold."""
+    def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]):
+        """Retrieves string pairs that are related to the given query text within the specified distance threshold."""
         if n_results > len(self.uid_text_dict):
             n_results = len(self.uid_text_dict)
         if n_results > 0:
@@ -108,7 +108,7 @@ def get_related_memos(self, query_text: str, n_results: int, threshold: Union[in
             num_results = len(results["ids"][0])
         else:
             num_results = 0
-        memos = []
+        string_pairs = []
         for i in range(num_results):
             uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i]
             if distance < threshold:
@@ -117,5 +117,5 @@ def get_related_memos(self, query_text: str, n_results: int, threshold: Union[in
                 if self.verbosity >= 1:
                     print("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n  INPUT1\n    {}\n  OUTPUT\n    {}\n  DISTANCE\n    {}".format(
                         input_text, output_text, distance))
-                memos.append((input_text, output_text, distance))
-        return memos
+                string_pairs.append((input_text, output_text, distance))
+        return string_pairs

From 17d4c429466892bcc54d31ab2ab7e3a61d07f7ad Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 7 Jan 2025 16:56:08 -0800
Subject: [PATCH 23/93] Apprentice. Eval simplifications.
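
The evaluations now drive a single Apprentice instance instead of
constructing a fresh AgenticMemoryController inside each helper. A rough
usage sketch (hypothetical harness code, not part of this diff; the
settings, evaluator, client, and page_log objects are assumed to come
from the surrounding eval setup):

    apprentice = Apprentice(settings["apprentice"], evaluator, client, page_log)
    apprentice.start(reset_memory=True)   # builds the AgenticMemoryController
    response = await apprentice.handle_user_message(task, task_assignment_callback)
    apprentice.stop()                     # drops the memory and agent references

Each evaluation listed in the settings then receives this shared
apprentice along with the evaluator and the task assignment callback.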
--- python/packages/autogen-ext/samples/eval.py | 100 ++++++++++++-------- 1 file changed, 59 insertions(+), 41 deletions(-) diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index 6c1aa6ae9afe..55d420121b66 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -239,24 +239,8 @@ async def test_with_memory(task_assignment_callback, client, page_log, memory_di page_log.finish_page(page) -async def send_message_to_agent(text, task_assignment_callback, client, page_log, memory_dir, - reset_memory) -> None: - page = page_log.begin_page( - summary="send_message_to_agent", - details="", - method_call="send_message_to_agent") - - memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir) - response = await memory.handle_user_message(text, task_assignment_callback) - - page.add_lines("Response: {}\n".format(response), flush=True) - - page_log.finish_page(page) - return response - - -async def test_teachability(task_assignment_callback, client, page_log, memory_dir, settings): +async def test_teachability(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): + """An evaluation""" page = page_log.begin_page( summary="test_teachability", details='', @@ -268,11 +252,11 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d task = task_with_answer["task"] answer = task_with_answer["expected_answer"] grader = Grader(client, page_log) + apprentice.start(reset_memory=True) # First test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, - reset_memory=True) + response = await apprentice.handle_user_message(task, task_assignment_callback) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -285,13 +269,11 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d # Give the advice. page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await send_message_to_agent(insight, task_assignment_callback, client, page_log, memory_dir, - reset_memory=False) + await apprentice.handle_user_message(insight, task_assignment_callback) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await send_message_to_agent(task, task_assignment_callback, client, page_log, memory_dir, - reset_memory=False) + response = await apprentice.handle_user_message(task, task_assignment_callback) # Check the response. 
response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -301,22 +283,12 @@ async def test_teachability(task_assignment_callback, client, page_log, memory_d else: page.add_lines("Answer is INCORRECT.\n", flush=True) + apprentice.stop() page_log.finish_page(page) -async def give_demonstration_to_agent(task, demonstration, client, page_log, memory_dir) -> None: - page = page_log.begin_page( - summary="give_demonstration_to_agent", - details="", - method_call="give_demonstration_to_agent") - - memory = AgenticMemoryController(reset=False, client=client, page_log=page_log, memory_dir=memory_dir) - await memory.learn_from_demonstration(task, demonstration) - - page_log.finish_page(page) - - -async def test_learning_from_demonstration(task_assignment_callback, client, page_log, memory_dir, settings): +async def test_learning_from_demonstration(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): + """An evaluation""" page = page_log.begin_page( summary="test_learning_from_demonstration", details='', @@ -337,7 +309,8 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag page.add_lines("Demonstrate a solution to a similar task.") demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - await give_demonstration_to_agent(demo_task, demonstration, client, page_log, memory_dir) + memory = AgenticMemoryController(reset=False, client=client, page_log=page_log, memory_dir=memory_dir) + await memory.learn_from_demonstration(demo_task, demonstration) # Now test again to see if the demonstration (retrieved from memory) helps. page.add_lines("Assign the task again to see if the demonstration helps.") @@ -350,7 +323,8 @@ async def test_learning_from_demonstration(task_assignment_callback, client, pag page_log.finish_page(page) -async def test_self_teaching(task_assignment_callback, client, page_log, memory_dir, settings): +async def test_self_teaching(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): + """An evaluation""" page = page_log.begin_page( summary="test_self_teaching", details='', @@ -578,8 +552,9 @@ async def run(self, settings_filepath): # Create the client, which is used by both the apprentice and the evaluator. client = self.create_client(settings["client"]) - # Configure the apprentice. + # Create the apprentice. 
apprentice_settings = settings["apprentice"] + apprentice = Apprentice(settings["apprentice"], self, client, self.page_log) # Configure the agentic memory controller. agentic_memory_controller_settings = apprentice_settings["agentic_memory_controller"] @@ -601,7 +576,7 @@ async def run(self, settings_filepath): memory_path = agentic_memory_bank_settings["path"] for ev in settings["evaluations"]: eval_function = globals()[ev["name"]] - await eval_function(task_assignment_callback, client, self.page_log, memory_path, ev) + await eval_function(apprentice, self, task_assignment_callback, client, self.page_log, memory_path, ev) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. @@ -611,6 +586,49 @@ async def run(self, settings_filepath): self.page_log.finish_page(page) +class Apprentice: + def __init__(self, settings, evaluator, client, page_log): + self.settings = settings + self.evaluator = evaluator + self.client = client + self.page_log = page_log + self.memory_settings = settings["agentic_memory_controller"] + self.agent_settings = settings["agent_wrapper"] + self.memory = None + self.agent = None + + def create_memory(self, reset_memory): + self.memory = AgenticMemoryController( + reset=reset_memory, + client=self.client, + page_log=self.page_log, + memory_dir=self.memory_settings["agentic_memory_bank"]["path"] + ) + + def create_agent(self): + return None + + def start(self, reset_memory): + self.create_memory(reset_memory) + self.create_agent() + + def stop(self): + self.memory = None + self.agent = None + + async def handle_user_message(self, text, task_assignment_callback, should_await=True): + page = self.page_log.begin_page( + summary="Apprentice.handle_user_message", + details="", + method_call="Apprentice.handle_user_message") + + # Pass the user message through to the memory controller. + response = await self.memory.handle_user_message(text, task_assignment_callback, should_await) + + self.page_log.finish_page(page) + return response + + if __name__ == "__main__": args = sys.argv[1:] if len(args) != 1: From 19654e8d165e02c00e0658e7c1f1d4586439d9de Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 7 Jan 2025 18:00:10 -0800 Subject: [PATCH 24/93] Moved test into the evaluator, and removed eval.py's other util functions. 
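
With the module-level train/test helpers removed, every success-rate
measurement flows through the new Evaluator.test method. A rough calling
sketch (hypothetical; the argument values are illustrative, and the
evaluator, task_assignment_callback, client, page_log, and memory_dir
objects come from the surrounding harness):

    num_successes, num_trials = await evaluator.test(
        task_with_answer=define_tasks_with_answers()[5],
        num_trials=3,
        task_assignment_callback=task_assignment_callback,
        use_memory=True,     # route the task through an AgenticMemoryController
        reset_memory=False,  # keep any insights already stored on disk
        client=client,
        page_log=page_log,
        memory_dir=memory_dir)
    success_rate = round((num_successes / num_trials) * 100)

The eval_* functions below call it with use_memory=True; passing
use_memory=False instead runs the task_assignment_callback directly,
without memory.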
--- python/packages/autogen-ext/samples/eval.py | 285 ++++++------------ .../packages/autogen-ext/samples/short.yaml | 6 +- 2 files changed, 93 insertions(+), 198 deletions(-) diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index 55d420121b66..805649c83dba 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -69,182 +69,12 @@ def define_tasks_with_answers(): return tasks_with_answers -async def train(task_with_answer, max_train_trials, max_test_trials, task_assignment_callback, reset_memory, - client, page_log, memory_dir) -> None: - page = page_log.begin_page( - summary="train", - details='', - method_call="train") - memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir) - await memory.train_on_task( - task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"], - task_assignment_callback=task_assignment_callback, - final_format_instructions="", - max_train_trials=max_train_trials, - max_test_trials=max_test_trials) - page_log.finish_page(page) - - -async def test(task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, - client, page_log, memory_dir) -> Tuple[str, int, int]: - page = page_log.begin_page( - summary="test", - details='', - method_call="test") - - grader = Grader(client, page_log) - - if use_memory: - page.add_lines("Testing with memory.\n", flush=True) - memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir) - response, num_successes, num_trials = await memory.test_on_task( - task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"], - task_assignment_callback=task_assignment_callback, - num_trials=num_trials) - else: - page.add_lines("Testing without memory.\n", flush=True) - response = None - num_successes = 0 - for trial in range(num_trials): - page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) - page.add_lines("Try to solve the task.\n", flush=True) - response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) - - response_is_correct, extracted_answer = await grader.is_response_correct( - task_with_answer["task"], response, task_with_answer["expected_answer"]) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - num_successes += 1 - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) - - page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) - - page_log.finish_page(page) - return response, num_successes, num_trials - - -async def test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, memory_dir, - num_trials, reset_memory): - last_response, num_successes, num_trials = await test( - task_with_answer=define_tasks_with_answers()[task_index], - num_trials=num_trials, - task_assignment_callback=task_assignment_callback, - use_memory=True, - reset_memory=reset_memory, - client=client, - page_log=page_log, - memory_dir=memory_dir) - print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) - return num_successes, num_trials - - -async def test_on_task(task_index, task_assignment_callback, client, page_log, memory_dir, num_trials): - last_response, num_successes, num_trials = await test( - 
task_with_answer=define_tasks_with_answers()[task_index], - num_trials=num_trials, - task_assignment_callback=task_assignment_callback, - use_memory=False, - reset_memory=False, - client=client, - page_log=page_log, - memory_dir=memory_dir) - print("SUCCESS RATE: {}%\n".format(round((num_successes / num_trials) * 100))) - return num_successes, num_trials - - -async def train_and_test(task_index_list, num_loops, max_train_trials, max_test_trials, num_final_test_trials, - task_assignment_callback, client, page_log, memory_dir): - page = page_log.begin_page( - summary="train_and_test", - details='', - method_call="train_and_test") - - tasklist = define_tasks_with_answers() - task_with_answer_list = [tasklist[task_index] for task_index in task_index_list] - - total_num_successes_list = [0 for _ in task_index_list] - total_num_trials = 0 - for i in range(num_loops): - # Always train on the first task. - await train( - task_with_answer=task_with_answer_list[0], - max_train_trials=max_train_trials, - max_test_trials=max_test_trials, - task_assignment_callback=task_assignment_callback, - reset_memory=True, - client=client, - page_log=page_log, - memory_dir=memory_dir) - - # Test on all tasks. - for j, task_with_answer in enumerate(task_with_answer_list): - last_response, num_successes, num_trials = await test( - task_with_answer=task_with_answer, - num_trials=num_final_test_trials, - task_assignment_callback=task_assignment_callback, - use_memory=True, - reset_memory=False, - client=client, - page_log=page_log, - memory_dir=memory_dir) - page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) - print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) - total_num_successes_list[j] += num_successes - total_num_trials += num_final_test_trials - - page.add_lines("") - - page_log.finish_page(page) - return total_num_successes_list, total_num_trials - - -async def test_without_memory(task_assignment_callback, client, page_log, memory_dir): - page = page_log.begin_page( - summary="test_without_memory", - details='', - method_call="test_without_memory") - - task_index = 5 - num_trials = 20 - - num_successes, num_trials = await test_on_task(task_index, task_assignment_callback, client, page_log, - memory_dir, num_trials) - - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) - - page_log.finish_page(page) - - -async def test_with_memory(task_assignment_callback, client, page_log, memory_dir): - page = page_log.begin_page( - summary="test_with_memory", - details='', - method_call="test_with_memory") - - task_index = 3 - - num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, - num_trials=3, reset_memory=False) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nOverall success rate: {}%\n".format(success_rate), flush=True) - - page_log.finish_page(page) - - -async def test_teachability(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_teachability(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( - summary="test_teachability", + summary="eval_teachability", details='', - method_call="test_teachability") + method_call="eval_teachability") tasklist = define_tasks_with_answers() task_index = 
4 @@ -287,21 +117,22 @@ async def test_teachability(apprentice, evaluator, task_assignment_callback, cli page_log.finish_page(page) -async def test_learning_from_demonstration(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_learning_from_demonstration(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( - summary="test_learning_from_demonstration", + summary="eval_learning_from_demonstration", details='', - method_call="test_learning_from_demonstration") + method_call="eval_learning_from_demonstration") task_index = 5 + task_with_answer = define_tasks_with_answers()[task_index] num_trials = settings["num_trials"] # First test after clearing memory. page.add_lines("To get a baseline, clear memory, then assign the task.") - num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, - num_trials=num_trials, reset_memory=True) + num_successes, num_trials = await evaluator.test(task_with_answer=task_with_answer, num_trials=num_trials, + task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=True, client=client, + page_log=page_log, memory_dir=memory_dir) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -314,36 +145,59 @@ async def test_learning_from_demonstration(apprentice, evaluator, task_assignmen # Now test again to see if the demonstration (retrieved from memory) helps. page.add_lines("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await test_on_task_with_memory(task_index, task_assignment_callback, client, page_log, - memory_dir=memory_dir, - num_trials=num_trials, reset_memory=False) + num_successes, num_trials = await evaluator.test(task_with_answer=task_with_answer, num_trials=num_trials, + task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=False, client=client, + page_log=page_log, memory_dir=memory_dir) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) page_log.finish_page(page) -async def test_self_teaching(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_self_teaching(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( - summary="test_self_teaching", + summary="eval_self_teaching", details='', - method_call="test_self_teaching") + method_call="eval_self_teaching") # Choose the tasks from those listed at the top. task_index_list = [3, 1] # Train and test on any number of tasks using memory. 
- total_num_successes_list, total_num_trials = await train_and_test( - task_index_list=task_index_list, - num_loops=settings["num_loops"], - max_train_trials=settings["max_train_trials"], - max_test_trials=settings["max_test_trials"], - num_final_test_trials=settings["num_final_test_trials"], - task_assignment_callback=task_assignment_callback, - client=client, - page_log=page_log, - memory_dir=memory_dir) + tasklist = define_tasks_with_answers() + task_with_answer_list = [tasklist[task_index] for task_index in task_index_list] + + total_num_successes_list = [0 for _ in task_index_list] + total_num_trials = 0 + for i in range(settings["num_loops"]): + # Always train on the first task. + memory = AgenticMemoryController(reset=True, client=client, page_log=page_log, memory_dir=memory_dir) + task_with_answer = task_with_answer_list[0] + await memory.train_on_task( + task=task_with_answer["task"], + expected_answer=task_with_answer["expected_answer"], + task_assignment_callback=task_assignment_callback, + final_format_instructions="", + max_train_trials=settings["max_train_trials"], + max_test_trials=settings["max_test_trials"]) + + # Test on all tasks. + for j, task_with_answer in enumerate(task_with_answer_list): + num_successes, num_trials = await evaluator.test( + task_with_answer=task_with_answer, + num_trials=settings["num_final_test_trials"], + task_assignment_callback=task_assignment_callback, + use_memory=True, + reset_memory=False, + client=client, + page_log=page_log, + memory_dir=memory_dir) + page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) + print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) + total_num_successes_list[j] += num_successes + total_num_trials += settings["num_final_test_trials"] + page.add_lines("") for i, total_num_successes in enumerate(total_num_successes_list): success_rate = round((total_num_successes / total_num_trials) * 100) @@ -536,6 +390,47 @@ async def assign_task_to_client(self, task, client, page_log): page_log.finish_page(page) return response_str, work_history + async def test(self, task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, + client, page_log, memory_dir) -> Tuple[str, int, int]: + page = page_log.begin_page( + summary="Evaluator.test", + details='', + method_call="Evaluator.test") + + grader = Grader(client, page_log) + + if use_memory: + page.add_lines("Testing with memory.\n", flush=True) + memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log, + memory_dir=memory_dir) + response, num_successes, num_trials = await memory.test_on_task( + task=task_with_answer["task"], + expected_answer=task_with_answer["expected_answer"], + task_assignment_callback=task_assignment_callback, + num_trials=num_trials) + else: + page.add_lines("Testing without memory.\n", flush=True) + response = None + num_successes = 0 + for trial in range(num_trials): + page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("Try to solve the task.\n", flush=True) + response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) + + response_is_correct, extracted_answer = await grader.is_response_correct( + task_with_answer["task"], response, task_with_answer["expected_answer"]) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + num_successes += 1 + else: + 
page.add_lines("Answer is INCORRECT.\n", flush=True) + + page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) + + page_log.finish_page(page) + return num_successes, num_trials + async def run(self, settings_filepath): # Load the settings from yaml. with open(settings_filepath, "r") as file: diff --git a/python/packages/autogen-ext/samples/short.yaml b/python/packages/autogen-ext/samples/short.yaml index d9085cd69462..984513ae1ef7 100644 --- a/python/packages/autogen-ext/samples/short.yaml +++ b/python/packages/autogen-ext/samples/short.yaml @@ -26,12 +26,12 @@ apprentice: base_agent: thin_agent # thin_agent or magentic_one evaluations: - - name: test_teachability + - name: eval_teachability - - name: test_learning_from_demonstration + - name: eval_learning_from_demonstration num_trials: 1 # 1-10 - - name: test_self_teaching + - name: eval_self_teaching num_loops: 1 # 1-10 max_train_trials: 2 # 2-10 Move to AMC max_test_trials: 1 # 1-3 Move to AMC From 7aa20c15e15309954a33fb685293c059e9289209 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 8 Jan 2025 11:00:24 -0800 Subject: [PATCH 25/93] renaming --- python/packages/autogen-ext/samples/eval.py | 54 +++++++++---------- python/packages/autogen-ext/samples/long.yaml | 38 +++++++++++++ .../packages/autogen-ext/samples/short.yaml | 16 +++--- 3 files changed, 73 insertions(+), 35 deletions(-) create mode 100644 python/packages/autogen-ext/samples/long.yaml diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index 805649c83dba..fde0a7aed8d7 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -69,7 +69,7 @@ def define_tasks_with_answers(): return tasks_with_answers -async def eval_teachability(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_teachability(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( summary="eval_teachability", @@ -82,11 +82,11 @@ async def eval_teachability(apprentice, evaluator, task_assignment_callback, cli task = task_with_answer["task"] answer = task_with_answer["expected_answer"] grader = Grader(client, page_log) - apprentice.start(reset_memory=True) + fast_learner.start(reset_memory=True) # First test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await apprentice.handle_user_message(task, task_assignment_callback) + response = await fast_learner.handle_user_message(task, task_assignment_callback) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -99,11 +99,11 @@ async def eval_teachability(apprentice, evaluator, task_assignment_callback, cli # Give the advice. page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. 
That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await apprentice.handle_user_message(insight, task_assignment_callback) + await fast_learner.handle_user_message(insight, task_assignment_callback) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await apprentice.handle_user_message(task, task_assignment_callback) + response = await fast_learner.handle_user_message(task, task_assignment_callback) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -113,11 +113,11 @@ async def eval_teachability(apprentice, evaluator, task_assignment_callback, cli else: page.add_lines("Answer is INCORRECT.\n", flush=True) - apprentice.stop() + fast_learner.stop() page_log.finish_page(page) -async def eval_learning_from_demonstration(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( summary="eval_learning_from_demonstration", @@ -154,7 +154,7 @@ async def eval_learning_from_demonstration(apprentice, evaluator, task_assignmen page_log.finish_page(page) -async def eval_self_teaching(apprentice, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): """An evaluation""" page = page_log.begin_page( summary="eval_self_teaching", @@ -223,8 +223,8 @@ def create_client(self, settings): assert False, "Invalid client provider" # Check if the client should be wrapped. - if "wrapper" in settings: - wrapper_settings = settings["wrapper"] + if "ClientWrapper" in settings: + wrapper_settings = settings["ClientWrapper"] if wrapper_settings["enabled"]: # Wrap the client. client = ClientWrapper( @@ -435,32 +435,32 @@ async def run(self, settings_filepath): # Load the settings from yaml. with open(settings_filepath, "r") as file: settings = yaml.load(file, Loader=yaml.FullLoader) - evaluator_settings = settings["evaluator"] + evaluator_settings = settings["Evaluator"] # Create the PageLog. - self.page_log = PageLog(evaluator_settings["pagelog"]) + self.page_log = PageLog(evaluator_settings["PageLog"]) page = self.page_log.begin_page( summary="main", details='', method_call="main") - # Create the client, which is used by both the apprentice and the evaluator. + # Create the client, which is used by both the fast_learner and the evaluator. client = self.create_client(settings["client"]) - # Create the apprentice. - apprentice_settings = settings["apprentice"] - apprentice = Apprentice(settings["apprentice"], self, client, self.page_log) + # Create the fast_learner. + fast_learner_settings = settings["FastLearner"] + fast_learner = FastLearner(fast_learner_settings, self, client, self.page_log) # Configure the agentic memory controller. 
- agentic_memory_controller_settings = apprentice_settings["agentic_memory_controller"] - agentic_memory_bank_settings = agentic_memory_controller_settings["agentic_memory_bank"] + agentic_memory_controller_settings = fast_learner_settings["AgenticMemoryController"] + agentic_memory_bank_settings = agentic_memory_controller_settings["AgenticMemoryBank"] # Configure the agent wrapper. - agent_wrapper_settings = apprentice_settings["agent_wrapper"] + agent_wrapper_settings = fast_learner_settings["AgentWrapper"] # Configure the base agent. base_agent = agent_wrapper_settings["base_agent"] - if base_agent == "magentic_one": + if base_agent == "MagenticOneGroupChat": task_assignment_callback = self.assign_task_to_magentic_one elif base_agent == "thin_agent": task_assignment_callback = self.assign_task_to_client @@ -471,7 +471,7 @@ async def run(self, settings_filepath): memory_path = agentic_memory_bank_settings["path"] for ev in settings["evaluations"]: eval_function = globals()[ev["name"]] - await eval_function(apprentice, self, task_assignment_callback, client, self.page_log, memory_path, ev) + await eval_function(fast_learner, self, task_assignment_callback, client, self.page_log, memory_path, ev) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. @@ -481,14 +481,14 @@ async def run(self, settings_filepath): self.page_log.finish_page(page) -class Apprentice: +class FastLearner: def __init__(self, settings, evaluator, client, page_log): self.settings = settings self.evaluator = evaluator self.client = client self.page_log = page_log - self.memory_settings = settings["agentic_memory_controller"] - self.agent_settings = settings["agent_wrapper"] + self.memory_settings = settings["AgenticMemoryController"] + self.agent_settings = settings["AgentWrapper"] self.memory = None self.agent = None @@ -497,7 +497,7 @@ def create_memory(self, reset_memory): reset=reset_memory, client=self.client, page_log=self.page_log, - memory_dir=self.memory_settings["agentic_memory_bank"]["path"] + memory_dir=self.memory_settings["AgenticMemoryBank"]["path"] ) def create_agent(self): @@ -513,9 +513,9 @@ def stop(self): async def handle_user_message(self, text, task_assignment_callback, should_await=True): page = self.page_log.begin_page( - summary="Apprentice.handle_user_message", + summary="FastLearner.handle_user_message", details="", - method_call="Apprentice.handle_user_message") + method_call="FastLearner.handle_user_message") # Pass the user message through to the memory controller. response = await self.memory.handle_user_message(text, task_assignment_callback, should_await) diff --git a/python/packages/autogen-ext/samples/long.yaml b/python/packages/autogen-ext/samples/long.yaml new file mode 100644 index 000000000000..219be2d1d78e --- /dev/null +++ b/python/packages/autogen-ext/samples/long.yaml @@ -0,0 +1,38 @@ + +Evaluator: + PageLog: + path: ~/pagelogs/long + +client: + provider: trapi # openai, azure_openai, or trapi + api_key: sk-xx # only for openai + # Add the model name here. + temperature: 0.8 + max_tokens: 4096 + presence_penalty: 0.0 + frequency_penalty: 0.0 + top_p: 1.0 + max_retries: 65535 + ClientWrapper: # Provides record & replay functionality + enabled: 0 # Only works for thin_agent currently + mode: check-replay # pass-through, record, or check-replay + session_name: long + +FastLearner: + AgenticMemoryController: + AgenticMemoryBank: + path: ~/agentic_memory_bank/long + AgentWrapper: + base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. 
+ +evaluations: + - name: eval_teachability + + - name: eval_learning_from_demonstration + num_trials: 10 # 1-10 + + - name: eval_self_teaching + num_loops: 10 # 1-10 + max_train_trials: 10 # 2-10 Move to AMC + max_test_trials: 3 # 1-3 Move to AMC + num_final_test_trials: 3 # 1-3 diff --git a/python/packages/autogen-ext/samples/short.yaml b/python/packages/autogen-ext/samples/short.yaml index 984513ae1ef7..2a55bb9a819a 100644 --- a/python/packages/autogen-ext/samples/short.yaml +++ b/python/packages/autogen-ext/samples/short.yaml @@ -1,6 +1,6 @@ -evaluator: - pagelog: +Evaluator: + PageLog: path: ~/pagelogs/temp client: @@ -13,17 +13,17 @@ client: frequency_penalty: 0.0 top_p: 1.0 max_retries: 65535 - wrapper: # Provides record & replay functionality + ClientWrapper: # Provides record & replay functionality enabled: 1 # Only works for thin_agent currently mode: check-replay # pass-through, record, or check-replay session_name: short-3 -apprentice: - agentic_memory_controller: - agentic_memory_bank: +FastLearner: + AgenticMemoryController: + AgenticMemoryBank: path: ~/agentic_memory_bank/temp - agent_wrapper: - base_agent: thin_agent # thin_agent or magentic_one + AgentWrapper: + base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. evaluations: - name: eval_teachability From 83a7ddc36a7b0ba09df2766223910fc9ebcbf99f Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 8 Jan 2025 17:00:07 -0800 Subject: [PATCH 26/93] Rerouted calls to AgenticMemoryController through FastLearner. --- python/packages/autogen-ext/samples/eval.py | 146 ++++++------------ .../autogen_ext/agentic_memory/__init__.py | 3 +- .../agentic_memory/_agentic_memory_bank.py | 4 + .../_agentic_memory_controller.py | 21 ++- .../agentic_memory/fast_learner.py | 83 ++++++++++ 5 files changed, 148 insertions(+), 109 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index fde0a7aed8d7..2a0028da1a9e 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -14,9 +14,10 @@ UserMessage, ) from typing import ( + Callable, Tuple, ) -from autogen_ext.agentic_memory import AgenticMemoryController, PageLog, Grader, ClientWrapper +from autogen_ext.agentic_memory import FastLearner, PageLog, Grader, ClientWrapper def define_tasks_with_answers(): @@ -82,7 +83,7 @@ async def eval_teachability(fast_learner, evaluator, task_assignment_callback, c task = task_with_answer["task"] answer = task_with_answer["expected_answer"] grader = Grader(client, page_log) - fast_learner.start(reset_memory=True) + fast_learner.reset_memory() # First test without memory. page.add_lines("\nClear memory, then ask the question.") @@ -113,7 +114,6 @@ async def eval_teachability(fast_learner, evaluator, task_assignment_callback, c else: page.add_lines("Answer is INCORRECT.\n", flush=True) - fast_learner.stop() page_log.finish_page(page) @@ -127,12 +127,14 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignm task_index = 5 task_with_answer = define_tasks_with_answers()[task_index] num_trials = settings["num_trials"] + fast_learner.reset_memory() - # First test after clearing memory. + # Start by clearing memory then running a baseline test. 
page.add_lines("To get a baseline, clear memory, then assign the task.") - num_successes, num_trials = await evaluator.test(task_with_answer=task_with_answer, num_trials=num_trials, - task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=True, client=client, - page_log=page_log, memory_dir=memory_dir) + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, + task_assignment_callback=task_assignment_callback, use_memory=True, client=client, + page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -140,14 +142,14 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignm page.add_lines("Demonstrate a solution to a similar task.") demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - memory = AgenticMemoryController(reset=False, client=client, page_log=page_log, memory_dir=memory_dir) - await memory.learn_from_demonstration(demo_task, demonstration) + await fast_learner.learn_from_demonstration(demo_task, demonstration) # Now test again to see if the demonstration (retrieved from memory) helps. page.add_lines("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await evaluator.test(task_with_answer=task_with_answer, num_trials=num_trials, - task_assignment_callback=task_assignment_callback, use_memory=True, reset_memory=False, client=client, - page_log=page_log, memory_dir=memory_dir) + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, + task_assignment_callback=task_assignment_callback, use_memory=True, client=client, + page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -161,6 +163,8 @@ async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, details='', method_call="eval_self_teaching") + fast_learner.reset_memory() + # Choose the tasks from those listed at the top. task_index_list = [3, 1] @@ -172,9 +176,8 @@ async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, total_num_trials = 0 for i in range(settings["num_loops"]): # Always train on the first task. 
- memory = AgenticMemoryController(reset=True, client=client, page_log=page_log, memory_dir=memory_dir) task_with_answer = task_with_answer_list[0] - await memory.train_on_task( + await fast_learner.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], task_assignment_callback=task_assignment_callback, @@ -184,15 +187,11 @@ async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, # Test on all tasks. for j, task_with_answer in enumerate(task_with_answer_list): - num_successes, num_trials = await evaluator.test( - task_with_answer=task_with_answer, - num_trials=settings["num_final_test_trials"], - task_assignment_callback=task_assignment_callback, - use_memory=True, - reset_memory=False, - client=client, - page_log=page_log, - memory_dir=memory_dir) + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=settings["num_final_test_trials"], + task_assignment_callback=task_assignment_callback, use_memory=True, client=client, + page_log=page_log) + page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) total_num_successes_list[j] += num_successes @@ -390,44 +389,34 @@ async def assign_task_to_client(self, task, client, page_log): page_log.finish_page(page) return response_str, work_history - async def test(self, task_with_answer, num_trials, task_assignment_callback, use_memory, reset_memory, - client, page_log, memory_dir) -> Tuple[str, int, int]: + async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, task_assignment_callback, use_memory, + client, page_log) -> Tuple[int, int]: page = page_log.begin_page( - summary="Evaluator.test", + summary="Evaluator.test_fast_learner", details='', - method_call="Evaluator.test") + method_call="Evaluator.test_fast_learner") - grader = Grader(client, page_log) + page.add_lines("Testing the fast learner on the given task.\n", flush=True) - if use_memory: - page.add_lines("Testing with memory.\n", flush=True) - memory = AgenticMemoryController(reset=reset_memory, client=client, page_log=page_log, - memory_dir=memory_dir) - response, num_successes, num_trials = await memory.test_on_task( - task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"], - task_assignment_callback=task_assignment_callback, - num_trials=num_trials) - else: - page.add_lines("Testing without memory.\n", flush=True) - response = None - num_successes = 0 - for trial in range(num_trials): - page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) - page.add_lines("Try to solve the task.\n", flush=True) - response, _ = await task_assignment_callback(task_with_answer["task"], client, page_log) - - response_is_correct, extracted_answer = await grader.is_response_correct( - task_with_answer["task"], response, task_with_answer["expected_answer"]) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - num_successes += 1 - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + grader = Grader(client, page_log) + num_successes = 0 + + for trial in range(num_trials): + page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) + page.add_lines("Try to solve the task.\n", flush=True) + task = task_with_answer["task"] + 
response = await fast_learner.assign_task(task, task_assignment_callback, + should_await=True, use_memory=use_memory) + response_is_correct, extracted_answer = await grader.is_response_correct( + task, response, task_with_answer["expected_answer"]) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + num_successes += 1 + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) - page_log.finish_page(page) return num_successes, num_trials @@ -440,9 +429,9 @@ async def run(self, settings_filepath): # Create the PageLog. self.page_log = PageLog(evaluator_settings["PageLog"]) page = self.page_log.begin_page( - summary="main", + summary="Evaluator.main", details='', - method_call="main") + method_call="Evaluator.main") # Create the client, which is used by both the fast_learner and the evaluator. client = self.create_client(settings["client"]) @@ -481,49 +470,6 @@ async def run(self, settings_filepath): self.page_log.finish_page(page) -class FastLearner: - def __init__(self, settings, evaluator, client, page_log): - self.settings = settings - self.evaluator = evaluator - self.client = client - self.page_log = page_log - self.memory_settings = settings["AgenticMemoryController"] - self.agent_settings = settings["AgentWrapper"] - self.memory = None - self.agent = None - - def create_memory(self, reset_memory): - self.memory = AgenticMemoryController( - reset=reset_memory, - client=self.client, - page_log=self.page_log, - memory_dir=self.memory_settings["AgenticMemoryBank"]["path"] - ) - - def create_agent(self): - return None - - def start(self, reset_memory): - self.create_memory(reset_memory) - self.create_agent() - - def stop(self): - self.memory = None - self.agent = None - - async def handle_user_message(self, text, task_assignment_callback, should_await=True): - page = self.page_log.begin_page( - summary="FastLearner.handle_user_message", - details="", - method_call="FastLearner.handle_user_message") - - # Pass the user message through to the memory controller. 
- response = await self.memory.handle_user_message(text, task_assignment_callback, should_await) - - self.page_log.finish_page(page) - return response - - if __name__ == "__main__": args = sys.argv[1:] if len(args) != 1: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index c2445e11aa79..7b1c37f58519 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,6 +1,7 @@ +from .fast_learner import FastLearner from ._agentic_memory_controller import AgenticMemoryController from ._page_log import PageLog from ._grader import Grader from .client_wrapper import ClientWrapper -__all__ = ["AgenticMemoryController", "PageLog", "Grader", "ClientWrapper"] +__all__ = ["FastLearner", "AgenticMemoryController", "PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index 08cff9ef1af7..e93dc654f531 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -56,6 +56,10 @@ def __init__( if reset: self.reset_insights() + def reset(self): + self.string_map.reset_db() + self.reset_insights() + def reset_insights(self): """Forces immediate deletion of the insights, in memory and on disk.""" self.uid_insight_dict = {} diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py index ab9ece64a0ec..27c4962f3398 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py @@ -12,6 +12,9 @@ def __init__(self, reset, client, page_log, memory_dir): self.memory_bank = AgenticMemoryBank(verbosity=0, reset=reset, memory_dir=memory_dir, page_log=page_log) self.grader = Grader(client, page_log) + def reset_memory(self): + self.memory_bank.reset() + async def train_on_task(self, task: str, # The task to be completed. expected_answer: str, # The expected answer to the task. @@ -286,23 +289,25 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to self.page_log.finish_page(page) return final_response, successful_insight - async def execute_task(self, task: str, task_assignment_callback: Callable, should_await: bool, - should_retrieve_insights: bool = True): + async def assign_task(self, task: str, task_assignment_callback: Callable, use_memory: bool = True, + should_await: bool = True): """ - Assigns a task to the completion agent, along with any relevant insights/memories. + Assigns a task to the agent, along with any relevant insights/memories. """ page = self.page_log.begin_page( - summary="AgenticMemoryController.execute_task", + summary="AgenticMemoryController.assign_task", details="", - method_call="AgenticMemoryController.execute_task") + method_call="AgenticMemoryController.assign_task") - if should_retrieve_insights: + if use_memory: # Try to retrieve any relevant memories from the DB. 
filtered_insights = await self.retrieve_relevant_insights(task) if len(filtered_insights) > 0: page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) memory_section = self.format_memory_section(filtered_insights) task = task + '\n\n' + memory_section + # if len(memory_section) > 0: # Best to include this condition, but it will require new recordings. + # task = task + '\n\n' + memory_section # Attempt to solve the task. page.add_lines("Try to solve the task.\n", flush=True) @@ -333,8 +338,8 @@ async def handle_user_message(self, text, task_assignment_callback, should_await await self.add_insight_without_task_to_memory(advice) print("Passing task to completion agent.") - response = await self.execute_task(text, task_assignment_callback, should_await, - should_retrieve_insights=(advice is None)) + response = await self.assign_task(text, task_assignment_callback, use_memory=(advice is None), + should_await=should_await) self.page_log.finish_page(page) return response diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py new file mode 100644 index 000000000000..630fe05ba000 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py @@ -0,0 +1,83 @@ +from typing import Callable +from ._agentic_memory_controller import AgenticMemoryController + + +class FastLearner: + def __init__(self, settings, evaluator, client, page_log): + self.settings = settings + self.evaluator = evaluator + self.client = client + self.page_log = page_log + + # Create the AgenticMemoryController, which creates the AgenticMemoryBank. + self.memory_controller_settings = settings["AgenticMemoryController"] + self.memory_controller = AgenticMemoryController( + reset=False, + client=self.client, + page_log=self.page_log, + memory_dir=self.memory_controller_settings["AgenticMemoryBank"]["path"] + ) + + # Create the agent wrapper, which creates the base agent. + self.agent_settings = settings["AgentWrapper"] + self.agent = None + + def reset_memory(self): + if self.memory_controller is not None: + self.memory_controller.reset_memory() + + async def handle_user_message(self, text, task_assignment_callback, should_await=True): + """A foreground operation, intended for immediate response to the user.""" + page = self.page_log.begin_page( + summary="FastLearner.handle_user_message", + details="", + method_call="FastLearner.handle_user_message") + + # Pass the user message through to the memory controller. + response = await self.memory_controller.handle_user_message(text, task_assignment_callback, should_await) + + self.page_log.finish_page(page) + return response + + async def learn_from_demonstration(self, task, demonstration): + """A foreground operation, assuming that the task and demonstration are already known.""" + page = self.page_log.begin_page( + summary="FastLearner.learn_from_demonstration", + details="", + method_call="FastLearner.learn_from_demonstration") + + # Pass the task and demonstration through to the memory controller. + await self.memory_controller.learn_from_demonstration(task, demonstration) + + self.page_log.finish_page(page) + + async def assign_task(self, task: str, task_assignment_callback: Callable, use_memory: bool = True, + should_await: bool = True): + """ + Assigns a task to the agent, along with any relevant insights/memories. 
+ """ + page = self.page_log.begin_page( + summary="FastLearner.assign_task", + details="", + method_call="FastLearner.assign_task") + + # Pass the task through to the memory controller. + response = await self.memory_controller.assign_task(task, task_assignment_callback, use_memory, should_await) + + self.page_log.finish_page(page) + return response + + async def train_on_task(self, task, expected_answer, task_assignment_callback, final_format_instructions, + max_train_trials, max_test_trials): + """A background operation, not intended for immediate response.""" + page = self.page_log.begin_page( + summary="FastLearner.train_on_task", + details="", + method_call="FastLearner.train_on_task") + + # Pass the task through to the memory controller. + await self.memory_controller.train_on_task( + task, expected_answer, task_assignment_callback, final_format_instructions, + max_train_trials, max_test_trials) + + self.page_log.finish_page(page) From 3047c1c4915b96e1308fd3a2c47a07e937d33b05 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 9 Jan 2025 14:52:07 -0800 Subject: [PATCH 27/93] Replace task_assignment_callback with AgentWrapper. --- python/packages/autogen-ext/samples/eval.py | 145 ++---------------- python/packages/autogen-ext/samples/m1.yaml | 34 ++++ .../agentic_memory/_agent_wrapper.py | 114 ++++++++++++++ .../agentic_memory/_agentic_memory_bank.py | 13 +- .../_agentic_memory_controller.py | 42 ++--- .../agentic_memory/_string_similarity_map.py | 17 +- .../agentic_memory/fast_learner.py | 31 ++-- 7 files changed, 213 insertions(+), 183 deletions(-) create mode 100644 python/packages/autogen-ext/samples/m1.yaml create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/samples/eval.py index 2a0028da1a9e..572279526e09 100644 --- a/python/packages/autogen-ext/samples/eval.py +++ b/python/packages/autogen-ext/samples/eval.py @@ -4,19 +4,7 @@ from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.models.openai import AzureOpenAIChatCompletionClient from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider -from autogen_agentchat.agents import AssistantAgent -from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str -from autogen_agentchat.ui._console import Console -from autogen_core.models import ( - SystemMessage, - UserMessage, -) -from typing import ( - Callable, - Tuple, -) +from typing import Tuple from autogen_ext.agentic_memory import FastLearner, PageLog, Grader, ClientWrapper @@ -70,7 +58,7 @@ def define_tasks_with_answers(): return tasks_with_answers -async def eval_teachability(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_teachability(fast_learner, evaluator, client, page_log, settings): """An evaluation""" page = page_log.begin_page( summary="eval_teachability", @@ -87,7 +75,7 @@ async def eval_teachability(fast_learner, evaluator, task_assignment_callback, c # First test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await fast_learner.handle_user_message(task, task_assignment_callback) + response = await fast_learner.handle_user_message(task) # Check the response. 
response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -100,11 +88,11 @@ async def eval_teachability(fast_learner, evaluator, task_assignment_callback, c # Give the advice. page.add_lines("Give the advice.") insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await fast_learner.handle_user_message(insight, task_assignment_callback) + await fast_learner.handle_user_message(insight) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await fast_learner.handle_user_message(task, task_assignment_callback) + response = await fast_learner.handle_user_message(task) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) @@ -117,7 +105,7 @@ async def eval_teachability(fast_learner, evaluator, task_assignment_callback, c page_log.finish_page(page) -async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings): """An evaluation""" page = page_log.begin_page( summary="eval_learning_from_demonstration", @@ -133,8 +121,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignm page.add_lines("To get a baseline, clear memory, then assign the task.") num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, - task_assignment_callback=task_assignment_callback, use_memory=True, client=client, - page_log=page_log) + use_memory=True, client=client, page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -148,15 +135,14 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, task_assignm page.add_lines("Assign the task again to see if the demonstration helps.") num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, - task_assignment_callback=task_assignment_callback, use_memory=True, client=client, - page_log=page_log) + use_memory=True, client=client, page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) page_log.finish_page(page) -async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, client, page_log, memory_dir, settings): +async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings): """An evaluation""" page = page_log.begin_page( summary="eval_self_teaching", @@ -180,7 +166,6 @@ async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, await fast_learner.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], - task_assignment_callback=task_assignment_callback, final_format_instructions="", 
max_train_trials=settings["max_train_trials"], max_test_trials=settings["max_test_trials"]) @@ -189,8 +174,7 @@ async def eval_self_teaching(fast_learner, evaluator, task_assignment_callback, for j, task_with_answer in enumerate(task_with_answer_list): num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=settings["num_final_test_trials"], - task_assignment_callback=task_assignment_callback, use_memory=True, client=client, - page_log=page_log) + use_memory=True, client=client, page_log=page_log) page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) @@ -313,83 +297,7 @@ def create_trapi_client(self, settings): self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client - async def assign_task_to_magentic_one(self, task, model_client, page_log) -> Tuple[str, str]: - page = page_log.begin_page( - summary="assign_task_to_magentic_one", - details='', - method_call="assign_task_to_magentic_one") - - page.add_lines(task) - - general_agent = AssistantAgent( - "general_agent", - model_client, - description="A general GPT-4o AI assistant capable of performing a variety of tasks.", ) - - web_surfer = MultimodalWebSurfer( - name="web_surfer", - model_client=model_client, - downloads_folder="logs", - debug_dir="logs", - to_save_screenshots=True, - ) - - team = MagenticOneGroupChat( - [general_agent, web_surfer], - model_client=model_client, - max_turns=20, - ) - - # Get the team's text response to the task. - stream = team.run_stream(task=task) - task_result = await Console(stream) - response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - - # MagenticOne's response is the chat history, which we use here as the work history. - work_history = response_str - - page_log.finish_page(page) - return response_str, work_history - - async def assign_task_to_client(self, task, client, page_log): - page = page_log.begin_page( - summary="assign_task_to_client", - details='', - method_call="assign_task_to_client") - - page.add_lines(task) - - system_message_content = """You are a helpful and thoughtful assistant. -In responding to every user message, you follow the same multi-step process given here: -1. Explain your understanding of the user message in detail, covering all the important points. -2. List as many possible responses as you can think of. -3. Carefully list and weigh the pros and cons (if any) of each possible response. -4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. -5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. -6. 
Finish by providing your final response in the particular format requested by the user.""" - - system_message = SystemMessage(content=system_message_content) - user_message = UserMessage(content=task, source="User") - - input_messages = [system_message] + [user_message] - response = await client.create(input_messages) - response_str = response.content - - # Log the model call - page_log.add_model_call(description="Ask the model", - details="to complete the task", input_messages=input_messages, - response=response, - num_input_tokens=0, caller='assign_task_to_client') - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) - - # Use the response as the work history as well. - work_history = response_str - - page_log.finish_page(page) - return response_str, work_history - - async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, task_assignment_callback, use_memory, + async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, use_memory, client, page_log) -> Tuple[int, int]: page = page_log.begin_page( summary="Evaluator.test_fast_learner", @@ -405,8 +313,7 @@ async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, ta page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) page.add_lines("Try to solve the task.\n", flush=True) task = task_with_answer["task"] - response = await fast_learner.assign_task(task, task_assignment_callback, - should_await=True, use_memory=use_memory) + response = await fast_learner.assign_task(task, use_memory=use_memory) response_is_correct, extracted_answer = await grader.is_response_correct( task, response, task_with_answer["expected_answer"]) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) @@ -437,30 +344,12 @@ async def run(self, settings_filepath): client = self.create_client(settings["client"]) # Create the fast_learner. - fast_learner_settings = settings["FastLearner"] - fast_learner = FastLearner(fast_learner_settings, self, client, self.page_log) - - # Configure the agentic memory controller. - agentic_memory_controller_settings = fast_learner_settings["AgenticMemoryController"] - agentic_memory_bank_settings = agentic_memory_controller_settings["AgenticMemoryBank"] - - # Configure the agent wrapper. - agent_wrapper_settings = fast_learner_settings["AgentWrapper"] - - # Configure the base agent. - base_agent = agent_wrapper_settings["base_agent"] - if base_agent == "MagenticOneGroupChat": - task_assignment_callback = self.assign_task_to_magentic_one - elif base_agent == "thin_agent": - task_assignment_callback = self.assign_task_to_client - else: - assert False, "Invalid base agent" + fast_learner = FastLearner(settings["FastLearner"], self, client, self.page_log) - # Execute each evaluations. - memory_path = agentic_memory_bank_settings["path"] - for ev in settings["evaluations"]: - eval_function = globals()[ev["name"]] - await eval_function(fast_learner, self, task_assignment_callback, client, self.page_log, memory_path, ev) + # Execute each evaluation. + for evaluation in settings["evaluations"]: + eval_function = globals()[evaluation["name"]] + await eval_function(fast_learner, self, client, self.page_log, evaluation) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. 
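A minimal, self-contained sketch (not part of the patch) of the name-based dispatch in the run() hunk above: each evaluation's "name" field from the YAML settings is resolved to a module-level coroutine via globals() and awaited. The inline YAML string and the eval_demo function below are illustrative stand-ins.

    import asyncio

    import yaml

    async def eval_demo(settings):
        # Stand-in for an evaluation such as eval_teachability or eval_self_teaching.
        print("Running {} trial(s).".format(settings["num_trials"]))

    async def main():
        # Flow-style YAML standing in for a settings file like m1.yaml below.
        settings = yaml.safe_load("evaluations: [{name: eval_demo, num_trials: 2}]")
        for evaluation in settings["evaluations"]:
            eval_function = globals()[evaluation["name"]]  # Resolve the configured name to a function.
            await eval_function(evaluation)

    asyncio.run(main())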
diff --git a/python/packages/autogen-ext/samples/m1.yaml b/python/packages/autogen-ext/samples/m1.yaml new file mode 100644 index 000000000000..11f32da605bb --- /dev/null +++ b/python/packages/autogen-ext/samples/m1.yaml @@ -0,0 +1,34 @@ + +Evaluator: + PageLog: + path: ~/pagelogs/m1 + +client: + provider: trapi # openai, azure_openai, or trapi + api_key: sk-xx # only for openai + # Add the model name here. + temperature: 0.8 + max_tokens: 4096 + presence_penalty: 0.0 + frequency_penalty: 0.0 + top_p: 1.0 + max_retries: 65535 + +FastLearner: + AgenticMemoryController: + AgenticMemoryBank: + path: ~/agentic_memory_bank/m1 + AgentWrapper: + base_agent: MagenticOneGroupChat # MagenticOneGroupChat, thin_agent, etc. + +evaluations: +# - name: eval_teachability +# +# - name: eval_learning_from_demonstration +# num_trials: 1 # 1-10 + + - name: eval_self_teaching + num_loops: 1 # 1-10 + max_train_trials: 2 # 2-10 Move to AMC + max_test_trials: 1 # 1-3 Move to AMC + num_final_test_trials: 1 # 1-3 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py new file mode 100644 index 000000000000..3ab458441e76 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py @@ -0,0 +1,114 @@ +from autogen_agentchat.agents import AssistantAgent +from autogen_agentchat.teams import MagenticOneGroupChat +from autogen_ext.agents.web_surfer import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str +from autogen_agentchat.ui._console import Console +from autogen_core.models import ( + SystemMessage, + UserMessage, +) +from typing import Tuple + + +class AgentWrapper: + def __init__(self, settings, client, page_log): + self.settings = settings + self.client = client + self.page_log = page_log + self.base_agent_name = self.settings["base_agent"] + + async def assign_task(self, task): + """ + Assigns a task to the base agent. + """ + page = self.page_log.begin_page( + summary="AgentWrapper.assign_task", + details="", + method_call="AgentWrapper.assign_task") + + # Pass the task through to the base agent. + if self.base_agent_name == "MagenticOneGroupChat": + response, work_history = await self.assign_task_to_magentic_one(task) + elif self.base_agent_name == "thin_agent": + response, work_history = await self.assign_task_to_thin_agent(task) + else: + assert False, "Invalid base agent" + + self.page_log.finish_page(page) + return response, work_history + + async def assign_task_to_thin_agent(self, task): + page = self.page_log.begin_page( + summary="AgentWrapper.assign_task_to_thin_agent", + details='', + method_call="AgentWrapper.assign_task_to_thin_agent") + + page.add_lines(task) + + system_message_content = """You are a helpful and thoughtful assistant. +In responding to every user message, you follow the same multi-step process given here: +1. Explain your understanding of the user message in detail, covering all the important points. +2. List as many possible responses as you can think of. +3. Carefully list and weigh the pros and cons (if any) of each possible response. +4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. +5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. +6. 
Finish by providing your final response in the particular format requested by the user."""
+
+        system_message = SystemMessage(content=system_message_content)
+        user_message = UserMessage(content=task, source="User")
+
+        input_messages = [system_message] + [user_message]
+        response = await self.client.create(input_messages)
+        response_str = response.content
+
+        # Log the model call
+        self.page_log.add_model_call(description="Ask the model",
+                                     details="to complete the task", input_messages=input_messages,
+                                     response=response,
+                                     num_input_tokens=0, caller='assign_task_to_thin_agent')
+        page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True)
+
+        # Use the response as the work history as well.
+        work_history = response_str
+
+        self.page_log.finish_page(page)
+        return response_str, work_history
+
+    async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]:
+        page = self.page_log.begin_page(
+            summary="AgentWrapper.assign_task_to_magentic_one",
+            details='',
+            method_call="AgentWrapper.assign_task_to_magentic_one")
+
+        page.add_lines(task)
+
+        general_agent = AssistantAgent(
+            "general_agent",
+            self.client,
+            description="A general GPT-4o AI assistant capable of performing a variety of tasks.", )
+
+        web_surfer = MultimodalWebSurfer(
+            name="web_surfer",
+            model_client=self.client,
+            downloads_folder="logs",
+            debug_dir="logs",
+            to_save_screenshots=True,
+        )
+
+        team = MagenticOneGroupChat(
+            [general_agent, web_surfer],
+            model_client=self.client,
+            max_turns=20,
+        )
+
+        # Get the team's text response to the task.
+        stream = team.run_stream(task=task)
+        task_result = await Console(stream)
+        response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages])
+        page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True)
+
+        # MagenticOne's response is the chat history, which we use here as the work history.
+        work_history = response_str
+
+        self.page_log.finish_page(page)
+        return response_str, work_history
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py
index e93dc654f531..8e781e828c6b 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py
@@ -17,23 +17,22 @@ class AgenticMemoryBank:
     """
     Stores task-completion insights in a vector DB for later retrieval.
     """
-    def __init__(
-        self,
+    def __init__(self,
+                 settings: Dict,
         verbosity: Optional[int] = 0,
         reset: Optional[bool] = False,
-        memory_dir: str = "tmp/memory",
         page_log=None,
     ):
         """
         Args:
         - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists.
         - reset (Optional, bool): True to clear the DB before starting. Default False
-        - memory_dir (Optional, str): path to the directory where this run's memory data is stored.
         - page_log (Optional, PageLog): the PageLog object to use for logging.
""" - memory_dir = os.path.expanduser(memory_dir) - path_to_db_dir = os.path.join(memory_dir, "string_map") - self.path_to_dict = os.path.join(memory_dir, "uid_insight_dict.pkl") + self.settings = settings + memory_dir_path = os.path.expanduser(self.settings["path"]) + path_to_db_dir = os.path.join(memory_dir_path, "string_map") + self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") self.page_log = page_log parent_page = self.page_log.last_page() diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py index 27c4962f3398..8f9b79050c3d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py @@ -5,11 +5,14 @@ class AgenticMemoryController: - def __init__(self, reset, client, page_log, memory_dir): + def __init__(self, settings, agent, reset, client, page_log): + self.settings = settings + self.agent = agent self.client = client self.page_log = page_log self.prompter = Prompter(client, page_log) - self.memory_bank = AgenticMemoryBank(verbosity=0, reset=reset, memory_dir=memory_dir, page_log=page_log) + self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], + verbosity=0, reset=reset, page_log=page_log) self.grader = Grader(client, page_log) def reset_memory(self): @@ -18,7 +21,6 @@ def reset_memory(self): async def train_on_task(self, task: str, # The task to be completed. expected_answer: str, # The expected answer to the task. - task_assignment_callback: Callable, # The function through which to assign the task. final_format_instructions: str, # Instructions for formatting the final response, if any. max_train_trials: int, # The maximum number of training trials to attempt. max_test_trials: int, # The number of successful test trials to qualify as success. @@ -33,8 +35,7 @@ async def train_on_task(self, # Attempt to create useful new memories. page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) - _, insight = await self._iterate_on_task(task, expected_answer, task_assignment_callback, - final_format_instructions, max_train_trials, max_test_trials) + _, insight = await self._iterate_on_task(task, expected_answer, final_format_instructions, max_train_trials, max_test_trials) if insight is None: page.add_lines("No useful insight was discovered.\n", flush=True) else: @@ -44,7 +45,7 @@ async def train_on_task(self, self.page_log.finish_page(page) - async def test_on_task(self, task: str, expected_answer: str, task_assignment_callback: Callable, num_trials=1): + async def test_on_task(self, task: str, expected_answer: str, num_trials=1): """ Assigns a task to the completion agent, along with any relevant insights/memories. """ @@ -70,7 +71,7 @@ async def test_on_task(self, task: str, expected_answer: str, task_assignment_ca # Attempt to solve the task. 
page.add_lines("Try to solve the task.\n", flush=True) - response, _ = await task_assignment_callback(task_plus_insights, self.client, self.page_log) + response, _ = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer) @@ -188,8 +189,7 @@ def format_memory_section(self, memories): memory_section += ('- ' + mem + '\n') return memory_section - async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, - assign_task_to_completer: Callable, num_trials: int): + async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int): """ Attempts to solve the given task multiple times to find a failure case to learn from. """ @@ -209,7 +209,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a # Attempt to solve the task. page.add_lines("Try to solve the task.", flush=True) - response, work_history = await assign_task_to_completer(task_plus_insights, self.client, self.page_log) + response, work_history = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer) @@ -224,7 +224,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a self.page_log.finish_page(page) return failure_found, response, work_history - async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to_completer: Callable, + async def _iterate_on_task(self, task: str, expected_answer: str, final_format_instructions: str, max_train_trials: int, max_test_trials: int): page = self.page_log.begin_page( summary="AgenticMemoryController._iterate_on_task", @@ -257,7 +257,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to # Can we find a failure case to learn from? failure_found, response, work_history = await self._test_for_failure( - task, task_plus_insights, expected_answer, assign_task_to_completer, max_test_trials) + task, task_plus_insights, expected_answer, max_test_trials) if not failure_found: # No. Time to exit the loop. page.add_lines("\nResponse is CORRECT.\n Stop looking for insights.\n", flush=True) @@ -289,7 +289,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, assign_task_to self.page_log.finish_page(page) return final_response, successful_insight - async def assign_task(self, task: str, task_assignment_callback: Callable, use_memory: bool = True, + async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. @@ -312,34 +312,26 @@ async def assign_task(self, task: str, task_assignment_callback: Callable, use_m # Attempt to solve the task. 
page.add_lines("Try to solve the task.\n", flush=True) if should_await: - response, _ = await task_assignment_callback(task, self.client, self.page_log) + response, _ = await self.agent.assign_task(task) else: - response, _ = task_assignment_callback(task, self.client, self.page_log) - - # page.add_lines("Response: {}\n".format(response), flush=True) + response, _ = self.agent.assign_task(task) self.page_log.finish_page(page) return response - async def handle_user_message(self, text, task_assignment_callback, should_await=True): + async def handle_user_message(self, text, should_await=True): page = self.page_log.begin_page( summary="AgenticMemoryController.handle_user_message", details="", method_call="AgenticMemoryController.handle_user_message") - # task = await self.prompter.extract_task(text) - # page.add_lines("Task: {}".format(task), flush=True) - advice = await self.prompter.extract_advice(text) page.add_lines("Advice: {}".format(advice), flush=True) if advice is not None: - print("Adding advice to memory.") await self.add_insight_without_task_to_memory(advice) - print("Passing task to completion agent.") - response = await self.assign_task(text, task_assignment_callback, use_memory=(advice is None), - should_await=should_await) + response = await self.assign_task(text, use_memory=(advice is None), should_await=should_await) self.page_log.finish_page(page) return response diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py index 25853decc120..9b54d161f772 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py @@ -41,12 +41,14 @@ def __init__( self.uid_text_dict = {} self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - print("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) - print(" Location = {}".format(self.path_to_dict)) + if self.verbosity >= 1: + print("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) + print(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) self.last_string_pair_id = len(self.uid_text_dict) - print("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) + if self.verbosity >= 1: + print("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) if self.verbosity >= 3: self.list_string_pairs() @@ -67,10 +69,12 @@ def save_string_pairs_to_text_files(self): for file in os.listdir("mem_text"): os.remove(os.path.join("mem_text", file)) - print("LIST OF STRING PAIRS") + if self.verbosity >= 1: + print("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text - print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + if self.verbosity >= 1: + print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. 
with open("mem_text/{}.txt".format(uid), "w") as file: file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) @@ -82,7 +86,8 @@ def save_string_pairs(self): def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" - print("\nCLEARING STRING-PAIR MAP") + if self.verbosity >= 1: + print("\nCLEARING STRING-PAIR MAP") self.db_client.delete_collection("string-pairs") self.vec_db = self.db_client.create_collection("string-pairs") self.uid_text_dict = {} diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py index 630fe05ba000..eab64b71abb7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py @@ -1,5 +1,5 @@ -from typing import Callable from ._agentic_memory_controller import AgenticMemoryController +from ._agent_wrapper import AgentWrapper class FastLearner: @@ -9,24 +9,24 @@ def __init__(self, settings, evaluator, client, page_log): self.client = client self.page_log = page_log + # Create the agent wrapper, which creates the base agent. + self.agent_settings = settings["AgentWrapper"] + self.agent = AgentWrapper(settings=self.agent_settings, client=self.client, page_log=self.page_log) + # Create the AgenticMemoryController, which creates the AgenticMemoryBank. - self.memory_controller_settings = settings["AgenticMemoryController"] self.memory_controller = AgenticMemoryController( + settings=self.settings["AgenticMemoryController"], + agent=self.agent, reset=False, client=self.client, - page_log=self.page_log, - memory_dir=self.memory_controller_settings["AgenticMemoryBank"]["path"] + page_log=self.page_log ) - # Create the agent wrapper, which creates the base agent. - self.agent_settings = settings["AgentWrapper"] - self.agent = None - def reset_memory(self): if self.memory_controller is not None: self.memory_controller.reset_memory() - async def handle_user_message(self, text, task_assignment_callback, should_await=True): + async def handle_user_message(self, text, should_await=True): """A foreground operation, intended for immediate response to the user.""" page = self.page_log.begin_page( summary="FastLearner.handle_user_message", @@ -34,7 +34,7 @@ async def handle_user_message(self, text, task_assignment_callback, should_await method_call="FastLearner.handle_user_message") # Pass the user message through to the memory controller. - response = await self.memory_controller.handle_user_message(text, task_assignment_callback, should_await) + response = await self.memory_controller.handle_user_message(text, should_await) self.page_log.finish_page(page) return response @@ -51,8 +51,7 @@ async def learn_from_demonstration(self, task, demonstration): self.page_log.finish_page(page) - async def assign_task(self, task: str, task_assignment_callback: Callable, use_memory: bool = True, - should_await: bool = True): + async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ @@ -62,13 +61,12 @@ async def assign_task(self, task: str, task_assignment_callback: Callable, use_m method_call="FastLearner.assign_task") # Pass the task through to the memory controller. 
- response = await self.memory_controller.assign_task(task, task_assignment_callback, use_memory, should_await) + response = await self.memory_controller.assign_task(task, use_memory, should_await) self.page_log.finish_page(page) return response - async def train_on_task(self, task, expected_answer, task_assignment_callback, final_format_instructions, - max_train_trials, max_test_trials): + async def train_on_task(self, task, expected_answer, final_format_instructions, max_train_trials, max_test_trials): """A background operation, not intended for immediate response.""" page = self.page_log.begin_page( summary="FastLearner.train_on_task", @@ -76,8 +74,7 @@ async def train_on_task(self, task, expected_answer, task_assignment_callback, f method_call="FastLearner.train_on_task") # Pass the task through to the memory controller. - await self.memory_controller.train_on_task( - task, expected_answer, task_assignment_callback, final_format_instructions, + await self.memory_controller.train_on_task(task, expected_answer, final_format_instructions, max_train_trials, max_test_trials) self.page_log.finish_page(page) From 1f20b79e09c8e5545e3258c67f29b8b86e8e0e63 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 9 Jan 2025 16:35:40 -0800 Subject: [PATCH 28/93] Segregate files into subfolders, eval framework vs. implementation, etc. --- .../src/autogen_ext/agentic_memory/__init__.py | 11 +++++------ .../agentic_memory/{ => eval_framework}/_grader.py | 2 +- .../agentic_memory/{ => eval_framework}/_page_log.py | 0 .../{ => eval_framework}/client_wrapper.py | 0 .../agentic_memory/eval_framework}/eval.py | 0 .../apprentice_v1}/_agent_wrapper.py | 0 .../apprentice_v1}/_agentic_memory_bank.py | 0 .../apprentice_v1}/_agentic_memory_controller.py | 2 +- .../{ => fast_learners/apprentice_v1}/_prompter.py | 2 +- .../apprentice_v1}/_string_similarity_map.py | 0 .../{ => fast_learners/apprentice_v1}/fast_learner.py | 0 .../agentic_memory/settings_files}/long.yaml | 0 .../agentic_memory/settings_files}/m1.yaml | 0 .../agentic_memory/settings_files}/short.yaml | 2 +- 14 files changed, 9 insertions(+), 10 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => eval_framework}/_grader.py (99%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => eval_framework}/_page_log.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => eval_framework}/client_wrapper.py (100%) rename python/packages/autogen-ext/{samples => src/autogen_ext/agentic_memory/eval_framework}/eval.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/_agent_wrapper.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/_agentic_memory_bank.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/_agentic_memory_controller.py (99%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/_prompter.py (99%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/_string_similarity_map.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{ => fast_learners/apprentice_v1}/fast_learner.py (100%) rename python/packages/autogen-ext/{samples => src/autogen_ext/agentic_memory/settings_files}/long.yaml (100%) rename python/packages/autogen-ext/{samples => src/autogen_ext/agentic_memory/settings_files}/m1.yaml (100%) 
rename python/packages/autogen-ext/{samples => src/autogen_ext/agentic_memory/settings_files}/short.yaml (97%) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 7b1c37f58519..3d57901089b6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,7 +1,6 @@ -from .fast_learner import FastLearner -from ._agentic_memory_controller import AgenticMemoryController -from ._page_log import PageLog -from ._grader import Grader -from .client_wrapper import ClientWrapper +from .fast_learners.apprentice_v1.fast_learner import FastLearner +from .eval_framework._page_log import PageLog +from .eval_framework._grader import Grader +from .eval_framework.client_wrapper import ClientWrapper -__all__ = ["FastLearner", "AgenticMemoryController", "PageLog", "Grader", "ClientWrapper"] +__all__ = ["FastLearner", "PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py index 55f70c651ee9..6b4601ce91d6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py @@ -8,7 +8,7 @@ CreateResult, ) -from ._utils import UserContent +from .._utils import UserContent class Grader: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_page_log.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_page_log.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_page_log.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/client_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/client_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/client_wrapper.py diff --git a/python/packages/autogen-ext/samples/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py similarity index 100% rename from python/packages/autogen-ext/samples/eval.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agent_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agent_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agent_wrapper.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_bank.py similarity index 100% rename from 
python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_bank.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_controller.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_controller.py index 8f9b79050c3d..8135547f0f0b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_controller.py @@ -1,7 +1,7 @@ from typing import Callable, List from ._prompter import Prompter from ._agentic_memory_bank import AgenticMemoryBank -from ._grader import Grader +from ...eval_framework._grader import Grader class AgenticMemoryController: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_prompter.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_prompter.py index 6e90998f59bc..4c39e36cf2ae 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_prompter.py @@ -11,7 +11,7 @@ from autogen_core import FunctionCall, Image -from ._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content +from ..._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content class Prompter: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_string_similarity_map.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_string_similarity_map.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/fast_learner.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learner.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/fast_learner.py diff --git a/python/packages/autogen-ext/samples/long.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/long.yaml similarity index 100% rename from python/packages/autogen-ext/samples/long.yaml rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/long.yaml diff --git a/python/packages/autogen-ext/samples/m1.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/m1.yaml similarity index 100% rename from python/packages/autogen-ext/samples/m1.yaml rename to 
python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/m1.yaml diff --git a/python/packages/autogen-ext/samples/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml similarity index 97% rename from python/packages/autogen-ext/samples/short.yaml rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml index 2a55bb9a819a..c3072183ce37 100644 --- a/python/packages/autogen-ext/samples/short.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml @@ -1,7 +1,7 @@ Evaluator: PageLog: - path: ~/pagelogs/temp + path: ~/pagelogs/temp2 client: provider: trapi # openai, azure_openai, or trapi From de4c12b80ad4e7ab7536ab5731dc726b1c03f962 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 9 Jan 2025 18:15:39 -0800 Subject: [PATCH 29/93] Rename FastLearner subclass to Apprentice, and import it only as specified in settings. --- .../autogen_ext/agentic_memory/__init__.py | 3 +- .../agentic_memory/eval_framework/eval.py | 28 +++++++++++++++---- .../fast_learners/apprentice/__init__.py | 3 ++ .../_agent_wrapper.py | 0 .../_agentic_memory_bank.py | 0 .../_agentic_memory_controller.py | 0 .../_prompter.py | 0 .../_string_similarity_map.py | 0 .../apprentice.py} | 18 ++++++------ .../agentic_memory/settings_files/short.yaml | 2 ++ 10 files changed, 38 insertions(+), 16 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1 => apprentice}/_agent_wrapper.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1 => apprentice}/_agentic_memory_bank.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1 => apprentice}/_agentic_memory_controller.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1 => apprentice}/_prompter.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1 => apprentice}/_string_similarity_map.py (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/{apprentice_v1/fast_learner.py => apprentice/apprentice.py} (86%) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 3d57901089b6..f4c6fbe7a158 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,6 +1,5 @@ -from .fast_learners.apprentice_v1.fast_learner import FastLearner from .eval_framework._page_log import PageLog from .eval_framework._grader import Grader from .eval_framework.client_wrapper import ClientWrapper -__all__ = ["FastLearner", "PageLog", "Grader", "ClientWrapper"] +__all__ = ["PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 572279526e09..5afef35f588f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -1,11 +1,12 @@ -import sys +import sys, os import yaml import asyncio 
+import importlib from autogen_ext.models.openai import OpenAIChatCompletionClient from autogen_ext.models.openai import AzureOpenAIChatCompletionClient from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from typing import Tuple -from autogen_ext.agentic_memory import FastLearner, PageLog, Grader, ClientWrapper +from autogen_ext.agentic_memory import PageLog, Grader, ClientWrapper def define_tasks_with_answers(): @@ -340,11 +341,28 @@ async def run(self, settings_filepath): details='', method_call="Evaluator.main") - # Create the client, which is used by both the fast_learner and the evaluator. + # Create the client, passed to both the fast_learner and the evaluator. client = self.create_client(settings["client"]) - # Create the fast_learner. - fast_learner = FastLearner(settings["FastLearner"], self, client, self.page_log) + # Create the specified fast_learner implementation. + fast_learner_settings = settings["FastLearner"] + module_path = fast_learner_settings["module_path"] + try: + module = importlib.import_module(module_path) + except ModuleNotFoundError: + print('Failed to import {}'.format(module_path)) + raise + class_name = fast_learner_settings["class_name"] + try: + fast_learner_class = getattr(module, class_name) + except AttributeError: + print('Failed to import {}.{}'.format(module_path, class_name)) + raise + try: + fast_learner = fast_learner_class(fast_learner_settings, self, client, self.page_log) + except Exception as err: + print("Error creating \"{}\": {}".format(fast_learner_class, err)) + raise # Execute each evaluation. for evaluation in settings["evaluations"]: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py new file mode 100644 index 000000000000..521bdfa714e7 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py @@ -0,0 +1,3 @@ +from .apprentice import Apprentice + +__all__ = ["Apprentice"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agent_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_bank.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_bank.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_bank.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_agentic_memory_controller.py rename to 
python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_prompter.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_string_similarity_map.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/_string_similarity_map.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_string_similarity_map.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/fast_learner.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py similarity index 86% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/fast_learner.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py index eab64b71abb7..5cb4bcc43c8a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice_v1/fast_learner.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py @@ -2,7 +2,7 @@ from ._agent_wrapper import AgentWrapper -class FastLearner: +class Apprentice: def __init__(self, settings, evaluator, client, page_log): self.settings = settings self.evaluator = evaluator @@ -29,9 +29,9 @@ def reset_memory(self): async def handle_user_message(self, text, should_await=True): """A foreground operation, intended for immediate response to the user.""" page = self.page_log.begin_page( - summary="FastLearner.handle_user_message", + summary="Apprentice.handle_user_message", details="", - method_call="FastLearner.handle_user_message") + method_call="Apprentice.handle_user_message") # Pass the user message through to the memory controller. response = await self.memory_controller.handle_user_message(text, should_await) @@ -42,9 +42,9 @@ async def handle_user_message(self, text, should_await=True): async def learn_from_demonstration(self, task, demonstration): """A foreground operation, assuming that the task and demonstration are already known.""" page = self.page_log.begin_page( - summary="FastLearner.learn_from_demonstration", + summary="Apprentice.learn_from_demonstration", details="", - method_call="FastLearner.learn_from_demonstration") + method_call="Apprentice.learn_from_demonstration") # Pass the task and demonstration through to the memory controller. await self.memory_controller.learn_from_demonstration(task, demonstration) @@ -56,9 +56,9 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo Assigns a task to the agent, along with any relevant insights/memories. 
""" page = self.page_log.begin_page( - summary="FastLearner.assign_task", + summary="Apprentice.assign_task", details="", - method_call="FastLearner.assign_task") + method_call="Apprentice.assign_task") # Pass the task through to the memory controller. response = await self.memory_controller.assign_task(task, use_memory, should_await) @@ -69,9 +69,9 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo async def train_on_task(self, task, expected_answer, final_format_instructions, max_train_trials, max_test_trials): """A background operation, not intended for immediate response.""" page = self.page_log.begin_page( - summary="FastLearner.train_on_task", + summary="Apprentice.train_on_task", details="", - method_call="FastLearner.train_on_task") + method_call="Apprentice.train_on_task") # Pass the task through to the memory controller. await self.memory_controller.train_on_task(task, expected_answer, final_format_instructions, diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml index c3072183ce37..fb2475edcc57 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml @@ -19,6 +19,8 @@ client: session_name: short-3 FastLearner: + class_name: Apprentice + module_path: autogen_ext.agentic_memory.fast_learners.apprentice AgenticMemoryController: AgenticMemoryBank: path: ~/agentic_memory_bank/temp From a9d610842bc6aad6891f76f9edc4e9eaf4438522 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 10 Jan 2025 17:32:23 -0800 Subject: [PATCH 30/93] Refactoring, preparatory to removing eval_framework from the branch and the eventual PR. 
--- .../autogen_ext/agentic_memory/__init__.py | 6 ++--- .../{client_wrapper.py => _client_wrapper.py} | 0 .../agentic_memory/eval_framework/eval.py | 27 +++++++++++-------- .../apprentice/_agentic_memory_controller.py | 7 +++-- .../apprentice}/_grader.py | 2 +- .../apprentice}/_page_log.py | 0 .../fast_learners/apprentice/apprentice.py | 5 ++-- .../agentic_memory/settings_files/short.yaml | 10 +++---- 8 files changed, 30 insertions(+), 27 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/{client_wrapper.py => _client_wrapper.py} (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{eval_framework => fast_learners/apprentice}/_grader.py (99%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{eval_framework => fast_learners/apprentice}/_page_log.py (100%) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index f4c6fbe7a158..1b78d5073025 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,5 +1,5 @@ -from .eval_framework._page_log import PageLog -from .eval_framework._grader import Grader -from .eval_framework.client_wrapper import ClientWrapper +from .fast_learners.apprentice._page_log import PageLog +from .fast_learners.apprentice._grader import Grader +from .eval_framework._client_wrapper import ClientWrapper __all__ = ["PageLog", "Grader", "ClientWrapper"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/client_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_client_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/client_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_client_wrapper.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 5afef35f588f..8d59b8992b27 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -167,9 +167,7 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings await fast_learner.train_on_task( task=task_with_answer["task"], expected_answer=task_with_answer["expected_answer"], - final_format_instructions="", - max_train_trials=settings["max_train_trials"], - max_test_trials=settings["max_test_trials"]) + final_format_instructions="") # Test on all tasks. 
for j, task_with_answer in enumerate(task_with_answer_list): @@ -218,9 +216,8 @@ def create_client(self, settings): def create_oai_client(self, settings): # Create an OpenAI client - model_name = "gpt-4o-2024-08-06" client = OpenAIChatCompletionClient( - model=model_name, + model=settings["model"], api_key=settings["api_key"], temperature=settings["temperature"], max_tokens=settings["max_tokens"], @@ -230,17 +227,20 @@ def create_oai_client(self, settings): max_retries=settings["max_retries"], ) self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through OpenAI directly") + self.page_log.append_entry_line(" created through OpenAI") self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client def create_aoai_client(self, settings): # Create an Azure OpenAI client + model = settings["model"] + azure_deployment = model + "-eval" + if model == "gpt-4o-2024-08-06": + azure_endpoint = "https://agentic2.openai.azure.com/" + else: + azure_endpoint = "https://agentic1.openai.azure.com/" token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - azure_deployment = "gpt-4o-2024-08-06-eval" - model = "gpt-4o-2024-08-06" - azure_endpoint = "https://agentic2.openai.azure.com/" client = AzureOpenAIChatCompletionClient( azure_endpoint=azure_endpoint, azure_ad_token_provider=token_provider, @@ -275,8 +275,13 @@ def create_trapi_client(self, settings): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") - model = "gpt-4o-2024-08-06" # This is (for instance) the OpenAI model name, which is used to look up capabilities. - azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + model = settings["model"] + if model == "gpt-4o-2024-08-06": + azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + elif model == "gpt-4o-2024-05-13": + azure_deployment = 'gpt-4o_2024-05-13' + elif model == "o1-preview": + azure_deployment = 'o1-preview_2024-09-12' trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py index 8135547f0f0b..3bb3c36d9dd2 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py @@ -1,7 +1,7 @@ from typing import Callable, List from ._prompter import Prompter from ._agentic_memory_bank import AgenticMemoryBank -from ...eval_framework._grader import Grader +from ._grader import Grader class AgenticMemoryController: @@ -22,8 +22,6 @@ async def train_on_task(self, task: str, # The task to be completed. expected_answer: str, # The expected answer to the task. final_format_instructions: str, # Instructions for formatting the final response, if any. 
- max_train_trials: int, # The maximum number of training trials to attempt. - max_test_trials: int, # The number of successful test trials to qualify as success. ): """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. @@ -35,7 +33,8 @@ async def train_on_task(self, # Attempt to create useful new memories. page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) - _, insight = await self._iterate_on_task(task, expected_answer, final_format_instructions, max_train_trials, max_test_trials) + _, insight = await self._iterate_on_task(task, expected_answer, final_format_instructions, + self.settings["max_train_trials"], self.settings["max_test_trials"]) if insight is None: page.add_lines("No useful insight was discovered.\n", flush=True) else: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py index 6b4601ce91d6..c41c7cf48c75 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py @@ -8,7 +8,7 @@ CreateResult, ) -from .._utils import UserContent +from ..._utils import UserContent class Grader: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_page_log.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_page_log.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_page_log.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py index 5cb4bcc43c8a..78fe1bcbdba9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py @@ -66,7 +66,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo self.page_log.finish_page(page) return response - async def train_on_task(self, task, expected_answer, final_format_instructions, max_train_trials, max_test_trials): + async def train_on_task(self, task, expected_answer, final_format_instructions): """A background operation, not intended for immediate response.""" page = self.page_log.begin_page( summary="Apprentice.train_on_task", @@ -74,7 +74,6 @@ async def train_on_task(self, task, expected_answer, final_format_instructions, method_call="Apprentice.train_on_task") # Pass the task through to the memory controller. 
- await self.memory_controller.train_on_task(task, expected_answer, final_format_instructions, - max_train_trials, max_test_trials) + await self.memory_controller.train_on_task(task, expected_answer, final_format_instructions) self.page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml index fb2475edcc57..57d2ef66f939 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml @@ -1,12 +1,12 @@ Evaluator: PageLog: - path: ~/pagelogs/temp2 + path: ~/pagelogs/temp3 client: + model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06 etc. provider: trapi # openai, azure_openai, or trapi - api_key: sk-xx # only for openai - # Add the model name here. + api_key: sk- # only for openai temperature: 0.8 max_tokens: 4096 presence_penalty: 0.0 @@ -22,6 +22,8 @@ FastLearner: class_name: Apprentice module_path: autogen_ext.agentic_memory.fast_learners.apprentice AgenticMemoryController: + max_train_trials: 2 # 2-10 + max_test_trials: 1 # 1-3 AgenticMemoryBank: path: ~/agentic_memory_bank/temp AgentWrapper: @@ -35,6 +37,4 @@ evaluations: - name: eval_self_teaching num_loops: 1 # 1-10 - max_train_trials: 2 # 2-10 Move to AMC - max_test_trials: 1 # 1-3 Move to AMC num_final_test_trials: 1 # 1-3 From d67e2cca760984cc01b7a14886c4b81487c3e16f Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 10 Jan 2025 17:54:51 -0800 Subject: [PATCH 31/93] Remove the outdated final_format_instructions parameter. --- .../src/autogen_ext/agentic_memory/eval_framework/eval.py | 3 +-- .../apprentice/_agentic_memory_controller.py | 8 +++----- .../agentic_memory/fast_learners/apprentice/_prompter.py | 6 +----- .../agentic_memory/fast_learners/apprentice/apprentice.py | 4 ++-- 4 files changed, 7 insertions(+), 14 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 8d59b8992b27..72c369fd8fea 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -166,8 +166,7 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings task_with_answer = task_with_answer_list[0] await fast_learner.train_on_task( task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"], - final_format_instructions="") + expected_answer=task_with_answer["expected_answer"]) # Test on all tasks. for j, task_with_answer in enumerate(task_with_answer_list): diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py index 3bb3c36d9dd2..20f7f5aa4e87 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py @@ -21,7 +21,6 @@ def reset_memory(self): async def train_on_task(self, task: str, # The task to be completed. expected_answer: str, # The expected answer to the task. 
- final_format_instructions: str, # Instructions for formatting the final response, if any. ): """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. @@ -33,7 +32,7 @@ async def train_on_task(self, # Attempt to create useful new memories. page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) - _, insight = await self._iterate_on_task(task, expected_answer, final_format_instructions, + _, insight = await self._iterate_on_task(task, expected_answer, self.settings["max_train_trials"], self.settings["max_test_trials"]) if insight is None: page.add_lines("No useful insight was discovered.\n", flush=True) @@ -223,8 +222,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a self.page_log.finish_page(page) return failure_found, response, work_history - async def _iterate_on_task(self, task: str, expected_answer: str, - final_format_instructions: str, max_train_trials: int, max_test_trials: int): + async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int): page = self.page_log.begin_page( summary="AgenticMemoryController._iterate_on_task", details="", @@ -278,7 +276,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, # Try to learn from this failure. page.add_lines("\nResponse is INCORRECT. Try to learn from this failure.\n", flush=True) insight = await self.prompter.learn_from_failure( - task, memory_section, response, expected_answer, work_history, final_format_instructions, new_insights) + task, memory_section, response, expected_answer, work_history, new_insights) page.add_lines("\nInsight: {}\n".format(insight), flush=True) new_insights.append(insight) last_insight = insight diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py index 4c39e36cf2ae..72f71f8361e3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py @@ -95,7 +95,7 @@ def clear_history(self): self._chat_history = [] async def learn_from_failure(self, task_description, memory_section, final_response, expected_answer, - work_history, final_format_instructions, insights): + work_history, insights): # Try to create an insight to help avoid this failure in the future. sys_message = """- You are a patient and thorough teacher. 
@@ -108,10 +108,6 @@ async def learn_from_failure(self, task_description, memory_section, final_respo if len(memory_section) > 0: user_message.append(memory_section) - if len(final_format_instructions) > 0: - user_message.append("# The following answer-formatting instructions were given to the students:\n") - user_message.append(final_format_instructions) - user_message.append("# Here's the expected answer, which would have been correct:\n") user_message.append(expected_answer) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py index 78fe1bcbdba9..da79f2731cdc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py @@ -66,7 +66,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo self.page_log.finish_page(page) return response - async def train_on_task(self, task, expected_answer, final_format_instructions): + async def train_on_task(self, task, expected_answer): """A background operation, not intended for immediate response.""" page = self.page_log.begin_page( summary="Apprentice.train_on_task", @@ -74,6 +74,6 @@ async def train_on_task(self, task, expected_answer, final_format_instructions): method_call="Apprentice.train_on_task") # Pass the task through to the memory controller. - await self.memory_controller.train_on_task(task, expected_answer, final_format_instructions) + await self.memory_controller.train_on_task(task, expected_answer) self.page_log.finish_page(page) From 6470fd8ec7c247f13a35784a6ea00d1c9fd0ddd8 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sat, 11 Jan 2025 21:16:26 -0800 Subject: [PATCH 32/93] Move tasks into yaml files. 
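Each task now lives in its own yaml file under eval_framework/tasks/, with
three fields: name, task_description, and expected_answer. The hard-coded
define_tasks_with_answers() list in eval.py is replaced by get_task_by_name(),
which loads tasks/<task_name>.yaml and asserts that its name field matches the
filename. A minimal sketch of the format and its use (the what_is_2_plus_2
task is an invented example, not one of the files added in this patch):

    # tasks/what_is_2_plus_2.yaml (hypothetical)
    name: what_is_2_plus_2
    task_description: What is 2 + 2?
    expected_answer: '4'

Callers then load task details by name:

    # Returns the parsed dict; get_task_by_name raises AssertionError if the
    # file's name field does not match the requested task name.
    task_details = get_task_by_name("what_is_2_plus_2")
    print(task_details["task_description"], "->", task_details["expected_answer"])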
--- .../agentic_memory/eval_framework/eval.py | 111 +++++------------- .../settings}/long.yaml | 0 .../settings}/m1.yaml | 0 .../settings}/short.yaml | 0 .../eval_framework/tasks/100_vampires.yaml | 22 ++++ .../eval_framework/tasks/10_liars.yaml | 8 ++ .../eval_framework/tasks/3_to_third.yaml | 5 + .../eval_framework/tasks/autogen_package.yaml | 5 + .../eval_framework/tasks/cell_towers.yaml | 9 ++ 9 files changed, 80 insertions(+), 80 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{settings_files => eval_framework/settings}/long.yaml (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{settings_files => eval_framework/settings}/m1.yaml (100%) rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/{settings_files => eval_framework/settings}/short.yaml (100%) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 72c369fd8fea..2c6013fa8931 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -9,54 +9,14 @@ from autogen_ext.agentic_memory import PageLog, Grader, ClientWrapper -def define_tasks_with_answers(): - tasks_with_answers = [] - - # Task index 0 - tasks_with_answers.append({ - "task": """You ask 100 people: 'How many of you are liars?' They all answer: 'At least one of us is not a liar.' But you know that at least one of the 100 is a liar. How many of them are liars?""", - "expected_answer": "100"}) - - # Task index 1 - tasks_with_answers.append({ - "task": """You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021bcu IV, son of Costea, has tasked you with investigating the village of \u0218irnea in neighboring Wallachia. The Count's advisors have reported that a vampire was spotted crossing the border near the village, and would like you to investigate it. - -You travel to the village of \u0218irnea, and you begin your investigation. One night, just before dawn, you catch a glimpse of a man in a long black cape with red lining leaping from roof-top to roof-top with superhuman agility. It's a vampire! You try to chase the creature back to its home, but the creature is too fast. However, because of the remoteness of the village, you know with absolute certainty that the vampire must be a resident of the village. You decide that your best course of action will be to visit all 100 residents of the town during the day. You know something about vampires and humans that will make your investigation possible; humans always tell the truth, but vampires always lie. - -In the afternoon, you go from house to house, speaking with all 100 residents of \u0218irnea. You ask everyone the same question: \"How many vampires are living in \u0218irnea\". 
Everyone in the village gives the same response, \"At least one of us is a human.\"\n\nHow many residents of \u0218irnea have been turned into vampires?""", - "expected_answer": "100"}) - - # Task index 2 - tasks_with_answers.append({ - "task": """Three guards stand at a door. You need to determine how many of them are truthful, and you already know for a fact that at least one of them never tells the truth. You ask each one 'How many guards here always tell the truth?' Each one says 'One or more of us always tells the truth'. How many of the guards always tell the truth?""", - "expected_answer": "None of them do"}) - - # Task index 3 - tasks_with_answers.append({ - "task": """You ask ten people 'How many of you are liars?' They all answer 'At least one of us is not a liar.' You happen to know that at least one of them IS a liar. How many of them are liars in total?""", - "expected_answer": "All of them are liars."}) - - # Task index 4 - tasks_with_answers.append({ - "task": "As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?", - "expected_answer": "It's best to have your agent in its own repo, then add the autogen-extension topic to that repo."}) - - # Task index 5 - tasks_with_answers.append({ - "task": "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value.", - "expected_answer": "2"}) - - # Task index 6 - tasks_with_answers.append({ - "task": "What is 4^4?", - "expected_answer": "256"}) - - # Task index 7 - tasks_with_answers.append({ - "task": "What is 3^3?", - "expected_answer": "27"}) - - return tasks_with_answers +def get_task_by_name(task_name): + path_to_this_file = os.path.abspath(__file__) + dir_of_this_file = os.path.dirname(path_to_this_file) + task_filepath = os.path.join(dir_of_this_file, 'tasks', task_name + '.yaml') + with open(task_filepath, "r") as file: + task = yaml.load(file, Loader=yaml.FullLoader) + assert task["name"] == task_name + return task async def eval_teachability(fast_learner, evaluator, client, page_log, settings): @@ -66,20 +26,18 @@ async def eval_teachability(fast_learner, evaluator, client, page_log, settings) details='', method_call="eval_teachability") - tasklist = define_tasks_with_answers() - task_index = 4 - task_with_answer = tasklist[task_index] - task = task_with_answer["task"] - answer = task_with_answer["expected_answer"] + task_details = get_task_by_name("autogen_package") + task_description = task_details["task_description"] + answer = task_details["expected_answer"] grader = Grader(client, page_log) fast_learner.reset_memory() # First test without memory. page.add_lines("\nClear memory, then ask the question.") - response = await fast_learner.handle_user_message(task) + response = await fast_learner.handle_user_message(task_description) # Check the response. 
- response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) + response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: page.add_lines("Answer is CORRECT.\n", flush=True) @@ -93,10 +51,10 @@ async def eval_teachability(fast_learner, evaluator, client, page_log, settings) # Now ask the question again to see if the advice is retrieved from memory. page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await fast_learner.handle_user_message(task) + response = await fast_learner.handle_user_message(task_description) # Check the response. - response_is_correct, extracted_answer = await grader.is_response_correct(task, response, answer) + response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: page.add_lines("Answer is CORRECT.\n", flush=True) @@ -113,15 +71,14 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, page details='', method_call="eval_learning_from_demonstration") - task_index = 5 - task_with_answer = define_tasks_with_answers()[task_index] + task_details = get_task_by_name("cell_towers") num_trials = settings["num_trials"] fast_learner.reset_memory() # Start by clearing memory then running a baseline test. page.add_lines("To get a baseline, clear memory, then assign the task.") num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, + fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, use_memory=True, client=client, page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -135,7 +92,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, page # Now test again to see if the demonstration (retrieved from memory) helps. page.add_lines("Assign the task again to see if the demonstration helps.") num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=num_trials, + fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, use_memory=True, client=client, page_log=page_log) success_rate = round((num_successes / num_trials) * 100) page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) @@ -152,26 +109,20 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings fast_learner.reset_memory() - # Choose the tasks from those listed at the top. - task_index_list = [3, 1] - - # Train and test on any number of tasks using memory. - tasklist = define_tasks_with_answers() - task_with_answer_list = [tasklist[task_index] for task_index in task_index_list] - - total_num_successes_list = [0 for _ in task_index_list] + task_details_list = [get_task_by_name("10_liars"), get_task_by_name("100_vampires")] + total_num_successes_list = [0 for _ in range(len(task_details_list))] total_num_trials = 0 for i in range(settings["num_loops"]): - # Always train on the first task. - task_with_answer = task_with_answer_list[0] + # Always train on the first task in the list. 
+ task_details = task_details_list[0] await fast_learner.train_on_task( - task=task_with_answer["task"], - expected_answer=task_with_answer["expected_answer"]) + task=task_details["task_description"], + expected_answer=task_details["expected_answer"]) - # Test on all tasks. - for j, task_with_answer in enumerate(task_with_answer_list): + # Test on all tasks in the list. + for j, task_details in enumerate(task_details_list): num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_with_answer=task_with_answer, num_trials=settings["num_final_test_trials"], + fast_learner=fast_learner, task_details=task_details, num_trials=settings["num_final_test_trials"], use_memory=True, client=client, page_log=page_log) page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) @@ -302,7 +253,7 @@ def create_trapi_client(self, settings): self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) return client - async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, use_memory, + async def test_fast_learner(self, fast_learner, task_details, num_trials, use_memory, client, page_log) -> Tuple[int, int]: page = page_log.begin_page( summary="Evaluator.test_fast_learner", @@ -317,10 +268,10 @@ async def test_fast_learner(self, fast_learner, task_with_answer, num_trials, us for trial in range(num_trials): page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) page.add_lines("Try to solve the task.\n", flush=True) - task = task_with_answer["task"] - response = await fast_learner.assign_task(task, use_memory=use_memory) + task_description = task_details["task_description"] + response = await fast_learner.assign_task(task_description, use_memory=use_memory) response_is_correct, extracted_answer = await grader.is_response_correct( - task, response, task_with_answer["expected_answer"]) + task_description, response, task_details["expected_answer"]) page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) if response_is_correct: page.add_lines("Answer is CORRECT.\n", flush=True) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/long.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/long.yaml rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/m1.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/m1.yaml rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/settings_files/short.yaml rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml 
b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml new file mode 100644 index 000000000000..a4fffbcd8cb4 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml @@ -0,0 +1,22 @@ +name: 100_vampires + +task_description: "You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021B\ + cu IV, son of Costea, has tasked you with investigating the village of \u0218\ + irnea in neighboring Wallachia. The Count's advisors have reported that a vampire\ + \ was spotted crossing the border near the village, and would like you to investigate\ + \ it.\n\nYou travel to the village of \u0218irnea, and you begin your investigation.\ + \ One night, just before dawn, you catch a glimpse of a man in a long black\ + \ cape with red lining leaping from roof-top to roof-top with superhuman agility.\ + \ It's a vampire! You try to chase the creature back to its home, but the creature\ + \ is too fast. However, because of the remoteness of the village, you know with\ + \ absolute certainty that the vampire must be a resident of the village. You\ + \ decide that your best course of action will be to visit all 100 residents\ + \ of the town during the day. You know something about vampires and humans that\ + \ will make your investigation possible; humans always tell the truth, but vampires\ + \ always lie.\n\nIn the afternoon, you go from house to house, speaking with\ + \ all 100 residents of \u0218irnea. You ask everyone the same question: \"How\ + \ many vampires are living in \u0218irnea\". Everyone in the village gives the\ + \ same response, \"At least one of us is a human.\"\n\nHow many residents of\ + \ \u0218irnea have been turned into vampires?" + +expected_answer: '100' diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml new file mode 100644 index 000000000000..ecc32a88ed46 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml @@ -0,0 +1,8 @@ +name: 10_liars + +task_description: 'You ask ten people ''How many of you are liars?'' + They all answer ''At least one of us is not a liar.'' + You happen to know that at least one of them IS a liar. + How many of them are liars in total?' + +expected_answer: All of them are liars. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml new file mode 100644 index 000000000000..0b29503e5925 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml @@ -0,0 +1,5 @@ +name: 3_to_third + +task_description: What is 3^3? + +expected_answer: '27' diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml new file mode 100644 index 000000000000..234fe66bece8 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml @@ -0,0 +1,5 @@ +name: autogen_package + +task_description: As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen? 
+ +expected_answer: It's best to have your agent in its own repo, then add the autogen-extension topic to that repo. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml new file mode 100644 index 000000000000..4db06c4db7aa --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml @@ -0,0 +1,9 @@ +name: cell_towers + +task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. + Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. + Each cell phone tower can cover houses located next to the road within a 4-mile radius. + Find the minimum number of cell phone towers needed to cover all houses next to the road. + Your answer should be a positive numerical integer value. + +expected_answer: '2' From b025199dce5a2fd583781e89eafef00ea048536d Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 12 Jan 2025 12:59:32 -0800 Subject: [PATCH 33/93] Move client support to a subdir. --- .../autogen_ext/agentic_memory/__init__.py | 3 +- .../eval_framework/clients/_client_creator.py | 125 ++++++++++++++++++ .../{ => clients}/_client_wrapper.py | 0 .../agentic_memory/eval_framework/eval.py | 122 +---------------- .../eval_framework/tasks/100_vampires.yaml | 2 + .../eval_framework/tasks/10_liars.yaml | 2 + .../eval_framework/tasks/3_to_third.yaml | 2 + .../eval_framework/tasks/autogen_package.yaml | 2 + .../eval_framework/tasks/cell_towers.yaml | 2 + 9 files changed, 141 insertions(+), 119 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py rename python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/{ => clients}/_client_wrapper.py (100%) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index 1b78d5073025..9c3b0f966c6a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,5 +1,4 @@ from .fast_learners.apprentice._page_log import PageLog from .fast_learners.apprentice._grader import Grader -from .eval_framework._client_wrapper import ClientWrapper -__all__ = ["PageLog", "Grader", "ClientWrapper"] +__all__ = ["PageLog", "Grader"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py new file mode 100644 index 000000000000..7ca92041c3e1 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py @@ -0,0 +1,125 @@ +from autogen_ext.models.openai import OpenAIChatCompletionClient +from autogen_ext.models.openai import AzureOpenAIChatCompletionClient +from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider +from ._client_wrapper import ClientWrapper + + +class ClientCreator: + def __init__(self, settings, page_log): + self.settings = settings + self.page_log = page_log + + def create_client(self): + client = None + provider = self.settings["provider"] + if provider == "openai": + client = self.create_oai_client() + elif provider == 
"azure_openai": + client = self.create_aoai_client() + elif provider == "trapi": + client = self.create_trapi_client() + else: + assert False, "Invalid client provider" + + # Check if the client should be wrapped. + if "ClientWrapper" in self.settings: + wrapper_settings = self.settings["ClientWrapper"] + if wrapper_settings["enabled"]: + # Wrap the client. + client = ClientWrapper( + client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log) + + return client + + + def create_oai_client(self): + # Create an OpenAI client + client = OpenAIChatCompletionClient( + model=self.settings["model"], + api_key=self.settings["api_key"], + temperature=self.settings["temperature"], + max_tokens=self.settings["max_tokens"], + presence_penalty=self.settings["presence_penalty"], + frequency_penalty=self.settings["frequency_penalty"], + top_p=self.settings["top_p"], + max_retries=self.settings["max_retries"], + ) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) + self.page_log.append_entry_line(" created through OpenAI") + self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) + return client + + + def create_aoai_client(self): + # Create an Azure OpenAI client + model = self.settings["model"] + azure_deployment = model + "-eval" + if model == "gpt-4o-2024-08-06": + azure_endpoint = "https://agentic2.openai.azure.com/" + else: + azure_endpoint = "https://agentic1.openai.azure.com/" + token_provider = get_bearer_token_provider(DefaultAzureCredential(), + "https://cognitiveservices.azure.com/.default") + client = AzureOpenAIChatCompletionClient( + azure_endpoint=azure_endpoint, + azure_ad_token_provider=token_provider, + azure_deployment=azure_deployment, + api_version="2024-06-01", + model=model, + temperature=self.settings["temperature"], + max_tokens=self.settings["max_tokens"], + presence_penalty=self.settings["presence_penalty"], + frequency_penalty=self.settings["frequency_penalty"], + top_p=self.settings["top_p"], + max_retries=self.settings["max_retries"], + ) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) + self.page_log.append_entry_line(" created through Azure OpenAI") + self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) + return client + + + def create_trapi_client(self): + # Create an Azure OpenAI client through TRAPI + token_provider = get_bearer_token_provider(ChainedTokenCredential( + AzureCliCredential(), + DefaultAzureCredential( + exclude_cli_credential=True, + # Exclude other credentials we are not interested in. 
+ exclude_environment_credential=True, + exclude_shared_token_cache_credential=True, + exclude_developer_cli_credential=True, + exclude_powershell_credential=True, + exclude_interactive_browser_credential=True, + exclude_visual_studio_code_credentials=True, + # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs + ) + ), "api://trapi/.default") + model = self.settings["model"] + if model == "gpt-4o-2024-08-06": + azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models + elif model == "gpt-4o-2024-05-13": + azure_deployment = 'gpt-4o_2024-05-13' + elif model == "o1-preview": + azure_deployment = 'o1-preview_2024-09-12' + trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models + endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' + api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release + client = AzureOpenAIChatCompletionClient( + azure_ad_token_provider=token_provider, + model=model, + azure_deployment=azure_deployment, + azure_endpoint=endpoint, + api_version=api_version, + temperature=self.settings["temperature"], + max_tokens=self.settings["max_tokens"], + presence_penalty=self.settings["presence_penalty"], + frequency_penalty=self.settings["frequency_penalty"], + top_p=self.settings["top_p"], + max_retries=self.settings["max_retries"], + ) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) + self.page_log.append_entry_line(" created through TRAPI") + self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) + return client + diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_client_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/_client_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 2c6013fa8931..695063151e3f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -2,11 +2,9 @@ import yaml import asyncio import importlib -from autogen_ext.models.openai import OpenAIChatCompletionClient -from autogen_ext.models.openai import AzureOpenAIChatCompletionClient -from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider from typing import Tuple -from autogen_ext.agentic_memory import PageLog, Grader, ClientWrapper +from autogen_ext.agentic_memory import PageLog, Grader +from autogen_ext.agentic_memory.eval_framework.clients._client_creator import ClientCreator def get_task_by_name(task_name): @@ -126,7 +124,6 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings use_memory=True, client=client, page_log=page_log) page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) - print("SUCCESS RATE ({}): {}%\n".format(j, round((num_successes / num_trials) * 100))) 
total_num_successes_list[j] += num_successes total_num_trials += settings["num_final_test_trials"] page.add_lines("") @@ -141,117 +138,7 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings class Evaluator: def __init__(self): self.page_log = None - - def create_client(self, settings): - client = None - provider = settings["provider"] - if provider == "openai": - client = self.create_oai_client(settings) - elif provider == "azure_openai": - client = self.create_aoai_client(settings) - elif provider == "trapi": - client = self.create_trapi_client(settings) - else: - assert False, "Invalid client provider" - - # Check if the client should be wrapped. - if "ClientWrapper" in settings: - wrapper_settings = settings["ClientWrapper"] - if wrapper_settings["enabled"]: - # Wrap the client. - client = ClientWrapper( - client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log) - - return client - - def create_oai_client(self, settings): - # Create an OpenAI client - client = OpenAIChatCompletionClient( - model=settings["model"], - api_key=settings["api_key"], - temperature=settings["temperature"], - max_tokens=settings["max_tokens"], - presence_penalty=settings["presence_penalty"], - frequency_penalty=settings["frequency_penalty"], - top_p=settings["top_p"], - max_retries=settings["max_retries"], - ) - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through OpenAI") - self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) - return client - - def create_aoai_client(self, settings): - # Create an Azure OpenAI client - model = settings["model"] - azure_deployment = model + "-eval" - if model == "gpt-4o-2024-08-06": - azure_endpoint = "https://agentic2.openai.azure.com/" - else: - azure_endpoint = "https://agentic1.openai.azure.com/" - token_provider = get_bearer_token_provider(DefaultAzureCredential(), - "https://cognitiveservices.azure.com/.default") - client = AzureOpenAIChatCompletionClient( - azure_endpoint=azure_endpoint, - azure_ad_token_provider=token_provider, - azure_deployment=azure_deployment, - api_version="2024-06-01", - model=model, - temperature=settings["temperature"], - max_tokens=settings["max_tokens"], - presence_penalty=settings["presence_penalty"], - frequency_penalty=settings["frequency_penalty"], - top_p=settings["top_p"], - max_retries=settings["max_retries"], - ) - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through Azure OpenAI") - self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) - return client - - def create_trapi_client(self, settings): - # Create an Azure OpenAI client through TRAPI - token_provider = get_bearer_token_provider(ChainedTokenCredential( - AzureCliCredential(), - DefaultAzureCredential( - exclude_cli_credential=True, - # Exclude other credentials we are not interested in. 
- exclude_environment_credential=True, - exclude_shared_token_cache_credential=True, - exclude_developer_cli_credential=True, - exclude_powershell_credential=True, - exclude_interactive_browser_credential=True, - exclude_visual_studio_code_credentials=True, - # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs - ) - ), "api://trapi/.default") - model = settings["model"] - if model == "gpt-4o-2024-08-06": - azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models - elif model == "gpt-4o-2024-05-13": - azure_deployment = 'gpt-4o_2024-05-13' - elif model == "o1-preview": - azure_deployment = 'o1-preview_2024-09-12' - trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models - endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' - api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - client = AzureOpenAIChatCompletionClient( - azure_ad_token_provider=token_provider, - model=model, - azure_deployment=azure_deployment, - azure_endpoint=endpoint, - api_version=api_version, - temperature=settings["temperature"], - max_tokens=settings["max_tokens"], - presence_penalty=settings["presence_penalty"], - frequency_penalty=settings["frequency_penalty"], - top_p=settings["top_p"], - max_retries=settings["max_retries"], - ) - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through TRAPI") - self.page_log.append_entry_line(" temperature: {}".format(settings["temperature"])) - return client + self.client_creator = None async def test_fast_learner(self, fast_learner, task_details, num_trials, use_memory, client, page_log) -> Tuple[int, int]: @@ -297,7 +184,8 @@ async def run(self, settings_filepath): method_call="Evaluator.main") # Create the client, passed to both the fast_learner and the evaluator. - client = self.create_client(settings["client"]) + client_creator = ClientCreator(settings=settings["client"], page_log=self.page_log) + client = client_creator.create_client() # Create the specified fast_learner implementation. fast_learner_settings = settings["FastLearner"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml index a4fffbcd8cb4..7a18b728981f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml @@ -1,3 +1,5 @@ +# From GAIA L1 + name: 100_vampires task_description: "You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021B\ diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml index ecc32a88ed46..053cecd15ba4 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml @@ -1,3 +1,5 @@ +# Similar to the 100 vampires task, for testing generalization from one to the other. 
+ name: 10_liars task_description: 'You ask ten people ''How many of you are liars?'' diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml index 0b29503e5925..d5b4dfd172be 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml @@ -1,3 +1,5 @@ +# A simple test that doesn't require memory. + name: 3_to_third task_description: What is 3^3? diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml index 234fe66bece8..091090982f75 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml @@ -1,3 +1,5 @@ +# Test where human advice is needed. + name: autogen_package task_description: As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen? diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml index 4db06c4db7aa..c89e2635e21a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml @@ -1,3 +1,5 @@ +# File-free version of a GAIA L1 task. + name: cell_towers task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. From 4f9267c7cb57c5b93e78f391b81cfda93dfe8edf Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 12 Jan 2025 15:38:40 -0800 Subject: [PATCH 34/93] Move evaluations to a separate dir. 
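Each evaluation function now lives in its own module under
eval_framework/evaluations/, and Evaluator.run() locates it at runtime from
the module_path and function_name fields of each entry in the evaluations
list of the settings yaml. A sketch of one settings entry and the lookup it
drives (the yaml layout shown is an assumption; the Python lines mirror the
importlib logic added to eval.py in this patch and execute inside the async
Evaluator.run() method):

    # Assumed settings entry:
    #   evaluations:
    #     - module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_teachability
    #       function_name: eval_teachability
    import importlib

    # Import the module that defines the evaluation, then look up and await
    # the evaluation function itself.
    module = importlib.import_module(evaluation_settings["module_path"])
    eval_function = getattr(module, evaluation_settings["function_name"])
    await eval_function(fast_learner, evaluator, client, page_log, evaluation_settings)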
--- .../agentic_memory/eval_framework/eval.py | 159 +++--------------- .../eval_learning_from_demonstration.py | 35 ++++ .../evaluations/eval_self_teaching.py | 38 +++++ .../evaluations/eval_teachability.py | 46 +++++ .../eval_framework/settings/short.yaml | 9 +- 5 files changed, 150 insertions(+), 137 deletions(-) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py index 695063151e3f..091919afe2d1 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py @@ -7,141 +7,21 @@ from autogen_ext.agentic_memory.eval_framework.clients._client_creator import ClientCreator -def get_task_by_name(task_name): - path_to_this_file = os.path.abspath(__file__) - dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, 'tasks', task_name + '.yaml') - with open(task_filepath, "r") as file: - task = yaml.load(file, Loader=yaml.FullLoader) - assert task["name"] == task_name - return task - - -async def eval_teachability(fast_learner, evaluator, client, page_log, settings): - """An evaluation""" - page = page_log.begin_page( - summary="eval_teachability", - details='', - method_call="eval_teachability") - - task_details = get_task_by_name("autogen_package") - task_description = task_details["task_description"] - answer = task_details["expected_answer"] - grader = Grader(client, page_log) - fast_learner.reset_memory() - - # First test without memory. - page.add_lines("\nClear memory, then ask the question.") - response = await fast_learner.handle_user_message(task_description) - - # Check the response. - response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) - - # Give the advice. - page.add_lines("Give the advice.") - insight = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" - await fast_learner.handle_user_message(insight) - - # Now ask the question again to see if the advice is retrieved from memory. - page.add_lines("\nAsk the question again to see if the advice is retrieved from memory.") - response = await fast_learner.handle_user_message(task_description) - - # Check the response. 
- response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) - if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) - else: - page.add_lines("Answer is INCORRECT.\n", flush=True) - - page_log.finish_page(page) - - -async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings): - """An evaluation""" - page = page_log.begin_page( - summary="eval_learning_from_demonstration", - details='', - method_call="eval_learning_from_demonstration") - - task_details = get_task_by_name("cell_towers") - num_trials = settings["num_trials"] - fast_learner.reset_memory() - - # Start by clearing memory then running a baseline test. - page.add_lines("To get a baseline, clear memory, then assign the task.") - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, - use_memory=True, client=client, page_log=page_log) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) - - # Provide the demonstration. - page.add_lines("Demonstrate a solution to a similar task.") - demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." - demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - await fast_learner.learn_from_demonstration(demo_task, demonstration) - - # Now test again to see if the demonstration (retrieved from memory) helps. 
- page.add_lines("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, - use_memory=True, client=client, page_log=page_log) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) - - page_log.finish_page(page) - - -async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings): - """An evaluation""" - page = page_log.begin_page( - summary="eval_self_teaching", - details='', - method_call="eval_self_teaching") - - fast_learner.reset_memory() - - task_details_list = [get_task_by_name("10_liars"), get_task_by_name("100_vampires")] - total_num_successes_list = [0 for _ in range(len(task_details_list))] - total_num_trials = 0 - for i in range(settings["num_loops"]): - # Always train on the first task in the list. - task_details = task_details_list[0] - await fast_learner.train_on_task( - task=task_details["task_description"], - expected_answer=task_details["expected_answer"]) - - # Test on all tasks in the list. - for j, task_details in enumerate(task_details_list): - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_details=task_details, num_trials=settings["num_final_test_trials"], - use_memory=True, client=client, page_log=page_log) - - page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) - total_num_successes_list[j] += num_successes - total_num_trials += settings["num_final_test_trials"] - page.add_lines("") - - for i, total_num_successes in enumerate(total_num_successes_list): - success_rate = round((total_num_successes / total_num_trials) * 100) - page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) - - page_log.finish_page(page) - - class Evaluator: def __init__(self): self.page_log = None - self.client_creator = None - async def test_fast_learner(self, fast_learner, task_details, num_trials, use_memory, - client, page_log) -> Tuple[int, int]: + def get_task_details_by_name(self, task_name): + path_to_this_file = os.path.abspath(__file__) + dir_of_this_file = os.path.dirname(path_to_this_file) + task_filepath = os.path.join(dir_of_this_file, 'tasks', task_name + '.yaml') + with open(task_filepath, "r") as file: + task_details = yaml.load(file, Loader=yaml.FullLoader) + assert task_details["name"] == task_name + return task_details + + async def test_fast_learner(self, fast_learner, task_details, num_trials, + use_memory, client, page_log) -> Tuple[int, int]: page = page_log.begin_page( summary="Evaluator.test_fast_learner", details='', @@ -208,9 +88,20 @@ async def run(self, settings_filepath): raise # Execute each evaluation. 
- for evaluation in settings["evaluations"]: - eval_function = globals()[evaluation["name"]] - await eval_function(fast_learner, self, client, self.page_log, evaluation) + for evaluation_settings in settings["evaluations"]: + module_path = evaluation_settings["module_path"] + try: + module = importlib.import_module(module_path) + except ModuleNotFoundError: + print('Failed to import {}'.format(module_path)) + raise + function_name = evaluation_settings["function_name"] + try: + eval_function = getattr(module, function_name) + except AttributeError: + print('Failed to import {}.{}'.format(module_path, function_name)) + raise + await eval_function(fast_learner, self, client, self.page_log, evaluation_settings) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py new file mode 100644 index 000000000000..7ed4ea344823 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py @@ -0,0 +1,35 @@ + +async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings): + """An evaluation""" + page = page_log.begin_page( + summary="eval_learning_from_demonstration", + details='', + method_call="eval_learning_from_demonstration") + + task_details = evaluator.get_task_details_by_name("cell_towers") + num_trials = settings["num_trials"] + + # Start by clearing memory then running a baseline test. + page.add_lines("To get a baseline, clear memory, then assign the task.") + fast_learner.reset_memory() + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, + use_memory=True, client=client, page_log=page_log) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) + + # Provide the demonstration. + page.add_lines("Demonstrate a solution to a similar task.") + demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." + demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." + await fast_learner.learn_from_demonstration(demo_task, demonstration) + + # Now test again to see if the demonstration (retrieved from memory) helps. 
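+    # (Memory is intentionally not reset between the demonstration and this second
+    # test, so the stored demonstration remains available for retrieval.)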
+ page.add_lines("Assign the task again to see if the demonstration helps.") + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, + use_memory=True, client=client, page_log=page_log) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) + + page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py new file mode 100644 index 000000000000..d66af47fb3e7 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py @@ -0,0 +1,38 @@ + +async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings): + """An evaluation""" + page = page_log.begin_page( + summary="eval_self_teaching", + details='', + method_call="eval_self_teaching") + + # Start the test with empty memory. + fast_learner.reset_memory() + + task_details_list = [ + evaluator.get_task_details_by_name("10_liars"), + evaluator.get_task_details_by_name("100_vampires")] + total_num_successes_list = [0 for _ in range(len(task_details_list))] + total_num_trials = 0 + for i in range(settings["num_loops"]): + # Train on the first task in the list. + task_details = task_details_list[0] + await fast_learner.train_on_task(task=task_details["task_description"], + expected_answer=task_details["expected_answer"]) + + # Test on all tasks in the list. + for j, task_details in enumerate(task_details_list): + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_details=task_details, num_trials=settings["num_final_test_trials"], + use_memory=True, client=client, page_log=page_log) + + page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True) + total_num_successes_list[j] += num_successes + total_num_trials += settings["num_final_test_trials"] + page.add_lines("") + + for i, total_num_successes in enumerate(total_num_successes_list): + success_rate = round((total_num_successes / total_num_trials) * 100) + page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True) + + page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py new file mode 100644 index 000000000000..1eacd652682d --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py @@ -0,0 +1,46 @@ +from autogen_ext.agentic_memory import PageLog, Grader + + +async def eval_teachability(fast_learner, evaluator, client, page_log, settings): + """An evaluation""" + page = page_log.begin_page( + summary="eval_teachability", + details='', + method_call="eval_teachability") + + task_details = evaluator.get_task_details_by_name("autogen_package") + task_description = task_details["task_description"] + answer = task_details["expected_answer"] + grader = Grader(client, page_log) + + # First test without memory. 
+ fast_learner.reset_memory() + page.add_lines("\nClear memory, then ask the question.") + response = await fast_learner.handle_user_message(task_description) + + # Check the response. + response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + # Give advice that should help solve this task. + page.add_lines("Give the advice.") + advice = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html" + await fast_learner.handle_user_message(advice) + + # Now ask the question again to see if the advice helps. + page.add_lines("\nAsk the question again to see if the advice helps.") + response = await fast_learner.handle_user_message(task_description) + + # Check the response. + response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer) + page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + if response_is_correct: + page.add_lines("Answer is CORRECT.\n", flush=True) + else: + page.add_lines("Answer is INCORRECT.\n", flush=True) + + page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml index 57d2ef66f939..48a481be02b3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml @@ -30,11 +30,14 @@ FastLearner: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. 
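+
+# Each evaluation entry below names the module to import (module_path) and the
+# function to call (function_name), so eval.py can load it dynamically.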
evaluations: - - name: eval_teachability + - function_name: eval_teachability + module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_teachability - - name: eval_learning_from_demonstration + - function_name: eval_learning_from_demonstration + module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_learning_from_demonstration num_trials: 1 # 1-10 - - name: eval_self_teaching + - function_name: eval_self_teaching + module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_self_teaching num_loops: 1 # 1-10 num_final_test_trials: 1 # 1-3 From db34844bab702765503ef280ab482f5345e9dcf7 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 13 Jan 2025 16:05:58 -0800 Subject: [PATCH 35/93] single line --- .../fast_learners/apprentice/_agentic_memory_controller.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py index 20f7f5aa4e87..979896687a78 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py @@ -286,8 +286,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria self.page_log.finish_page(page) return final_response, successful_insight - async def assign_task(self, task: str, use_memory: bool = True, - should_await: bool = True): + async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ From c78085201a382537bc1179a86885d3446063d425 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 15 Jan 2025 16:46:47 -0800 Subject: [PATCH 36/93] Add baseline evaluation for the no-memory case. --- .../evaluations/eval_without_learning.py | 21 +++++++++++ .../eval_framework/settings/baseline.yaml | 36 +++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py new file mode 100644 index 000000000000..7d6a4186779a --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py @@ -0,0 +1,21 @@ + +async def eval_without_learning(fast_learner, evaluator, client, page_log, settings): + """An evaluation""" + page = page_log.begin_page( + summary="eval_without_learning", + details='', + method_call="eval_without_learning") + + task_details = evaluator.get_task_details_by_name(settings["task_name"]) + num_trials = settings["num_trials"] + + # Clear memory then run a baseline test. 
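+    # (use_memory=True is still passed to the test below; with memory reset first,
+    # retrieval finds nothing, so this run measures the agent's unaided performance.)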
+ page.add_lines("To get a baseline, clear memory, then assign the task.") + fast_learner.reset_memory() + num_successes, num_trials = await evaluator.test_fast_learner( + fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, + use_memory=True, client=client, page_log=page_log) + success_rate = round((num_successes / num_trials) * 100) + page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) + + page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml new file mode 100644 index 000000000000..c7f7bf9e10e4 --- /dev/null +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml @@ -0,0 +1,36 @@ + +Evaluator: + PageLog: + path: ~/pagelogs/base + +client: + model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06 etc. + provider: trapi # openai, azure_openai, or trapi + api_key: sk- # only for openai + temperature: 0.8 + max_tokens: 4096 + presence_penalty: 0.0 + frequency_penalty: 0.0 + top_p: 1.0 + max_retries: 65535 + ClientWrapper: # Provides record & replay functionality + enabled: 0 # Only works for thin_agent currently + mode: check-replay # pass-through, record, or check-replay + session_name: short-3 + +FastLearner: + class_name: Apprentice + module_path: autogen_ext.agentic_memory.fast_learners.apprentice + AgenticMemoryController: + max_train_trials: 2 # 2-10 + max_test_trials: 1 # 1-3 + AgenticMemoryBank: + path: ~/agentic_memory_bank/temp + AgentWrapper: + base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. + +evaluations: + - function_name: eval_without_learning + module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_without_learning + task_name: 100_vampires + num_trials: 10 From 43bda2fb1b1dc732315cc1ead8e0455bc27e7629 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 17 Jan 2025 18:03:18 -0800 Subject: [PATCH 37/93] Support o1 models --- .../eval_framework/clients/_client_creator.py | 100 ++++++++++-------- .../eval_framework/clients/_client_wrapper.py | 1 + .../eval_framework/settings/baseline.yaml | 8 +- .../eval_framework/settings/long.yaml | 2 +- .../eval_framework/settings/m1.yaml | 2 +- .../eval_framework/settings/short.yaml | 6 +- .../apprentice/_agent_wrapper.py | 9 +- .../fast_learners/apprentice/_grader.py | 19 ++-- .../fast_learners/apprentice/_prompter.py | 10 +- 9 files changed, 94 insertions(+), 63 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py index 7ca92041c3e1..1792b3de7f49 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py @@ -31,51 +31,65 @@ def create_client(self): return client + def add_shared_args(self, args): + args["model"] = self.settings["model"] + args["max_completion_tokens"] = self.settings["max_completion_tokens"] + args["max_retries"] = self.settings["max_retries"] + + def add_family_args(self, args): + # Does the model name start with 'o1'? + if not args["model"].startswith("o1"): + # No. A few more things can be specified. 
+ args["temperature"] = self.settings["temperature"] + args["presence_penalty"] = self.settings["presence_penalty"] + args["frequency_penalty"] = self.settings["frequency_penalty"] + args["top_p"] = self.settings["top_p"] def create_oai_client(self): # Create an OpenAI client - client = OpenAIChatCompletionClient( - model=self.settings["model"], - api_key=self.settings["api_key"], - temperature=self.settings["temperature"], - max_tokens=self.settings["max_tokens"], - presence_penalty=self.settings["presence_penalty"], - frequency_penalty=self.settings["frequency_penalty"], - top_p=self.settings["top_p"], - max_retries=self.settings["max_retries"], - ) + args = {"api_key": self.settings["api_key"]} + self.add_shared_args(args) + self.add_family_args(args) + + client = OpenAIChatCompletionClient(**args) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through OpenAI") - self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) return client def create_aoai_client(self): # Create an Azure OpenAI client model = self.settings["model"] - azure_deployment = model + "-eval" if model == "gpt-4o-2024-08-06": + azure_deployment = 'gpt-4o-2024-08-06-eval' # This is DeploymentName in the table at https://aka.ms/trapi/models azure_endpoint = "https://agentic2.openai.azure.com/" - else: + elif model == "gpt-4o-2024-05-13": + azure_deployment = 'gpt-4o-2024-05-13-eval' + azure_endpoint = "https://agentic1.openai.azure.com/" + elif model == "o1-preview": + azure_deployment = 'o1-preview-2024-09-12-eval' azure_endpoint = "https://agentic1.openai.azure.com/" + else: + assert False, "Unsupported model" + token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") - client = AzureOpenAIChatCompletionClient( - azure_endpoint=azure_endpoint, - azure_ad_token_provider=token_provider, - azure_deployment=azure_deployment, - api_version="2024-06-01", - model=model, - temperature=self.settings["temperature"], - max_tokens=self.settings["max_tokens"], - presence_penalty=self.settings["presence_penalty"], - frequency_penalty=self.settings["frequency_penalty"], - top_p=self.settings["top_p"], - max_retries=self.settings["max_retries"], - ) + api_version = '2024-12-01-preview' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release + + args = { + "azure_ad_token_provider": token_provider, + "azure_deployment": azure_deployment, + "azure_endpoint": azure_endpoint, + "api_version": api_version, + } + self.add_shared_args(args) + self.add_family_args(args) + + client = AzureOpenAIChatCompletionClient(**args) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through Azure OpenAI") - self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) return client @@ -95,6 +109,7 @@ def create_trapi_client(self): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") + api_version = '2024-12-01-preview' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release model = self.settings["model"] if model == "gpt-4o-2024-08-06": azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models @@ -102,24 +117,25 @@ def create_trapi_client(self): azure_deployment = 
'gpt-4o_2024-05-13' elif model == "o1-preview": azure_deployment = 'o1-preview_2024-09-12' + elif model == "o1": + azure_deployment = 'o1_2024-12-17' + else: + assert False, "Unsupported model" trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' - api_version = '2024-10-21' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - client = AzureOpenAIChatCompletionClient( - azure_ad_token_provider=token_provider, - model=model, - azure_deployment=azure_deployment, - azure_endpoint=endpoint, - api_version=api_version, - temperature=self.settings["temperature"], - max_tokens=self.settings["max_tokens"], - presence_penalty=self.settings["presence_penalty"], - frequency_penalty=self.settings["frequency_penalty"], - top_p=self.settings["top_p"], - max_retries=self.settings["max_retries"], - ) + + args = { + "azure_ad_token_provider": token_provider, + "azure_deployment": azure_deployment, + "azure_endpoint": endpoint, + "api_version": api_version, + } + self.add_shared_args(args) + self.add_family_args(args) + + client = AzureOpenAIChatCompletionClient(**args) + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) self.page_log.append_entry_line(" created through TRAPI") - self.page_log.append_entry_line(" temperature: {}".format(self.settings["temperature"])) return client diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py index a0b5e9c4bb4f..98aed82a9461 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py @@ -27,6 +27,7 @@ def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, sess self.mode = mode self.page_log = page_log self.next_item_index = 0 + self.model_info = {"family": self.base_client.model_info["family"]} self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".yaml") if page_log is not None: page.add_lines("Wrapping the base client in a ClientWrapper.") diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml index c7f7bf9e10e4..7201ea323e9e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml @@ -4,11 +4,11 @@ Evaluator: path: ~/pagelogs/base client: - model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06 etc. + model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. 
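+  # (For o1-family models, the client creator omits the sampling settings below,
+  # since those models do not accept them.)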
provider: trapi # openai, azure_openai, or trapi api_key: sk- # only for openai temperature: 0.8 - max_tokens: 4096 + max_completion_tokens: 4096 presence_penalty: 0.0 frequency_penalty: 0.0 top_p: 1.0 @@ -32,5 +32,5 @@ FastLearner: evaluations: - function_name: eval_without_learning module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_without_learning - task_name: 100_vampires - num_trials: 10 + task_name: 10_liars + num_trials: 1 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml index 219be2d1d78e..e81a63687165 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml @@ -8,7 +8,7 @@ client: api_key: sk-xx # only for openai # Add the model name here. temperature: 0.8 - max_tokens: 4096 + max_completion_tokens: 4096 presence_penalty: 0.0 frequency_penalty: 0.0 top_p: 1.0 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml index 11f32da605bb..a0c1df5af009 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml @@ -8,7 +8,7 @@ client: api_key: sk-xx # only for openai # Add the model name here. temperature: 0.8 - max_tokens: 4096 + max_completion_tokens: 4096 presence_penalty: 0.0 frequency_penalty: 0.0 top_p: 1.0 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml index 48a481be02b3..dc1e45f44a8b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml @@ -1,14 +1,14 @@ Evaluator: PageLog: - path: ~/pagelogs/temp3 + path: ~/pagelogs/temp4 client: - model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06 etc. + model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. provider: trapi # openai, azure_openai, or trapi api_key: sk- # only for openai temperature: 0.8 - max_tokens: 4096 + max_completion_tokens: 4096 presence_penalty: 0.0 frequency_penalty: 0.0 top_p: 1.0 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py index 3ab458441e76..542bfa19c2de 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py @@ -53,11 +53,16 @@ async def assign_task_to_thin_agent(self, task): 4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. 5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. 6. 
Finish by providing your final response in the particular format requested by the user.""" + if self.client.model_info["family"] == "o1": + # No system message allowed, so pass it as the first user message. + system_message = UserMessage(content=system_message_content, source="User") + else: + # System message allowed. + system_message = SystemMessage(content=system_message_content) - system_message = SystemMessage(content=system_message_content) user_message = UserMessage(content=task, source="User") - input_messages = [system_message] + [user_message] + response = await self.client.create(input_messages) response_str = response.content diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py index c41c7cf48c75..d815c0507f01 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py @@ -22,13 +22,18 @@ def __init__(self, client, page_log): # Create the chat history self._chat_history: List[LLMMessage] = [] - async def call_model(self, details, user_content: UserContent = None, system_message=None, keep_these_messages=True): + async def call_model(self, details, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): # Prepare the input message list - user_message = UserMessage(content=user_content, source="User") - if system_message is None: - system_message = "You are a helpful assistant." - system_message = SystemMessage(content=system_message) + if system_message_content is None: + system_message_content = "You are a helpful assistant." + if self.client.model_info["family"] == "o1": + # No system message allowed, so pass it as the first user message. + system_message = UserMessage(content=system_message_content, source="User") + else: + # System message allowed. + system_message = SystemMessage(content=system_message_content) + user_message = UserMessage(content=user_content, source="User") input_messages = [system_message] + self._chat_history + [user_message] # Call the model. 
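The hunks below update the call sites accordingly: callers now pass a plain string via system_message_content and let call_model decide whether to wrap it as a SystemMessage or, for o1-family models, as a leading UserMessage. A minimal usage sketch under that assumption (the grading text is illustrative, not from the patch):

    # Hypothetical caller: call_model wraps the plain string per model family.
    decision, _ = await self.call_model(
        system_message_content="You are a careful grader.",  # illustrative string
        user_content=user_message,
        details="to check the answer for correctness")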
@@ -80,7 +85,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor user_message.append(response_to_be_graded) self.clear_history() extracted_answer, _ = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to extract the answer") page.add_lines("Extracted answer: " + extracted_answer) @@ -101,7 +106,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor user_message.append(extracted_answer) self.clear_history() decision, _ = await self.call_model( - system_message=sys_message, + system_message_content=sys_message, user_content=user_message, details="to check the answer for correctness") page.add_lines("Decision: " + decision) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py index 72f71f8361e3..6ccf63db5cfb 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py @@ -28,12 +28,16 @@ def __init__(self, client, page_log): async def call_model(self, details, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): # Prepare the input message list - user_message = UserMessage(content=user_content, source="User") - if system_message_content is None: system_message_content = self.default_system_message_content - system_message = SystemMessage(content=system_message_content) + if self.client.model_info["family"] == "o1": + # No system message allowed, so pass it as the first user message. + system_message = UserMessage(content=system_message_content, source="User") + else: + # System message allowed. + system_message = SystemMessage(content=system_message_content) + user_message = UserMessage(content=user_content, source="User") input_messages = [system_message] + self._chat_history + [user_message] # Double check the types of the input messages. From be081b387ca959e1ebd8d9fb55cd7d0f42d5c61e Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sat, 18 Jan 2025 12:51:06 -0800 Subject: [PATCH 38/93] simplification of client creation code --- .../eval_framework/clients/_client_creator.py | 99 +++++++------------ 1 file changed, 37 insertions(+), 62 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py index 1792b3de7f49..3a9b83d45b10 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py @@ -10,17 +10,34 @@ def __init__(self, settings, page_log): self.page_log = page_log def create_client(self): + # A few args are shared by all clients. + args = {} + args["model"] = self.settings["model"] + args["max_completion_tokens"] = self.settings["max_completion_tokens"] + args["max_retries"] = self.settings["max_retries"] + + # The following args don't apply to the 'o1' family of models. 
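+        # (The o1 reasoning models support only their default sampling behavior
+        # and reject these parameters outright.)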
+ if not args["model"].startswith("o1"): + args["temperature"] = self.settings["temperature"] + args["presence_penalty"] = self.settings["presence_penalty"] + args["frequency_penalty"] = self.settings["frequency_penalty"] + args["top_p"] = self.settings["top_p"] + client = None provider = self.settings["provider"] if provider == "openai": - client = self.create_oai_client() + client, source = self.create_oai_client(args) elif provider == "azure_openai": - client = self.create_aoai_client() + client, source = self.create_aoai_client(args) elif provider == "trapi": - client = self.create_trapi_client() + client, source = self.create_trapi_client(args) else: assert False, "Invalid client provider" + # Log some details. + self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) + self.page_log.append_entry_line(source) + # Check if the client should be wrapped. if "ClientWrapper" in self.settings: wrapper_settings = self.settings["ClientWrapper"] @@ -28,38 +45,18 @@ def create_client(self): # Wrap the client. client = ClientWrapper( client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log) - return client - def add_shared_args(self, args): - args["model"] = self.settings["model"] - args["max_completion_tokens"] = self.settings["max_completion_tokens"] - args["max_retries"] = self.settings["max_retries"] - - def add_family_args(self, args): - # Does the model name start with 'o1'? - if not args["model"].startswith("o1"): - # No. A few more things can be specified. - args["temperature"] = self.settings["temperature"] - args["presence_penalty"] = self.settings["presence_penalty"] - args["frequency_penalty"] = self.settings["frequency_penalty"] - args["top_p"] = self.settings["top_p"] - - def create_oai_client(self): + def create_oai_client(self, args): # Create an OpenAI client - args = {"api_key": self.settings["api_key"]} - self.add_shared_args(args) - self.add_family_args(args) - + args["api_key"] = self.settings["api_key"] client = OpenAIChatCompletionClient(**args) - - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through OpenAI") - return client + return client, " created through OpenAI" - def create_aoai_client(self): + def create_aoai_client(self, args): # Create an Azure OpenAI client + token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default") model = self.settings["model"] if model == "gpt-4o-2024-08-06": azure_deployment = 'gpt-4o-2024-08-06-eval' # This is DeploymentName in the table at https://aka.ms/trapi/models @@ -72,28 +69,16 @@ def create_aoai_client(self): azure_endpoint = "https://agentic1.openai.azure.com/" else: assert False, "Unsupported model" - - token_provider = get_bearer_token_provider(DefaultAzureCredential(), - "https://cognitiveservices.azure.com/.default") api_version = '2024-12-01-preview' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - - args = { - "azure_ad_token_provider": token_provider, - "azure_deployment": azure_deployment, - "azure_endpoint": azure_endpoint, - "api_version": api_version, - } - self.add_shared_args(args) - self.add_family_args(args) - + args["azure_ad_token_provider"] = token_provider + args["azure_deployment"] = azure_deployment + args["azure_endpoint"] = azure_endpoint + args["api_version"] = api_version client = AzureOpenAIChatCompletionClient(**args) - - self.page_log.append_entry_line("Client: 
{}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through Azure OpenAI") - return client + return client, " created through Azure OpenAI" - def create_trapi_client(self): + def create_trapi_client(self, args): # Create an Azure OpenAI client through TRAPI token_provider = get_bearer_token_provider(ChainedTokenCredential( AzureCliCredential(), @@ -109,7 +94,6 @@ def create_trapi_client(self): # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs ) ), "api://trapi/.default") - api_version = '2024-12-01-preview' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release model = self.settings["model"] if model == "gpt-4o-2024-08-06": azure_deployment = 'gpt-4o_2024-08-06' # This is DeploymentName in the table at https://aka.ms/trapi/models @@ -123,19 +107,10 @@ def create_trapi_client(self): assert False, "Unsupported model" trapi_suffix = 'msraif/shared' # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}' - - args = { - "azure_ad_token_provider": token_provider, - "azure_deployment": azure_deployment, - "azure_endpoint": endpoint, - "api_version": api_version, - } - self.add_shared_args(args) - self.add_family_args(args) - + api_version = '2024-12-01-preview' # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release + args["azure_ad_token_provider"] = token_provider + args["azure_deployment"] = azure_deployment + args["azure_endpoint"] = endpoint + args["api_version"] = api_version client = AzureOpenAIChatCompletionClient(**args) - - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(" created through TRAPI") - return client - + return client, " created through TRAPI" From 29d1494d50a3f8875199b8af3a98f4578e35ba4f Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sat, 18 Jan 2025 14:29:58 -0800 Subject: [PATCH 39/93] simplify folder structure --- python/packages/ame/LICENSE-CODE | 21 ++++++++++ python/packages/ame/README.md | 1 + python/packages/ame/pyproject.toml | 30 +++++++++++++++ python/packages/ame/src/ame/__init__.py | 3 ++ .../src/ame}/clients/_client_creator.py | 0 .../src/ame}/clients/_client_wrapper.py | 2 +- .../eval_framework => ame/src/ame}/eval.py | 6 +-- .../eval_learning_from_demonstration.py | 0 .../ame}/evaluations/eval_self_teaching.py | 0 .../src/ame}/evaluations/eval_teachability.py | 2 +- .../ame}/evaluations/eval_without_learning.py | 0 .../src/ame}/settings/baseline.yaml | 6 +-- .../src/ame/settings/check.yaml} | 10 ++--- .../src/ame}/settings/m1.yaml | 24 ++++++------ .../src/ame}/tasks/100_vampires.yaml | 0 .../src/ame}/tasks/10_liars.yaml | 0 .../src/ame}/tasks/3_to_third.yaml | 0 .../src/ame}/tasks/autogen_package.yaml | 0 .../src/ame}/tasks/cell_towers.yaml | 0 .../autogen_ext/agentic_memory/__init__.py | 4 -- .../eval_framework/settings/long.yaml | 38 ------------------- .../fast_learners/apprentice/__init__.py | 3 -- .../src/autogen_ext/apprentice/__init__.py | 5 +++ .../apprentice/_agent_wrapper.py | 0 .../apprentice/_agentic_memory_bank.py | 0 .../apprentice/_agentic_memory_controller.py | 0 .../fast_learners => }/apprentice/_grader.py | 2 +- .../apprentice/_page_log.py | 0 .../apprentice/_prompter.py | 2 +- .../apprentice/_string_similarity_map.py | 0 .../{agentic_memory => apprentice}/_utils.py | 0 .../apprentice/apprentice.py | 
0 32 files changed, 86 insertions(+), 73 deletions(-) create mode 100644 python/packages/ame/LICENSE-CODE create mode 100644 python/packages/ame/README.md create mode 100644 python/packages/ame/pyproject.toml create mode 100644 python/packages/ame/src/ame/__init__.py rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/clients/_client_creator.py (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/clients/_client_wrapper.py (99%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/eval.py (96%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/evaluations/eval_learning_from_demonstration.py (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/evaluations/eval_self_teaching.py (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/evaluations/eval_teachability.py (97%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/evaluations/eval_without_learning.py (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/settings/baseline.yaml (84%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml => ame/src/ame/settings/check.yaml} (74%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/settings/m1.yaml (51%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/tasks/100_vampires.yaml (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/tasks/10_liars.yaml (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/tasks/3_to_third.yaml (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/tasks/autogen_package.yaml (100%) rename python/packages/{autogen-ext/src/autogen_ext/agentic_memory/eval_framework => ame/src/ame}/tasks/cell_towers.yaml (100%) delete mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py delete mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml delete mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py create mode 100644 python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_agent_wrapper.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_agentic_memory_bank.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_agentic_memory_controller.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_grader.py (99%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_page_log.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_prompter.py (99%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/_string_similarity_map.py (100%) rename 
python/packages/autogen-ext/src/autogen_ext/{agentic_memory => apprentice}/_utils.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{agentic_memory/fast_learners => }/apprentice/apprentice.py (100%) diff --git a/python/packages/ame/LICENSE-CODE b/python/packages/ame/LICENSE-CODE new file mode 100644 index 000000000000..9e841e7a26e4 --- /dev/null +++ b/python/packages/ame/LICENSE-CODE @@ -0,0 +1,21 @@ + MIT License + + Copyright (c) Microsoft Corporation. + + Permission is hereby granted, free of charge, to any person obtaining a copy + of this software and associated documentation files (the "Software"), to deal + in the Software without restriction, including without limitation the rights + to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + copies of the Software, and to permit persons to whom the Software is + furnished to do so, subject to the following conditions: + + The above copyright notice and this permission notice shall be included in all + copies or substantial portions of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + SOFTWARE diff --git a/python/packages/ame/README.md b/python/packages/ame/README.md new file mode 100644 index 000000000000..f2b0f7a18b9a --- /dev/null +++ b/python/packages/ame/README.md @@ -0,0 +1 @@ +# Agentic Memory Evaluation Framework diff --git a/python/packages/ame/pyproject.toml b/python/packages/ame/pyproject.toml new file mode 100644 index 000000000000..aee909668c3c --- /dev/null +++ b/python/packages/ame/pyproject.toml @@ -0,0 +1,30 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "ame" +version = "0.1.1" +license = {file = "LICENSE-CODE"} +description = "Agentic Memory Evaluation Framework" +readme = "README.md" +requires-python = ">=3.10" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] +dependencies = [ + "autogen-core==0.4.3", +] + +[tool.hatch.build.targets.wheel] +packages = ["src/ame"] + +[tool.ruff] +extend = "../../pyproject.toml" +include = ["src/**", "tests/*.py"] + +[tool.pyright] +extends = "../../pyproject.toml" +include = ["src", "tests"] diff --git a/python/packages/ame/src/ame/__init__.py b/python/packages/ame/src/ame/__init__.py new file mode 100644 index 000000000000..98c56045ead7 --- /dev/null +++ b/python/packages/ame/src/ame/__init__.py @@ -0,0 +1,3 @@ +import importlib.metadata + +__version__ = importlib.metadata.version("ame") diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_creator.py rename to python/packages/ame/src/ame/clients/_client_creator.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py similarity index 99% rename from 
python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py rename to python/packages/ame/src/ame/clients/_client_wrapper.py index 98aed82a9461..91172d70c7f3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -9,7 +9,7 @@ RequestUsage, ) from autogen_core.tools import Tool, ToolSchema -from autogen_ext.agentic_memory import PageLog +from autogen_ext.apprentice import PageLog class ClientWrapper: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py b/python/packages/ame/src/ame/eval.py similarity index 96% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py rename to python/packages/ame/src/ame/eval.py index 091919afe2d1..cb2ecc45bb11 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -3,8 +3,8 @@ import asyncio import importlib from typing import Tuple -from autogen_ext.agentic_memory import PageLog, Grader -from autogen_ext.agentic_memory.eval_framework.clients._client_creator import ClientCreator +from autogen_ext.apprentice import PageLog, Grader +from ame.clients._client_creator import ClientCreator class Evaluator: @@ -68,7 +68,7 @@ async def run(self, settings_filepath): client = client_creator.create_client() # Create the specified fast_learner implementation. - fast_learner_settings = settings["FastLearner"] + fast_learner_settings = settings["fast_learning_agent"] module_path = fast_learner_settings["module_path"] try: module = importlib.import_module(module_path) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_learning_from_demonstration.py rename to python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py b/python/packages/ame/src/ame/evaluations/eval_self_teaching.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_self_teaching.py rename to python/packages/ame/src/ame/evaluations/eval_self_teaching.py diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py b/python/packages/ame/src/ame/evaluations/eval_teachability.py similarity index 97% rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py rename to python/packages/ame/src/ame/evaluations/eval_teachability.py index 1eacd652682d..8cb12efe1d18 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_teachability.py +++ b/python/packages/ame/src/ame/evaluations/eval_teachability.py @@ -1,4 +1,4 @@ -from autogen_ext.agentic_memory import PageLog, Grader +from autogen_ext.apprentice import PageLog, Grader async def eval_teachability(fast_learner, evaluator, client, page_log, settings): diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py 
b/python/packages/ame/src/ame/evaluations/eval_without_learning.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/evaluations/eval_without_learning.py
rename to python/packages/ame/src/ame/evaluations/eval_without_learning.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml
similarity index 84%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml
rename to python/packages/ame/src/ame/settings/baseline.yaml
index 7201ea323e9e..a0a53946ad4e 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/baseline.yaml
+++ b/python/packages/ame/src/ame/settings/baseline.yaml
@@ -18,9 +18,9 @@ client:
     mode: check-replay  # pass-through, record, or check-replay
     session_name: short-3
 
-FastLearner:
+fast_learning_agent:
   class_name: Apprentice
-  module_path: autogen_ext.agentic_memory.fast_learners.apprentice
+  module_path: autogen_ext.apprentice
   AgenticMemoryController:
     max_train_trials: 2  # 2-10
     max_test_trials: 1  # 1-3
@@ -31,6 +31,6 @@ FastLearner:
 
 evaluations:
   - function_name: eval_without_learning
-    module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_without_learning
+    module_path: ame.evaluations.eval_without_learning
    task_name: 10_liars
    num_trials: 1
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml b/python/packages/ame/src/ame/settings/check.yaml
similarity index 74%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml
rename to python/packages/ame/src/ame/settings/check.yaml
index dc1e45f44a8b..005453ff58cc 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/short.yaml
+++ b/python/packages/ame/src/ame/settings/check.yaml
@@ -18,9 +18,9 @@ client:
     mode: check-replay  # pass-through, record, or check-replay
     session_name: short-3
 
-FastLearner:
+fast_learning_agent:
   class_name: Apprentice
-  module_path: autogen_ext.agentic_memory.fast_learners.apprentice
+  module_path: autogen_ext.apprentice
   AgenticMemoryController:
     max_train_trials: 2  # 2-10
     max_test_trials: 1  # 1-3
@@ -31,13 +31,13 @@ FastLearner:
 
 evaluations:
   - function_name: eval_teachability
-    module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_teachability
+    module_path: ame.evaluations.eval_teachability
 
   - function_name: eval_learning_from_demonstration
-    module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_learning_from_demonstration
+    module_path: ame.evaluations.eval_learning_from_demonstration
     num_trials: 1  # 1-10
 
   - function_name: eval_self_teaching
-    module_path: autogen_ext.agentic_memory.eval_framework.evaluations.eval_self_teaching
+    module_path: ame.evaluations.eval_self_teaching
     num_loops: 1  # 1-10
     num_final_test_trials: 1  # 1-3
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml
similarity index 51%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml
rename to python/packages/ame/src/ame/settings/m1.yaml
index a0c1df5af009..1774748f5461 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/m1.yaml
+++ b/python/packages/ame/src/ame/settings/m1.yaml
@@ -4,9 +4,9 @@ Evaluator:
     path: ~/pagelogs/m1
 
 client:
+  model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
   provider: trapi  # openai, azure_openai, or trapi
-  api_key: sk-xx  # only for openai
-  # Add the model name here.
+  api_key: sk-  # only for openai
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0
@@ -14,21 +14,19 @@ client:
   top_p: 1.0
   max_retries: 65535
 
-FastLearner:
+fast_learning_agent:
+  class_name: Apprentice
+  module_path: autogen_ext.apprentice
   AgenticMemoryController:
+    max_train_trials: 2  # 2-10
+    max_test_trials: 1  # 1-3
     AgenticMemoryBank:
       path: ~/agentic_memory_bank/m1
   AgentWrapper:
     base_agent: MagenticOneGroupChat  # MagenticOneGroupChat, thin_agent, etc.
 
 evaluations:
-#  - name: eval_teachability
-#
-#  - name: eval_learning_from_demonstration
-#    num_trials: 1  # 1-10
-
-  - name: eval_self_teaching
-    num_loops: 1  # 1-10
-    max_train_trials: 2  # 2-10  Move to AMC
-    max_test_trials: 1  # 1-3  Move to AMC
-    num_final_test_trials: 1  # 1-3
+  - function_name: eval_without_learning
+    module_path: ame.evaluations.eval_without_learning
+    task_name: 10_liars
+    num_trials: 1
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml b/python/packages/ame/src/ame/tasks/100_vampires.yaml
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/100_vampires.yaml
rename to python/packages/ame/src/ame/tasks/100_vampires.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml b/python/packages/ame/src/ame/tasks/10_liars.yaml
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/10_liars.yaml
rename to python/packages/ame/src/ame/tasks/10_liars.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml b/python/packages/ame/src/ame/tasks/3_to_third.yaml
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/3_to_third.yaml
rename to python/packages/ame/src/ame/tasks/3_to_third.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml b/python/packages/ame/src/ame/tasks/autogen_package.yaml
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/autogen_package.yaml
rename to python/packages/ame/src/ame/tasks/autogen_package.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml b/python/packages/ame/src/ame/tasks/cell_towers.yaml
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/tasks/cell_towers.yaml
rename to python/packages/ame/src/ame/tasks/cell_towers.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py
deleted file mode 100644
index 9c3b0f966c6a..000000000000
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-from .fast_learners.apprentice._page_log import PageLog
-from .fast_learners.apprentice._grader import Grader
-
-__all__ = ["PageLog", "Grader"]
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml
deleted file mode 100644
index e81a63687165..000000000000
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/eval_framework/settings/long.yaml
+++ /dev/null
@@ -1,38 +0,0 @@
-
-Evaluator:
-  PageLog:
-    path: ~/pagelogs/long
-
-client:
-  provider: trapi  # openai, azure_openai, or trapi
-  api_key: sk-xx  # only for openai
-  # Add the model name here.
-  temperature: 0.8
-  max_completion_tokens: 4096
-  presence_penalty: 0.0
-  frequency_penalty: 0.0
-  top_p: 1.0
-  max_retries: 65535
-  ClientWrapper:  # Provides record & replay functionality
-    enabled: 0  # Only works for thin_agent currently
-    mode: check-replay  # pass-through, record, or check-replay
-    session_name: long
-
-FastLearner:
-  AgenticMemoryController:
-    AgenticMemoryBank:
-      path: ~/agentic_memory_bank/long
-  AgentWrapper:
-    base_agent: thin_agent  # MagenticOneGroupChat, thin_agent, etc.
-
-evaluations:
-  - name: eval_teachability
-
-  - name: eval_learning_from_demonstration
-    num_trials: 10  # 1-10
-
-  - name: eval_self_teaching
-    num_loops: 10  # 1-10
-    max_train_trials: 10  # 2-10  Move to AMC
-    max_test_trials: 3  # 1-3  Move to AMC
-    num_final_test_trials: 3  # 1-3
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py
deleted file mode 100644
index 521bdfa714e7..000000000000
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/__init__.py
+++ /dev/null
@@ -1,3 +0,0 @@
-from .apprentice import Apprentice
-
-__all__ = ["Apprentice"]
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py
new file mode 100644
index 000000000000..9426eb4bd11d
--- /dev/null
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py
@@ -0,0 +1,5 @@
+from .apprentice import Apprentice
+from ._page_log import PageLog
+from ._grader import Grader
+
+__all__ = ["Apprentice", "PageLog", "Grader"]
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agent_wrapper.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_bank.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_agentic_memory_controller.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
similarity index 99%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
index d815c0507f01..88d122f01aaf 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
@@ -8,7 +8,7 @@
     CreateResult,
 )
 
-from ..._utils import UserContent
+from ._utils import UserContent
 
 
 class Grader:
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_page_log.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py
similarity index 99%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py
index 6ccf63db5cfb..ad4e28312c61 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_prompter.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py
@@ -11,7 +11,7 @@
 
 from autogen_core import FunctionCall, Image
 
-from ..._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content
+from ._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content
 
 
 class Prompter:
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/_string_similarity_map.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py
similarity index 100%
rename from python/packages/autogen-ext/src/autogen_ext/agentic_memory/fast_learners/apprentice/apprentice.py
rename to python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py

From 8e9a5500a349297345f0c027626de279a60806da Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Mon, 20 Jan 2025 15:51:59 -0800
Subject: [PATCH 40/93] Move task data strings out of the eval functions.
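After this patch, the settings yaml carries only file names; each eval function
asks the Evaluator to load the actual task, advice, and demo strings from yaml
files under task_data. Roughly, the loading path looks like the sketch below
(a minimal stand-alone version of the helper added in this patch; the task
file name shown is just an example):

    # Sketch: load a task's description and expected answer from a yaml file
    # under task_data/tasks, keyed by bare file name (no extension).
    import os
    import yaml

    def get_task_description_and_answer_from_file(task_filename):
        dir_of_this_file = os.path.dirname(os.path.abspath(__file__))
        task_filepath = os.path.join(dir_of_this_file, 'task_data', 'tasks', task_filename + '.yaml')
        with open(task_filepath, "r") as file:
            task_details = yaml.load(file, Loader=yaml.FullLoader)
        return task_details["task_description"], task_details["expected_answer"]

    # e.g., description, answer = get_task_description_and_answer_from_file('10_liars')

Advice and demo strings move out the same way, into task_data/advice and
task_data/demos, and each evaluation's settings now list one or more runs,
where each run dict names the data files to load.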
---
 python/packages/ame/src/ame/eval.py | 39 ++++++++++----
 .../eval_learning_from_demonstration.py | 42 +++++++++++++++
 .../ame/eval_functions/eval_self_teaching.py | 52 +++++++++++++++++++
 .../eval_teachability.py | 19 ++++---
 .../eval_without_learning.py | 11 ++--
 .../eval_learning_from_demonstration.py | 35 -------------
 .../src/ame/evaluations/eval_self_teaching.py | 38 --------------
 .../ame/src/ame/settings/baseline.yaml | 10 ++--
 .../packages/ame/src/ame/settings/check.yaml | 31 +++++++----
 python/packages/ame/src/ame/settings/m1.yaml | 10 ++--
 .../src/ame/task_data/advice/add_topic.yaml | 6 +++
 .../task_data/demos/cell_towers_2_demo.yaml | 11 ++++
 .../{ => task_data}/tasks/100_vampires.yaml | 2 -
 .../ame/{ => task_data}/tasks/10_liars.yaml | 2 -
 .../ame/{ => task_data}/tasks/3_to_third.yaml | 2 -
 .../tasks/autogen_package.yaml | 2 -
 .../tasks/cell_towers_1.yaml} | 2 -
 .../ame/task_data/tasks/cell_towers_2.yaml | 9 ++++
 18 files changed, 201 insertions(+), 122 deletions(-)
 create mode 100644 python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
 create mode 100644 python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
 rename python/packages/ame/src/ame/{evaluations => eval_functions}/eval_teachability.py (67%)
 rename python/packages/ame/src/ame/{evaluations => eval_functions}/eval_without_learning.py (62%)
 delete mode 100644 python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py
 delete mode 100644 python/packages/ame/src/ame/evaluations/eval_self_teaching.py
 create mode 100644 python/packages/ame/src/ame/task_data/advice/add_topic.yaml
 create mode 100644 python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml
 rename python/packages/ame/src/ame/{ => task_data}/tasks/100_vampires.yaml (98%)
 rename python/packages/ame/src/ame/{ => task_data}/tasks/10_liars.yaml (95%)
 rename python/packages/ame/src/ame/{ => task_data}/tasks/3_to_third.yaml (84%)
 rename python/packages/ame/src/ame/{ => task_data}/tasks/autogen_package.yaml (92%)
 rename python/packages/ame/src/ame/{tasks/cell_towers.yaml => task_data/tasks/cell_towers_1.yaml} (96%)
 create mode 100644 python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml

diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py
index cb2ecc45bb11..792cbbeb4fc0 100644
--- a/python/packages/ame/src/ame/eval.py
+++ b/python/packages/ame/src/ame/eval.py
@@ -11,16 +11,31 @@ class Evaluator:
     def __init__(self):
         self.page_log = None
 
-    def get_task_details_by_name(self, task_name):
+    def get_task_description_and_answer_from_file(self, task_filename):
         path_to_this_file = os.path.abspath(__file__)
         dir_of_this_file = os.path.dirname(path_to_this_file)
-        task_filepath = os.path.join(dir_of_this_file, 'tasks', task_name + '.yaml')
+        task_filepath = os.path.join(dir_of_this_file, 'task_data', 'tasks', task_filename + '.yaml')
         with open(task_filepath, "r") as file:
             task_details = yaml.load(file, Loader=yaml.FullLoader)
-            assert task_details["name"] == task_name
-        return task_details
+        return task_details["task_description"], task_details["expected_answer"]
 
-    async def test_fast_learner(self, fast_learner, task_details, num_trials,
+    def get_advice_from_file(self, advice_filename):
+        path_to_this_file = os.path.abspath(__file__)
+        dir_of_this_file = os.path.dirname(path_to_this_file)
+        task_filepath = os.path.join(dir_of_this_file, 'task_data', 'advice', advice_filename + '.yaml')
+        with open(task_filepath, "r") as file:
+            advice_dict = yaml.load(file, Loader=yaml.FullLoader)
+        return advice_dict["advice"]
+
+    def get_demo_from_file(self, demo_filename):
+        path_to_this_file = os.path.abspath(__file__)
+        dir_of_this_file = os.path.dirname(path_to_this_file)
+        task_filepath = os.path.join(dir_of_this_file, 'task_data', 'demos', demo_filename + '.yaml')
+        with open(task_filepath, "r") as file:
+            demo_dict = yaml.load(file, Loader=yaml.FullLoader)
+        return demo_dict["demo"]
+
+    async def test_fast_learner(self, fast_learner, task_description, expected_answer, num_trials,
                                 use_memory, client, page_log) -> Tuple[int, int]:
         page = page_log.begin_page(
             summary="Evaluator.test_fast_learner",
@@ -35,10 +50,9 @@ async def test_fast_learner(self, fast_learner, task_details, num_trials,
         for trial in range(num_trials):
             page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True)
             page.add_lines("Try to solve the task.\n", flush=True)
-            task_description = task_details["task_description"]
             response = await fast_learner.assign_task(task_description, use_memory=use_memory)
             response_is_correct, extracted_answer = await grader.is_response_correct(
-                task_description, response, task_details["expected_answer"])
+                task_description, response, expected_answer)
             page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True)
             if response_is_correct:
                 page.add_lines("Answer is CORRECT.\n", flush=True)
@@ -89,19 +103,24 @@ async def run(self, settings_filepath):
 
             # Execute each evaluation.
             for evaluation_settings in settings["evaluations"]:
-                module_path = evaluation_settings["module_path"]
+                # Import the function.
+                function_settings = evaluation_settings["eval_function"]
+                module_path = function_settings["module_path"]
                 try:
                     module = importlib.import_module(module_path)
                 except ModuleNotFoundError:
                     print('Failed to import {}'.format(module_path))
                     raise
-                function_name = evaluation_settings["function_name"]
+                function_name = function_settings["function_name"]
                 try:
                     eval_function = getattr(module, function_name)
                 except AttributeError:
                     print('Failed to import {}.{}'.format(module_path, function_name))
                     raise
-                await eval_function(fast_learner, self, client, self.page_log, evaluation_settings)
+
+                # Call the eval function for each listed run.
+                for run_dict in evaluation_settings["runs"]:
+                    await eval_function(fast_learner, self, client, self.page_log, function_settings, run_dict)
 
             if hasattr(client, "finalize"):
                 # If this is a client wrapper, it needs to be finalized.
diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
new file mode 100644
index 000000000000..b20553436ff2
--- /dev/null
+++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
@@ -0,0 +1,42 @@
+
+async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings, run_dict):
+    """An evaluation"""
+    page = page_log.begin_page(
+        summary="eval_learning_from_demonstration",
+        details='',
+        method_call="eval_learning_from_demonstration")
+
+    num_trials = settings["num_trials"]
+
+    # This eval function needs 3 data strings for each run.
+    task_1_file = run_dict["task_1_file"]  # The task being tested.
+    task_2_file = run_dict["task_2_file"]  # A similar but different task.
+    demo_2_file = run_dict["demo_2_file"]  # A demonstration of solving task 2.
+
+    # Get the actual task and demo strings from their files.
+    task_description_1, expected_answer_1 = evaluator.get_task_description_and_answer_from_file(task_1_file)
+    demo_task, _ = evaluator.get_task_description_and_answer_from_file(task_2_file)
+    demo = evaluator.get_demo_from_file(demo_2_file)
+
+    # Start by clearing memory then running a baseline test.
+    page.add_lines("To get a baseline, clear memory, then assign the task.")
+    fast_learner.reset_memory()
+    num_successes, num_trials = await evaluator.test_fast_learner(
+        fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1,
+        num_trials=num_trials, use_memory=True, client=client, page_log=page_log)
+    success_rate = round((num_successes / num_trials) * 100)
+    page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True)
+
+    # Provide a demonstration for a similar but different task.
+    page.add_lines("Demonstrate a solution to a similar task.")
+    await fast_learner.learn_from_demonstration(demo_task, demo)
+
+    # Now test again to see if the demonstration (retrieved from memory) helps.
+    page.add_lines("Assign the task again to see if the demonstration helps.")
+    num_successes, num_trials = await evaluator.test_fast_learner(
+        fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1,
+        num_trials=num_trials, use_memory=True, client=client, page_log=page_log)
+    success_rate = round((num_successes / num_trials) * 100)
+    page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True)
+
+    page_log.finish_page(page)
diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
new file mode 100644
index 000000000000..3edf7551d3dd
--- /dev/null
+++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
@@ -0,0 +1,52 @@
+
+async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings, run_dict):
+    """An evaluation"""
+    page = page_log.begin_page(
+        summary="eval_self_teaching",
+        details='',
+        method_call="eval_self_teaching")
+
+    num_loops = settings["num_loops"]
+    num_final_test_trials = settings["num_final_test_trials"]
+
+    # This eval function needs 2 data strings for each run.
+    task_file_1 = run_dict["task_file_1"]  # Train and test on this task.
+    task_file_2 = run_dict["task_file_2"]  # Test generalization on a different, similar task.
+
+    # Get the two task strings from their files.
+    task_description_1, expected_answer_1 = evaluator.get_task_description_and_answer_from_file(task_file_1)
+    task_description_2, expected_answer_2 = evaluator.get_task_description_and_answer_from_file(task_file_2)
+
+    # Start the test with empty memory.
+    fast_learner.reset_memory()
+
+    total_num_successes_1 = 0
+    total_num_successes_2 = 0
+    total_num_trials = 0
+    for i in range(num_loops):
+        # Train on the first task.
+        await fast_learner.train_on_task(task=task_description_1, expected_answer=expected_answer_1)
+
+        # Test on the first task.
+        num_successes, num_trials = await evaluator.test_fast_learner(
+            fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1,
+            num_trials=num_final_test_trials, use_memory=True, client=client, page_log=page_log)
+        page.add_lines("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100)), flush=True)
+        total_num_successes_1 += num_successes
+
+        # Test on the second task.
+        num_successes, num_trials = await evaluator.test_fast_learner(
+            fast_learner=fast_learner, task_description=task_description_2, expected_answer=expected_answer_2,
+            num_trials=num_final_test_trials, use_memory=True, client=client, page_log=page_log)
+        page.add_lines("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100)), flush=True)
+        total_num_successes_2 += num_successes
+
+        total_num_trials += num_final_test_trials
+        page.add_lines("")
+
+    overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100)
+    overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100)
+    page.add_lines("\nOverall task 1 success rate: {}%".format(overall_success_rate_1), flush=True)
+    page.add_lines("Overall task 2 success rate: {}%".format(overall_success_rate_2), flush=True)
+
+    page_log.finish_page(page)
diff --git a/python/packages/ame/src/ame/evaluations/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
similarity index 67%
rename from python/packages/ame/src/ame/evaluations/eval_teachability.py
rename to python/packages/ame/src/ame/eval_functions/eval_teachability.py
index 8cb12efe1d18..ce7ff7580e78 100644
--- a/python/packages/ame/src/ame/evaluations/eval_teachability.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
@@ -1,17 +1,20 @@
 from autogen_ext.apprentice import PageLog, Grader
 
 
-async def eval_teachability(fast_learner, evaluator, client, page_log, settings):
+async def eval_teachability(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
     page = page_log.begin_page(
         summary="eval_teachability",
         details='',
         method_call="eval_teachability")
 
-    task_details = evaluator.get_task_details_by_name("autogen_package")
-    task_description = task_details["task_description"]
-    answer = task_details["expected_answer"]
-    grader = Grader(client, page_log)
+    # This eval function needs 2 data strings for each run.
+    task_file = run_dict["task_file"]  # The task being tested.
+    advice_file = run_dict["advice_file"]  # Advice for solving such tasks.
+
+    # Get the actual task and advice strings from their files.
+    task_description, expected_answer = evaluator.get_task_description_and_answer_from_file(task_file)
+    advice = evaluator.get_advice_from_file(advice_file)
 
     # First test without memory.
     fast_learner.reset_memory()
@@ -19,7 +22,8 @@ async def eval_teachability(fast_learner, evaluator, client, page_log, settings)
     response = await fast_learner.handle_user_message(task_description)
 
     # Check the response.
-    response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer)
+    grader = Grader(client, page_log)
+    response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer)
     page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True)
     if response_is_correct:
         page.add_lines("Answer is CORRECT.\n", flush=True)
@@ -28,7 +32,6 @@
 
     # Give advice that should help solve this task.
     page.add_lines("Give the advice.")
-    advice = "When somebody builds something on autogen and wants to contribute it to autogen, instead of creating a new autogen package, it's better for them to implement it in a separate github repo, then just add the autogen-extension topic to that repo. That way their contribution will be automatically discoverable through autogen's page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html"
     await fast_learner.handle_user_message(advice)
 
     # Now ask the question again to see if the advice helps.
@@ -36,7 +39,7 @@
     response = await fast_learner.handle_user_message(task_description)
 
     # Check the response.
-    response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, answer)
+    response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer)
     page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True)
     if response_is_correct:
         page.add_lines("Answer is CORRECT.\n", flush=True)
diff --git a/python/packages/ame/src/ame/evaluations/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
similarity index 62%
rename from python/packages/ame/src/ame/evaluations/eval_without_learning.py
rename to python/packages/ame/src/ame/eval_functions/eval_without_learning.py
index 7d6a4186779a..628f047f13a6 100644
--- a/python/packages/ame/src/ame/evaluations/eval_without_learning.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
@@ -1,20 +1,23 @@
 
-async def eval_without_learning(fast_learner, evaluator, client, page_log, settings):
+async def eval_without_learning(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
     page = page_log.begin_page(
         summary="eval_without_learning",
         details='',
         method_call="eval_without_learning")
 
-    task_details = evaluator.get_task_details_by_name(settings["task_name"])
     num_trials = settings["num_trials"]
 
+    # Get the task strings.
+    task_file = run_dict["task_file"]
+    task_description, expected_answer = evaluator.get_task_description_and_answer_from_file(task_file)
+
     # Clear memory then run a baseline test.
     page.add_lines("To get a baseline, clear memory, then assign the task.")
     fast_learner.reset_memory()
     num_successes, num_trials = await evaluator.test_fast_learner(
-        fast_learner=fast_learner, task_details=task_details, num_trials=num_trials,
-        use_memory=True, client=client, page_log=page_log)
+        fast_learner=fast_learner, task_description=task_description, expected_answer=expected_answer,
+        num_trials=num_trials, use_memory=True, client=client, page_log=page_log)
     success_rate = round((num_successes / num_trials) * 100)
     page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True)
diff --git a/python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py
deleted file mode 100644
index 7ed4ea344823..000000000000
--- a/python/packages/ame/src/ame/evaluations/eval_learning_from_demonstration.py
+++ /dev/null
@@ -1,35 +0,0 @@
-
-async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings):
-    """An evaluation"""
-    page = page_log.begin_page(
-        summary="eval_learning_from_demonstration",
-        details='',
-        method_call="eval_learning_from_demonstration")
-
-    task_details = evaluator.get_task_details_by_name("cell_towers")
-    num_trials = settings["num_trials"]
-
-    # Start by clearing memory then running a baseline test.
- page.add_lines("To get a baseline, clear memory, then assign the task.") - fast_learner.reset_memory() - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, - use_memory=True, client=client, page_log=page_log) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) - - # Provide the demonstration. - page.add_lines("Demonstrate a solution to a similar task.") - demo_task = "You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. Each cell phone tower can cover houses located next to the road within a 4-mile radius. Find the minimum number of cell phone towers needed to cover all houses next to the road. Your answer should be a positive numerical integer value." - demonstration = "Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. Then start at one end and place the towers only where absolutely needed. The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. This obviously covers houses up to mile 7. But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. The next uncovered house would be at mile 12 (not 10), requiring a second tower. It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough." - await fast_learner.learn_from_demonstration(demo_task, demonstration) - - # Now test again to see if the demonstration (retrieved from memory) helps. - page.add_lines("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_details=task_details, num_trials=num_trials, - use_memory=True, client=client, page_log=page_log) - success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) - - page_log.finish_page(page) diff --git a/python/packages/ame/src/ame/evaluations/eval_self_teaching.py b/python/packages/ame/src/ame/evaluations/eval_self_teaching.py deleted file mode 100644 index d66af47fb3e7..000000000000 --- a/python/packages/ame/src/ame/evaluations/eval_self_teaching.py +++ /dev/null @@ -1,38 +0,0 @@ - -async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings): - """An evaluation""" - page = page_log.begin_page( - summary="eval_self_teaching", - details='', - method_call="eval_self_teaching") - - # Start the test with empty memory. - fast_learner.reset_memory() - - task_details_list = [ - evaluator.get_task_details_by_name("10_liars"), - evaluator.get_task_details_by_name("100_vampires")] - total_num_successes_list = [0 for _ in range(len(task_details_list))] - total_num_trials = 0 - for i in range(settings["num_loops"]): - # Train on the first task in the list. - task_details = task_details_list[0] - await fast_learner.train_on_task(task=task_details["task_description"], - expected_answer=task_details["expected_answer"]) - - # Test on all tasks in the list. 
-        for j, task_details in enumerate(task_details_list):
-            num_successes, num_trials = await evaluator.test_fast_learner(
-                fast_learner=fast_learner, task_details=task_details, num_trials=settings["num_final_test_trials"],
-                use_memory=True, client=client, page_log=page_log)
-
-            page.add_lines("Success rate ({}): {}%".format(j, round((num_successes / num_trials) * 100)), flush=True)
-            total_num_successes_list[j] += num_successes
-        total_num_trials += settings["num_final_test_trials"]
-        page.add_lines("")
-
-    for i, total_num_successes in enumerate(total_num_successes_list):
-        success_rate = round((total_num_successes / total_num_trials) * 100)
-        page.add_lines("\nOverall success rate ({}): {}%\n".format(i, success_rate), flush=True)
-
-    page_log.finish_page(page)
diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml
index a0a53946ad4e..f6047f38fe1b 100644
--- a/python/packages/ame/src/ame/settings/baseline.yaml
+++ b/python/packages/ame/src/ame/settings/baseline.yaml
@@ -30,7 +30,9 @@ fast_learning_agent:
     base_agent: thin_agent  # MagenticOneGroupChat, thin_agent, etc.
 
 evaluations:
-  - function_name: eval_without_learning
-    module_path: ame.evaluations.eval_without_learning
-    task_name: 10_liars
-    num_trials: 1
+  - eval_function:
+      function_name: eval_without_learning
+      module_path: ame.eval_functions.eval_without_learning
+      num_trials: 1  # 1-10
+    runs:
+      - task_file: 10_liars
diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml
index 005453ff58cc..c8d40950aab0 100644
--- a/python/packages/ame/src/ame/settings/check.yaml
+++ b/python/packages/ame/src/ame/settings/check.yaml
@@ -30,14 +30,27 @@ fast_learning_agent:
     base_agent: thin_agent  # MagenticOneGroupChat, thin_agent, etc.
 
 evaluations:
-  - function_name: eval_teachability
-    module_path: ame.evaluations.eval_teachability
+  - eval_function:
+      function_name: eval_teachability
+      module_path: ame.eval_functions.eval_teachability
+    runs:
+      - task_file: autogen_package
+        advice_file: add_topic
 
-  - function_name: eval_learning_from_demonstration
-    module_path: ame.evaluations.eval_learning_from_demonstration
-    num_trials: 1  # 1-10
+  - eval_function:
+      function_name: eval_learning_from_demonstration
+      module_path: ame.eval_functions.eval_learning_from_demonstration
+      num_trials: 1  # 1-10
+    runs:
+      - task_1_file: cell_towers_1  # The task being tested.
+        task_2_file: cell_towers_2  # A similar but different task.
+        demo_2_file: cell_towers_2_demo  # A demonstration of solving task 2.
 
-  - function_name: eval_self_teaching
-    module_path: ame.evaluations.eval_self_teaching
-    num_loops: 1  # 1-10
-    num_final_test_trials: 1  # 1-3
+  - eval_function:
+      function_name: eval_self_teaching
+      module_path: ame.eval_functions.eval_self_teaching
+      num_loops: 1  # 1-10
+      num_final_test_trials: 1  # 1-3
+    runs:
+      - task_file_1: 10_liars  # Train and test on this task.
+        task_file_2: 100_vampires  # Test generalization on a different, similar task.
diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml
index 1774748f5461..0240a80ce8fb 100644
--- a/python/packages/ame/src/ame/settings/m1.yaml
+++ b/python/packages/ame/src/ame/settings/m1.yaml
@@ -26,7 +26,9 @@ fast_learning_agent:
     base_agent: MagenticOneGroupChat  # MagenticOneGroupChat, thin_agent, etc.
 evaluations:
-  - function_name: eval_without_learning
-    module_path: ame.evaluations.eval_without_learning
-    task_name: 10_liars
-    num_trials: 1
+  - eval_function:
+      function_name: eval_without_learning
+      module_path: ame.eval_functions.eval_without_learning
+      num_trials: 1
+    runs:
+      - task_file: 10_liars
diff --git a/python/packages/ame/src/ame/task_data/advice/add_topic.yaml b/python/packages/ame/src/ame/task_data/advice/add_topic.yaml
new file mode 100644
index 000000000000..c95f6703133b
--- /dev/null
+++ b/python/packages/ame/src/ame/task_data/advice/add_topic.yaml
@@ -0,0 +1,6 @@
+# Advice created manually for solving the autogen_package task.
+
+advice: 'When somebody builds something on autogen and wants to contribute it to autogen,
+  instead of creating a new autogen package, it''s better for them to implement it in a separate github repo,
+  then just add the autogen-extension topic to that repo.
+  That way their contribution will be automatically discoverable through autogen''s page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html'
diff --git a/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml b/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml
new file mode 100644
index 000000000000..d94d87d9eb35
--- /dev/null
+++ b/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml
@@ -0,0 +1,11 @@
+# Demo created manually for solving the cell_towers_2 task.
+
+demo: 'Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20.
+  Then start at one end and place the towers only where absolutely needed.
+  The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7.
+  This obviously covers houses up to mile 7.
+  But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles.
+  So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11.
+  The next uncovered house would be at mile 12 (not 10), requiring a second tower.
+  It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4),
+  covering the remaining houses. So 2 towers would be enough.'
diff --git a/python/packages/ame/src/ame/tasks/100_vampires.yaml b/python/packages/ame/src/ame/task_data/tasks/100_vampires.yaml
similarity index 98%
rename from python/packages/ame/src/ame/tasks/100_vampires.yaml
rename to python/packages/ame/src/ame/task_data/tasks/100_vampires.yaml
index 7a18b728981f..2e2341d91fd1 100644
--- a/python/packages/ame/src/ame/tasks/100_vampires.yaml
+++ b/python/packages/ame/src/ame/task_data/tasks/100_vampires.yaml
@@ -1,7 +1,5 @@
 # From GAIA L1
 
-name: 100_vampires
-
 task_description: "You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021B\
   cu IV, son of Costea, has tasked you with investigating the village of \u0218\
   irnea in neighboring Wallachia. The Count's advisors have reported that a vampire\
diff --git a/python/packages/ame/src/ame/tasks/10_liars.yaml b/python/packages/ame/src/ame/task_data/tasks/10_liars.yaml
similarity index 95%
rename from python/packages/ame/src/ame/tasks/10_liars.yaml
rename to python/packages/ame/src/ame/task_data/tasks/10_liars.yaml
index 053cecd15ba4..096e12775935 100644
--- a/python/packages/ame/src/ame/tasks/10_liars.yaml
+++ b/python/packages/ame/src/ame/task_data/tasks/10_liars.yaml
@@ -1,7 +1,5 @@
 # Similar to the 100 vampires task, for testing generalization from one to the other.
-name: 10_liars
-
 task_description: 'You ask ten people ''How many of you are liars?'' They all answer
   ''At least one of us is not a liar.'' You happen to know that at least one of
   them IS a liar.
diff --git a/python/packages/ame/src/ame/tasks/3_to_third.yaml b/python/packages/ame/src/ame/task_data/tasks/3_to_third.yaml
similarity index 84%
rename from python/packages/ame/src/ame/tasks/3_to_third.yaml
rename to python/packages/ame/src/ame/task_data/tasks/3_to_third.yaml
index d5b4dfd172be..ab9a7ffe0133 100644
--- a/python/packages/ame/src/ame/tasks/3_to_third.yaml
+++ b/python/packages/ame/src/ame/task_data/tasks/3_to_third.yaml
@@ -1,7 +1,5 @@
 # A simple test that doesn't require memory.
 
-name: 3_to_third
-
 task_description: What is 3^3?
 
 expected_answer: '27'
diff --git a/python/packages/ame/src/ame/tasks/autogen_package.yaml b/python/packages/ame/src/ame/task_data/tasks/autogen_package.yaml
similarity index 92%
rename from python/packages/ame/src/ame/tasks/autogen_package.yaml
rename to python/packages/ame/src/ame/task_data/tasks/autogen_package.yaml
index 091090982f75..f80840b30073 100644
--- a/python/packages/ame/src/ame/tasks/autogen_package.yaml
+++ b/python/packages/ame/src/ame/task_data/tasks/autogen_package.yaml
@@ -1,7 +1,5 @@
 # Test where human advice is needed.
 
-name: autogen_package
-
 task_description: As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen?
 
 expected_answer: It's best to have your agent in its own repo, then add the autogen-extension topic to that repo.
diff --git a/python/packages/ame/src/ame/tasks/cell_towers.yaml b/python/packages/ame/src/ame/task_data/tasks/cell_towers_1.yaml
similarity index 96%
rename from python/packages/ame/src/ame/tasks/cell_towers.yaml
rename to python/packages/ame/src/ame/task_data/tasks/cell_towers_1.yaml
index c89e2635e21a..f86e370db3ee 100644
--- a/python/packages/ame/src/ame/tasks/cell_towers.yaml
+++ b/python/packages/ame/src/ame/task_data/tasks/cell_towers_1.yaml
@@ -1,7 +1,5 @@
 # File-free version of a GAIA L1 task.
 
-name: cell_towers
-
 task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road.
   Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4.
   Each cell phone tower can cover houses located next to the road within a 4-mile radius.
diff --git a/python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml b/python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml
new file mode 100644
index 000000000000..5ddc046920c9
--- /dev/null
+++ b/python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml
@@ -0,0 +1,9 @@
+# Similar to the cell_towers_1 task.
+
+task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road.
+  Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6.
+  Each cell phone tower can cover houses located next to the road within a 4-mile radius.
+  Find the minimum number of cell phone towers needed to cover all houses next to the road.
+  Your answer should be a positive numerical integer value.
+
+expected_answer: '2'

From b3fe08486dd83a8124149c3209b5190b22c04278 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 21 Jan 2025 10:26:48 -0800
Subject: [PATCH 41/93] simplify page_log

---
 .../ame/src/ame/clients/_client_wrapper.py | 5 +-
 python/packages/ame/src/ame/eval.py | 115 ++++++++--------
 .../eval_learning_from_demonstration.py | 5 +-
 .../ame/eval_functions/eval_self_teaching.py | 5 +-
 .../ame/eval_functions/eval_teachability.py | 5 +-
 .../eval_functions/eval_without_learning.py | 5 +-
 .../packages/ame/src/ame/settings/check.yaml | 2 +-
 .../autogen_ext/apprentice/_agent_wrapper.py | 21 +--
 .../apprentice/_agentic_memory_bank.py | 15 ++-
 .../apprentice/_agentic_memory_controller.py | 54 ++------
 .../src/autogen_ext/apprentice/_grader.py | 24 ++--
 .../src/autogen_ext/apprentice/_page_log.py | 94 +++----------
 .../src/autogen_ext/apprentice/_prompter.py | 126 +++++------------
 .../src/autogen_ext/apprentice/apprentice.py | 20 +--
 14 files changed, 155 insertions(+), 341 deletions(-)

diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py
index 91172d70c7f3..e7385a574dfb 100644
--- a/python/packages/ame/src/ame/clients/_client_wrapper.py
+++ b/python/packages/ame/src/ame/clients/_client_wrapper.py
@@ -18,10 +18,7 @@ class ClientWrapper:
     or check the messages and replay the responses (in check-replay mode).
     """
     def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, page_log: PageLog) -> None:
-        page = page_log.begin_page(
-            summary="ClientWrapper.__init__",
-            details='',
-            method_call="ClientWrapper.__init__")
+        page = page_log.begin_page(summary="ClientWrapper.__init__")
 
         self.base_client = base_client
         self.mode = mode
diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py
index 792cbbeb4fc0..fd98c0fa1ccd 100644
--- a/python/packages/ame/src/ame/eval.py
+++ b/python/packages/ame/src/ame/eval.py
@@ -37,10 +37,7 @@ def get_demo_from_file(self, demo_filename):
 
     async def test_fast_learner(self, fast_learner, task_description, expected_answer, num_trials,
                                 use_memory, client, page_log) -> Tuple[int, int]:
-        page = page_log.begin_page(
-            summary="Evaluator.test_fast_learner",
-            details='',
-            method_call="Evaluator.test_fast_learner")
+        page = page_log.begin_page(summary="Evaluator.test_fast_learner")
 
         page.add_lines("Testing the fast learner on the given task.\n", flush=True)
 
@@ -64,70 +61,70 @@ async def test_fast_learner(self, fast_learner, task_description, expected_answe
         page_log.finish_page(page)
         return num_successes, num_trials
 
-    async def run(self, settings_filepath):
-        # Load the settings from yaml.
-        with open(settings_filepath, "r") as file:
-            settings = yaml.load(file, Loader=yaml.FullLoader)
-            evaluator_settings = settings["Evaluator"]
-
-            # Create the PageLog.
-            self.page_log = PageLog(evaluator_settings["PageLog"])
-            page = self.page_log.begin_page(
-                summary="Evaluator.main",
-                details='',
-                method_call="Evaluator.main")
-
-            # Create the client, passed to both the fast_learner and the evaluator.
-            client_creator = ClientCreator(settings=settings["client"], page_log=self.page_log)
-            client = client_creator.create_client()
-
-            # Create the specified fast_learner implementation.
-            fast_learner_settings = settings["fast_learning_agent"]
-            module_path = fast_learner_settings["module_path"]
+    async def perform_evaluations(self, settings, page_log):
+        page = self.page_log.begin_page(summary="Evaluator.perform_evaluations")
+
+        # Create the client, passed to both the fast_learner and the evaluator.
+        client_creator = ClientCreator(settings=settings["client"], page_log=self.page_log)
+        client = client_creator.create_client()
+
+        # Create the specified fast_learner implementation.
+        fast_learner_settings = settings["fast_learning_agent"]
+        module_path = fast_learner_settings["module_path"]
+        try:
+            module = importlib.import_module(module_path)
+        except ModuleNotFoundError:
+            print('Failed to import {}'.format(module_path))
+            raise
+        class_name = fast_learner_settings["class_name"]
+        try:
+            fast_learner_class = getattr(module, class_name)
+        except AttributeError:
+            print('Failed to import {}.{}'.format(module_path, class_name))
+            raise
+        try:
+            fast_learner = fast_learner_class(fast_learner_settings, self, client, self.page_log)
+        except Exception as err:
+            print("Error creating \"{}\": {}".format(fast_learner_class, err))
+            raise
+
+        # Execute each evaluation.
+        for evaluation_settings in settings["evaluations"]:
+            # Import the function.
+            function_settings = evaluation_settings["eval_function"]
+            module_path = function_settings["module_path"]
             try:
                 module = importlib.import_module(module_path)
             except ModuleNotFoundError:
                 print('Failed to import {}'.format(module_path))
                 raise
-            class_name = fast_learner_settings["class_name"]
+            function_name = function_settings["function_name"]
             try:
-                fast_learner_class = getattr(module, class_name)
+                eval_function = getattr(module, function_name)
             except AttributeError:
-                print('Failed to import {}.{}'.format(module_path, class_name))
-                raise
-            try:
-                fast_learner = fast_learner_class(fast_learner_settings, self, client, self.page_log)
-            except Exception as err:
-                print("Error creating \"{}\": {}".format(fast_learner_class, err))
+                print('Failed to import {}.{}'.format(module_path, function_name))
                 raise
 
-            # Execute each evaluation.
-            for evaluation_settings in settings["evaluations"]:
-                # Import the function.
-                function_settings = evaluation_settings["eval_function"]
-                module_path = function_settings["module_path"]
-                try:
-                    module = importlib.import_module(module_path)
-                except ModuleNotFoundError:
-                    print('Failed to import {}'.format(module_path))
-                    raise
-                function_name = function_settings["function_name"]
-                try:
-                    eval_function = getattr(module, function_name)
-                except AttributeError:
-                    print('Failed to import {}.{}'.format(module_path, function_name))
-                    raise
-
-                # Call the eval function for each listed run.
-                for run_dict in evaluation_settings["runs"]:
-                    await eval_function(fast_learner, self, client, self.page_log, function_settings, run_dict)
-
-            if hasattr(client, "finalize"):
-                # If this is a client wrapper, it needs to be finalized.
-                client.finalize()
-
-            self.page_log.flush(final=True)  # Finalize the page log
-            self.page_log.finish_page(page)
+            # Call the eval function for each listed run.
+            for run_dict in evaluation_settings["runs"]:
+                await eval_function(fast_learner, self, client, self.page_log, function_settings, run_dict)
+
+        if hasattr(client, "finalize"):
+            # If this is a client wrapper, it needs to be finalized.
+            client.finalize()
+
+        self.page_log.flush(final=True)  # Finalize the page log
+        self.page_log.finish_page(page)
+
+    async def run(self, settings_filepath):
+        # Load the settings from yaml.
+        with open(settings_filepath, "r") as file:
+            settings = yaml.load(file, Loader=yaml.FullLoader)
+            evaluator_settings = settings["Evaluator"]
+            self.page_log = PageLog(evaluator_settings["PageLog"])
+
+        # Perform the evaluations.
+        await self.perform_evaluations(settings, self.page_log)
 
 
 if __name__ == "__main__":
diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
index b20553436ff2..7d4f776daf35 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py
@@ -1,10 +1,7 @@
 
 async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
-    page = page_log.begin_page(
-        summary="eval_learning_from_demonstration",
-        details='',
-        method_call="eval_learning_from_demonstration")
+    page = page_log.begin_page(summary="eval_learning_from_demonstration")
 
     num_trials = settings["num_trials"]
 
diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
index 3edf7551d3dd..719a3c70d8bf 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py
@@ -1,10 +1,7 @@
 
 async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
-    page = page_log.begin_page(
-        summary="eval_self_teaching",
-        details='',
-        method_call="eval_self_teaching")
+    page = page_log.begin_page(summary="eval_self_teaching")
 
     num_loops = settings["num_loops"]
     num_final_test_trials = settings["num_final_test_trials"]
 
diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
index ce7ff7580e78..90d537d0f586 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
@@ -3,10 +3,7 @@
 
 async def eval_teachability(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
-    page = page_log.begin_page(
-        summary="eval_teachability",
-        details='',
-        method_call="eval_teachability")
+    page = page_log.begin_page(summary="eval_teachability")
 
     # This eval function needs 2 data strings for each run.
     task_file = run_dict["task_file"]  # The task being tested.
diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
index 628f047f13a6..91d0a6905f56 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
@@ -1,10 +1,7 @@
 
 async def eval_without_learning(fast_learner, evaluator, client, page_log, settings, run_dict):
     """An evaluation"""
-    page = page_log.begin_page(
-        summary="eval_without_learning",
-        details='',
-        method_call="eval_without_learning")
+    page = page_log.begin_page(summary="eval_without_learning")
 
     num_trials = settings["num_trials"]
 
diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml
index c8d40950aab0..83618b9936e7 100644
--- a/python/packages/ame/src/ame/settings/check.yaml
+++ b/python/packages/ame/src/ame/settings/check.yaml
@@ -1,7 +1,7 @@
 
 Evaluator:
   PageLog:
-    path: ~/pagelogs/temp4
+    path: ~/pagelogs/temp5
 
 client:
   model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py
index 542bfa19c2de..ac370a2e778b 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py
@@ -21,10 +21,7 @@ async def assign_task(self, task):
         """
         Assigns a task to the base agent.
         """
-        page = self.page_log.begin_page(
-            summary="AgentWrapper.assign_task",
-            details="",
-            method_call="AgentWrapper.assign_task")
+        page = self.page_log.begin_page(summary="AgentWrapper.assign_task")
 
         # Pass the task through to the base agent.
         if self.base_agent_name == "MagenticOneGroupChat":
@@ -38,10 +35,7 @@ async def assign_task(self, task):
         return response, work_history
 
     async def assign_task_to_thin_agent(self, task):
-        page = self.page_log.begin_page(
-            summary="AgentWrapper.assign_task_to_thin_agent",
-            details='',
-            method_call="AgentWrapper.assign_task_to_thin_agent")
+        page = self.page_log.begin_page(summary="AgentWrapper.assign_task_to_thin_agent")
 
         page.add_lines(task)
 
@@ -67,10 +61,8 @@ async def assign_task_to_thin_agent(self, task):
         response_str = response.content
 
         # Log the model call
-        self.page_log.add_model_call(description="Ask the model",
-                                     details="to complete the task", input_messages=input_messages,
-                                     response=response,
-                                     num_input_tokens=0, caller='assign_task_to_client')
+        self.page_log.add_model_call(summary="Ask the model to complete the task",
+                                     input_messages=input_messages, response=response)
 
         page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True)
 
         # Use the response as the work history as well.
@@ -80,10 +72,7 @@ async def assign_task_to_thin_agent(self, task):
         return response_str, work_history
 
     async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]:
-        page = self.page_log.begin_page(
-            summary="AgentWrapper.assign_task_to_magentic_one",
-            details='',
-            method_call="AgentWrapper.assign_task_to_magentic_one")
+        page = self.page_log.begin_page(summary="AgentWrapper.assign_task_to_magentic_one")
 
         page.add_lines(task)
 
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py
index 8e781e828c6b..f2972c9570ff 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py
@@ -29,14 +29,15 @@ def __init__(self,
         - reset (Optional, bool): True to clear the DB before starting. Default False
         - page_log (Optional, PageLog): the PageLog object to use for logging.
         """
+        page = page_log.begin_page(summary="AgenticMemoryBank.__init__")
+
         self.settings = settings
         memory_dir_path = os.path.expanduser(self.settings["path"])
         path_to_db_dir = os.path.join(memory_dir_path, "string_map")
         self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl")
 
         self.page_log = page_log
-        parent_page = self.page_log.last_page()
-        parent_page.add_lines("Creating AgenticMemoryBank", flush=True)
+        page.add_lines("Creating AgenticMemoryBank", flush=True)
 
         self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir)
 
@@ -44,17 +45,19 @@ def __init__(self,
         self.uid_insight_dict = {}
         self.last_insight_id = 0
         if (not reset) and os.path.exists(self.path_to_dict):
-            parent_page.add_lines("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict))
-            parent_page.add_lines(" Location = {}".format(self.path_to_dict))
+            page.add_lines("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict))
+            page.add_lines(" Location = {}".format(self.path_to_dict))
             with open(self.path_to_dict, "rb") as f:
                 self.uid_insight_dict = pickle.load(f)
                 self.last_insight_id = len(self.uid_insight_dict)
-                parent_page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict)))
+                page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict)))
 
         # Clear the DB if requested.
        if reset:
            self.reset_insights()
 
+        self.page_log.finish_page(page)
+
     def reset(self):
         self.string_map.reset_db()
         self.reset_insights()
@@ -69,8 +72,6 @@ def contains_insights(self):
 
     def save_insights(self):
         self.string_map.save_string_pairs()
-        parent_page = self.page_log.last_page()
-        parent_page.add_lines("\nSAVING INSIGHTS TO DISK {}".format(self.path_to_dict))
         with open(self.path_to_dict, "wb") as file:
             pickle.dump(self.uid_insight_dict, file)
 
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py
index 979896687a78..1c483df6e609 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py
@@ -6,6 +6,8 @@
 
 class AgenticMemoryController:
     def __init__(self, settings, agent, reset, client, page_log):
+        page = page_log.begin_page(summary="AgenticMemoryController.__init__")
+
         self.settings = settings
         self.agent = agent
         self.client = client
@@ -15,6 +17,8 @@ def __init__(self, settings, agent, reset, client, page_log):
             verbosity=0, reset=reset, page_log=page_log)
         self.grader = Grader(client, page_log)
 
+        self.page_log.finish_page(page)
+
     def reset_memory(self):
         self.memory_bank.reset()
 
@@ -25,10 +29,7 @@ async def train_on_task(self,
         """
         Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories.
         """
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.train_on_task",
-            details="",
-            method_call="AgenticMemoryController.train_on_task")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.train_on_task")
 
         # Attempt to create useful new memories.
         page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True)
@@ -47,10 +48,7 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1):
         """
         Assigns a task to the completion agent, along with any relevant insights/memories.
         """
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.test_on_task",
-            details="",
-            method_call="AgenticMemoryController.test_on_task")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.test_on_task")
 
         response = None
         num_successes = 0
@@ -88,10 +86,7 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1):
 
     async def add_insight_to_memory(self, task: str, insight: str):
         # Adds an insight to the DB.
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.add_insight_to_memory",
-            details="",
-            method_call="AgenticMemoryController.add_insight_to_memory")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.add_insight_to_memory")
 
         page.add_lines("\nGIVEN TASK:")
         page.add_lines(task)
@@ -116,10 +111,7 @@ async def add_insight_to_memory(self, task: str, insight: str):
 
     async def add_insight_without_task_to_memory(self, insight: str):
         # Adds an insight to the DB.
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.add_insight_without_task_to_memory",
-            details="",
-            method_call="AgenticMemoryController.add_insight_without_task_to_memory")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.add_insight_without_task_to_memory")
 
         page.add_lines("\nGIVEN INSIGHT:")
         page.add_lines(insight)
@@ -137,10 +129,7 @@ async def add_insight_without_task_to_memory(self, insight: str):
 
     async def retrieve_relevant_insights(self, task: str):
         # Retrieve insights from the DB that are relevant to the task.
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.retrieve_relevant_insights",
-            details="",
-            method_call="AgenticMemoryController.retrieve_relevant_insights")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.retrieve_relevant_insights")
 
         if self.memory_bank.contains_insights():
             page.add_lines("\nCURRENT TASK:")
@@ -191,10 +180,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a
         """
         Attempts to solve the given task multiple times to find a failure case to learn from.
         """
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController._test_for_failure",
-            details="",
-            method_call="AgenticMemoryController._test_for_failure")
+        page = self.page_log.begin_page(summary="AgenticMemoryController._test_for_failure")
 
         page.add_lines("\nTask description, including any insights: {}".format(task_plus_insights))
         page.add_lines("\nExpected answer: {}\n".format(expected_answer))
@@ -223,10 +209,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a
         return failure_found, response, work_history
 
     async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int):
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController._iterate_on_task",
-            details="",
-            method_call="AgenticMemoryController._iterate_on_task")
+        page = self.page_log.begin_page(summary="AgenticMemoryController._iterate_on_task")
 
         page.add_lines("\nTask description: {}".format(task))
         page.add_lines("\nExpected answer: {}\n".format(expected_answer))
@@ -290,10 +273,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo
         """
         Assigns a task to the agent, along with any relevant insights/memories.
         """
-        page = self.page_log.begin_page(
-            summary="AgenticMemoryController.assign_task",
-            details="",
-            method_call="AgenticMemoryController.assign_task")
+        page = self.page_log.begin_page(summary="AgenticMemoryController.assign_task")
 
         if use_memory:
             # Try to retrieve any relevant memories from the DB.
@@ -316,10 +296,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo return response async def handle_user_message(self, text, should_await=True): - page = self.page_log.begin_page( - summary="AgenticMemoryController.handle_user_message", - details="", - method_call="AgenticMemoryController.handle_user_message") + page = self.page_log.begin_page(summary="AgenticMemoryController.handle_user_message") advice = await self.prompter.extract_advice(text) page.add_lines("Advice: {}".format(advice), flush=True) @@ -333,10 +310,7 @@ async def handle_user_message(self, text, should_await=True): return response async def learn_from_demonstration(self, task, demonstration): - page = self.page_log.begin_page( - summary="AgenticMemoryController.learn_from_demonstration", - details="", - method_call="AgenticMemoryController.learn_from_demonstration") + page = self.page_log.begin_page(summary="AgenticMemoryController.learn_from_demonstration") page.add_lines("\nEXAMPLE TASK:") page.add_lines(task) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index 88d122f01aaf..423ac25fa6ad 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -22,7 +22,7 @@ def __init__(self, client, page_log): # Create the chat history self._chat_history: List[LLMMessage] = [] - async def call_model(self, details, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): + async def call_model(self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): # Prepare the input message list if system_message_content is None: system_message_content = "You are a helpful assistant." @@ -45,8 +45,7 @@ async def call_model(self, details, user_content: UserContent = None, system_mes assert isinstance(response_message, AssistantMessage) # Log the model call - parent_page = self.page_log.add_model_call(description="Ask the model", - details=details, input_messages=input_messages, response=response, caller='Grader') + self.page_log.add_model_call(summary=summary, input_messages=input_messages, response=response) # Manage the chat history if keep_these_messages: @@ -54,7 +53,7 @@ async def call_model(self, details, user_content: UserContent = None, system_mes self._chat_history.append(response_message) # Return the response as a string for now - return response_string, parent_page + return response_string def remove_last_turn(self): if len(self._chat_history) > 0: @@ -65,10 +64,7 @@ def clear_history(self): async def is_response_correct(self, task_description, response_to_be_graded, correct_answer): # Returns only the insights that the client verifies are relevant to the task. 
- page = self.page_log.begin_page( - summary="Grader.is_response_correct", - details="", - method_call="Grader.is_response_correct") + page = self.page_log.begin_page(summary="Grader.is_response_correct") sys_message = """You are a helpful and thoughtful assistant.""" @@ -84,10 +80,8 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor user_message.append("\n# Text that may contain an answer") user_message.append(response_to_be_graded) self.clear_history() - extracted_answer, _ = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to extract the answer") + extracted_answer = await self.call_model(summary="Ask the model to extract the answer", + system_message_content=sys_message, user_content=user_message) page.add_lines("Extracted answer: " + extracted_answer) user_message = ["""Your job is to decide whether a given answer to a task is correct or not. @@ -105,10 +99,8 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor user_message.append("\n# Answer to be graded") user_message.append(extracted_answer) self.clear_history() - decision, _ = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to check the answer for correctness") + decision = await self.call_model(summary="Ask the model to check the answer for correctness", + system_message_content=sys_message, user_content=user_message) page.add_lines("Decision: " + decision) self.page_log.finish_page(page) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py index a7a371764ee9..863b3d48aa1d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py @@ -17,7 +17,7 @@ class Page: - def __init__(self, page_log, index, summary, details, method_call, indent_level, show_in_overview=True, final=True): + def __init__(self, page_log, index, summary, indent_level, show_in_overview=True, final=True): self.page_log = page_log self.index_str = str(index) self.link_text = None @@ -27,32 +27,25 @@ def __init__(self, page_log, index, summary, details, method_call, indent_level, self.line_text = None self.indentation_text = None self.summary = summary - self.details = details - self.method_call = method_call self.indent_level = indent_level self.show_in_overview = show_in_overview self.final = final - self.compose_line(details) + self.compose_line() self.lines = [] self.flush() - def compose_line(self, details, flush=False): - self.details = details + def compose_line(self, flush=False): self.link_text = self.index_str + ' ' + self.summary self.indentation_text = "" for i in range(self.indent_level): self.indentation_text += "| " - self.file_title = self.link_text + ' ' + self.details + self.file_title = self.link_text self.full_link = self.link_to_page_file() - self.unindented_line_text = self.full_link + ' ' + self.details + self.unindented_line_text = self.full_link self.line_text = self.indentation_text + self.unindented_line_text if flush: self.flush() - def update_details(self, details): - self.compose_line(details, flush=True) - self.page_log.flush() - def link_to_page_file(self): return f'{self.link_text}' @@ -85,10 +78,6 @@ def add_link_to_image(self, description, source_image_path): self.add_lines('\n' + description) self.add_lines(self.link_to_image(target_image_filename, description), flush=True) 
- def delete_last_line(self): - if len(self.lines) > 0: - self.lines.pop() - def flush(self): page_path = os.path.join(self.page_log.log_dir, self.index_str + ".html") with open(page_path, "w") as f: @@ -115,7 +104,6 @@ def __init__(self, settings): self.exit_lines = [] self.name = "0 Overview" self.create_run_dir() - self.token_counts_path = self.create_token_counts_file() self.flush() def get_next_page_id(self): @@ -128,17 +116,6 @@ def create_run_dir(self): shutil.rmtree(self.log_dir) os.makedirs(self.log_dir) - def create_token_counts_file(self): - token_counts_path = os.path.join(self.log_dir, "token_counts.csv") - f = open(token_counts_path, "w") - f.close() # The file starts empty and will be appended to later. - return token_counts_path - - def write_token_count(self, num_input_tokens, caller, details_path=None): - # Write the number of input tokens to the file, with caller and path to other details. - with open(self.token_counts_path, "a") as f: - f.write(f"{num_input_tokens},{caller},{details_path}\n") - def html_opening(self, file_title, final=False): # Return the opening text of a simple HTML file. refresh_tag = '' if not final else "" @@ -160,13 +137,11 @@ def html_closing(self): # Return the closing text of a simple HTML file. return """""" - def add_page(self, summary, details, method_call=None, show_in_overview=True, final=True): + def add_page(self, summary, show_in_overview=True, final=True): # Add a page to the log. page = Page(page_log=self, index=self.get_next_page_id(), summary=summary, - details=details, - method_call=method_call, indent_level=len(self.page_stack.stack), show_in_overview=show_in_overview, final=final) @@ -237,51 +212,25 @@ def message_content(self, page, message=None, message_content=None): output += f"\n{item}\n" return output - def add_message_content(self, message_content, summary, details=""): + def add_message_content(self, message_content, summary): # Add a page containing a message's content. - page = self.add_page(summary=summary, - details=details, - show_in_overview=False) + page = self.add_page(summary=summary, show_in_overview=False) self.page_stack.write_stack_to_page(page) page.add_lines(self.message_content(page, message_content=message_content)) page.flush() - def add_broadcast_message(self, message, operation): - # Add a page containing a message being broadcast. - page = self.add_page(summary="Broadcast Message", - details=operation, - method_call="broadcast message", - show_in_overview=False) - self.page_stack.write_stack_to_page(page) - page.add_lines(self.message_source(message)) - page.add_lines(self.message_content(page, message=message)) - page.flush() - - def add_model_call(self, description, details, input_messages, response, - tools=None, json_output=None, extra_create_args=None, - num_input_tokens=None, caller=None): + def add_model_call(self, summary, input_messages, response): # Add a model call to the log. 
- page = self.add_page(summary=description, - details=details, - method_call="model call", - show_in_overview=False) + page = self.add_page(summary=summary, show_in_overview=False) self.page_stack.write_stack_to_page(page) - if num_input_tokens is not None and num_input_tokens > 0: - page.add_lines("{} prompt tokens from count_tokens".format(num_input_tokens)) page.add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) page.add_lines("{} completion tokens".format(response.usage.completion_tokens)) for i, m in enumerate(input_messages): page.add_lines('\n' + self.message_source(m)) page.add_lines(self.message_content(page, message=m)) page.add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) - if response is None: - page.add_lines("\n TOO MANY INPUT TOKENS, NO RESPONSE GENERATED") - else: - page.add_lines(self.message_content(page, message=response)) + page.add_lines(self.message_content(page, message=response)) page.flush() - if num_input_tokens is not None and caller is not None: - # Add a line to the token count file. - self.write_token_count(num_input_tokens, caller, page.index_str + ".html") return page def prepend_entry_line(self, line): @@ -301,12 +250,6 @@ def link_to_local_file(self, file_path): link = f'{file_name}' return link - def last_page(self): - if len(self.page_stack.stack) > 0: - return self.page_stack.stack[-1] - else: - return None - def flush(self, final=False): # Create an overview of the log. overview_path = os.path.join(self.log_dir, self.name + ".html") @@ -325,25 +268,22 @@ def flush(self, final=False): f.write(self.html_closing()) time.sleep(0.1) - def begin_page(self, summary, details, method_call, show_in_overview=True): + def begin_page(self, summary, show_in_overview=True): + assert show_in_overview # Perform a set of logging actions that are often performed at the beginning of a caller's method. - page = self.add_page( - summary=summary, - details=details, - method_call=method_call, - show_in_overview=show_in_overview, - final=False) + page = self.add_page(summary=summary, show_in_overview=show_in_overview, final=False) self.page_stack.push(page) self.page_stack.write_stack_to_page(page) - page.add_lines("\nENTER {}".format(method_call), flush=True) + page.add_lines("\nENTER {}".format(summary), flush=True) return page def finish_page(self, page): # Perform a set of logging actions that are often performed at the end of a caller's method. 
+ page = self.page_stack.top() page.final = True - page.add_lines("LEAVE {}".format(page.method_call), flush=True) + page.add_lines("LEAVE {}".format(page.summary), flush=True) self.page_stack.pop() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index ad4e28312c61..eeedc39aabf9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -26,7 +26,8 @@ def __init__(self, client, page_log): # Create the chat history self._chat_history: List[LLMMessage] = [] - async def call_model(self, details, user_content: UserContent = None, system_message_content=None, keep_these_messages=True): + async def call_model(self, summary, user_content: UserContent = None, system_message_content=None, + keep_these_messages=True): # Prepare the input message list if system_message_content is None: system_message_content = self.default_system_message_content @@ -50,50 +51,27 @@ async def call_model(self, details, user_content: UserContent = None, system_mes # Call the model start_time = time.time() + response = await self.client.create(input_messages) - # Optional code to pre-count tokens. - # num_input_tokens = self.client.count_tokens(input_messages) - num_input_tokens = 0 - max_input_tokens_per_call = None # This is a placeholder value. - if (max_input_tokens_per_call is not None) and (num_input_tokens > max_input_tokens_per_call): - # The input is too large. - response = None - else: - # Call the model. - response = await self.client.create(input_messages) - - if response is None: - parent_page = self.page_log.add_model_call(description="Ask the model", - details=details + " ({:,} TOO MANY INPUT TOKENS)".format(num_input_tokens), - input_messages=input_messages, response=None, num_input_tokens=num_input_tokens, caller='Orchestrator') - assert False, "TOO MANY INPUT TOKENS" - response_string = "" - else: - assert isinstance(response, CreateResult) - response_string = response.content - assert isinstance(response_string, str) - response_message = AssistantMessage(content=response_string, source="Assistant") - assert isinstance(response_message, AssistantMessage) + assert isinstance(response, CreateResult) + response_string = response.content + assert isinstance(response_string, str) + response_message = AssistantMessage(content=response_string, source="Assistant") + assert isinstance(response_message, AssistantMessage) - self.time_spent_in_model_calls += time.time() - start_time - self.num_model_calls += 1 + self.time_spent_in_model_calls += time.time() - start_time + self.num_model_calls += 1 - # Log the model call - parent_page = self.page_log.add_model_call(description="Ask the model", - details=details, input_messages=input_messages, response=response, - num_input_tokens=num_input_tokens, caller='Orchestrator') + # Log the model call + self.page_log.add_model_call(summary=summary, input_messages=input_messages, response=response) - # Manage the chat history - if keep_these_messages: - self._chat_history.append(user_message) - self._chat_history.append(response_message) + # Manage the chat history + if keep_these_messages: + self._chat_history.append(user_message) + self._chat_history.append(response_message) # Return the response as a string for now - return response_string, parent_page - - def remove_last_turn(self): - if len(self._chat_history) > 0: - self._chat_history.pop() + return response_string def 
clear_history(self): self._chat_history = [] @@ -127,30 +105,16 @@ async def learn_from_failure(self, task_description, memory_section, final_respo "# Now carefully review the students' work above, explaining in detail what the students did right and what they did wrong.\n") self.clear_history() - response1, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to learn from this failure") - + await self.call_model(summary="Ask the model to learn from this failure", + system_message_content=sys_message, user_content=user_message) user_message = [ "Now put yourself in the mind of the students. What misconception led them to their incorrect answer?"] - response2, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to state the misconception") + await self.call_model(summary="Ask the model to state the misconception", + system_message_content=sys_message, user_content=user_message) user_message = ["Please express your key insights in the form of short, general advice that will be given to the students. Just one or two sentences, or they won't bother to read it."] - # if len(insights) > 0: - # memory_section = "\n## The following insights and advice were given to the students previously, but they didn't help. So do not repeat any of the following:\n" - # for insight in insights: - # memory_section += ('- ' + insight + '\n') - # user_message.append(memory_section) - - insight, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to formulate a concise insight") - + insight = await self.call_model(summary="Ask the model to formulate a concise insight", + system_message_content=sys_message, user_content=user_message) return insight async def find_index_topics(self, input_string): @@ -171,10 +135,8 @@ async def find_index_topics(self, input_string): user_message.append(input_string) self.clear_history() - topics, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to extract topics") + topics = await self.call_model(summary="Ask the model to extract topics", + system_message_content=sys_message, user_content=user_message) # Parse the topics into a python list. topic_list = [] @@ -194,23 +156,16 @@ async def generalize_task(self, task_description): user_message.append(task_description) self.clear_history() - response1, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to rephrase the task in a list of important points") + await self.call_model(summary="Ask the model to rephrase the task in a list of important points", + system_message_content=sys_message, user_content=user_message) user_message = ["Do you see any parts of this list that are irrelevant to actually solving the task? If so, explain which items are irrelevant."] - response2, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to identify irrelevant points") + await self.call_model(summary="Ask the model to identify irrelevant points", + system_message_content=sys_message, user_content=user_message) user_message = ["Revise your original list to include only the most general terms, those that are critical to solving the task, removing any themes or descriptions that are not essential to the solution. Your final list may be shorter, but do not leave out any part of the task that is needed for solving the task. 
Do not add any additional commentary either before or after the list."] - generalized_task, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to make a final list of general terms") - + generalized_task = await self.call_model(summary="Ask the model to make a final list of general terms", + system_message_content=sys_message, user_content=user_message) return generalized_task async def validate_insight(self, insight, task_description): @@ -228,11 +183,8 @@ async def validate_insight(self, insight, task_description): user_message.append("\n# Possibly useful insight") user_message.append(insight) self.clear_history() - response, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to validate the insight") - + response = await self.call_model(summary="Ask the model to validate the insight", + system_message_content=sys_message, user_content=user_message) return response == "1" async def extract_task(self, text): @@ -245,10 +197,8 @@ async def extract_task(self, text): user_message.append("\n# Text to analyze") user_message.append(text) self.clear_history() - response, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to extract a task") + response = await self.call_model(summary="Ask the model to extract a task", + system_message_content=sys_message, user_content=user_message) return response if response != "None" else None async def extract_advice(self, text): @@ -260,8 +210,6 @@ async def extract_advice(self, text): user_message.append("\n# Text to analyze") user_message.append(text) self.clear_history() - response, page = await self.call_model( - system_message_content=sys_message, - user_content=user_message, - details="to extract advice") + response = await self.call_model(summary="Ask the model to extract advice", + system_message_content=sys_message, user_content=user_message) return response if response != "None" else None diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index da79f2731cdc..cbb55d039bbc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -28,10 +28,7 @@ def reset_memory(self): async def handle_user_message(self, text, should_await=True): """A foreground operation, intended for immediate response to the user.""" - page = self.page_log.begin_page( - summary="Apprentice.handle_user_message", - details="", - method_call="Apprentice.handle_user_message") + page = self.page_log.begin_page(summary="Apprentice.handle_user_message") # Pass the user message through to the memory controller. response = await self.memory_controller.handle_user_message(text, should_await) @@ -41,10 +38,7 @@ async def handle_user_message(self, text, should_await=True): async def learn_from_demonstration(self, task, demonstration): """A foreground operation, assuming that the task and demonstration are already known.""" - page = self.page_log.begin_page( - summary="Apprentice.learn_from_demonstration", - details="", - method_call="Apprentice.learn_from_demonstration") + page = self.page_log.begin_page(summary="Apprentice.learn_from_demonstration") # Pass the task and demonstration through to the memory controller. 
await self.memory_controller.learn_from_demonstration(task, demonstration) @@ -55,10 +49,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo """ Assigns a task to the agent, along with any relevant insights/memories. """ - page = self.page_log.begin_page( - summary="Apprentice.assign_task", - details="", - method_call="Apprentice.assign_task") + page = self.page_log.begin_page(summary="Apprentice.assign_task") # Pass the task through to the memory controller. response = await self.memory_controller.assign_task(task, use_memory, should_await) @@ -68,10 +59,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo async def train_on_task(self, task, expected_answer): """A background operation, not intended for immediate response.""" - page = self.page_log.begin_page( - summary="Apprentice.train_on_task", - details="", - method_call="Apprentice.train_on_task") + page = self.page_log.begin_page(summary="Apprentice.train_on_task") # Pass the task through to the memory controller. await self.memory_controller.train_on_task(task, expected_answer) From 077615f22a1acc11a77365a77536de7c1fc4d442 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 21 Jan 2025 11:35:53 -0800 Subject: [PATCH 42/93] simplify page_log --- .../ame/src/ame/clients/_client_creator.py | 8 +++- .../ame/src/ame/clients/_client_wrapper.py | 22 ++++------- .../packages/ame/src/ame/settings/check.yaml | 2 +- .../src/autogen_ext/apprentice/_page_log.py | 39 ++++++------------- 4 files changed, 26 insertions(+), 45 deletions(-) diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py index 3a9b83d45b10..e6dd13c75783 100644 --- a/python/packages/ame/src/ame/clients/_client_creator.py +++ b/python/packages/ame/src/ame/clients/_client_creator.py @@ -10,6 +10,8 @@ def __init__(self, settings, page_log): self.page_log = page_log def create_client(self): + page = self.page_log.begin_page(summary="ClientCreator.create_client") + # A few args are shared by all clients. args = {} args["model"] = self.settings["model"] @@ -35,8 +37,8 @@ def create_client(self): assert False, "Invalid client provider" # Log some details. - self.page_log.append_entry_line("Client: {}".format(client._resolved_model)) - self.page_log.append_entry_line(source) + page.add_lines("Client: {}".format(client._resolved_model)) + page.add_lines(source) # Check if the client should be wrapped. if "ClientWrapper" in self.settings: @@ -45,6 +47,8 @@ def create_client(self): # Wrap the client. client = ClientWrapper( client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log) + + self.page_log.finish_page(page) return client def create_oai_client(self, args): diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index e7385a574dfb..e90bffd008e4 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -80,8 +80,7 @@ def check_and_replay_one_turn(self, messages): # Get the next recorded turn. if self.next_item_index >= len(self.recorded_items): error_str = "No more recorded items to check." 
- self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_turn = self.recorded_items[self.next_item_index] self.next_item_index += 1 @@ -89,8 +88,7 @@ def check_and_replay_one_turn(self, messages): # Check the current message list against the recorded message list. if "messages" not in recorded_turn: error_str = "Recorded turn doesn't contain a messages field. Perhaps a result was recorded instead." - self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_messages = recorded_turn["messages"] current_messages = self.convert_messages(messages) @@ -98,8 +96,7 @@ def check_and_replay_one_turn(self, messages): error_str = "Current message list doesn't match the recorded message list." self.page_log.add_message_content(recorded_messages, "recorded message list") self.page_log.add_message_content(current_messages, "current message list") - self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) # Finalize the page log + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) assert current_messages == recorded_messages @@ -125,30 +122,27 @@ def check_result(self, result: Any) -> None: # Check a result. if self.next_item_index >= len(self.recorded_items): error_str = "No more recorded items to check." - self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_result = self.recorded_items[self.next_item_index] self.next_item_index += 1 if "result" not in recorded_result: error_str = "Recorded turn doesn't contain a result field. Perhaps a turn was recorded instead." - self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) if result != recorded_result["result"]: error_str = "Recorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) - self.page_log.append_exit_line(error_str) - self.page_log.flush(final=True) + self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) def finalize(self) -> None: self.report_result("Total items = " + str(self.next_item_index)) if self.mode == "record": self.save() - self.page_log.append_exit_line("Recorded session was saved to: " + self.path_to_output_file) + self.page_log.add_lines("Recorded session was saved to: " + self.path_to_output_file) elif self.mode == "check-replay": - self.page_log.append_exit_line("Recorded session was fully replayed and checked.") + self.page_log.add_lines("Recorded session was fully replayed and checked.") def save(self) -> None: # Save the recorded messages and responses to disk. diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 83618b9936e7..ff447386ff81 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -1,7 +1,7 @@ Evaluator: PageLog: - path: ~/pagelogs/temp5 + path: ~/pagelogs/temp6 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. 
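The _page_log.py changes below boil the logging API down to a summary string plus the internal page stack. For reference, a caller would use the simplified PageLog roughly as follows. This is a minimal illustrative sketch, not code from the patch: the settings dict with its "path" key is inferred from the YAML files above, and the path and summary values are made up.

    from autogen_ext.apprentice import PageLog

    page_log = PageLog({"path": "~/pagelogs/demo"})  # hypothetical run directory

    # begin_page() now takes only a summary; nesting depth comes from the page stack.
    page = page_log.begin_page(summary="MyAgent.assign_task")
    page.add_lines("Task description goes here.", flush=True)

    # finish_page() marks the page final and pops it off the stack.
    page_log.finish_page(page)
    page_log.flush(final=True)
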
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py index 863b3d48aa1d..816c1df99038 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py @@ -20,10 +20,8 @@ class Page: def __init__(self, page_log, index, summary, indent_level, show_in_overview=True, final=True): self.page_log = page_log self.index_str = str(index) - self.link_text = None self.full_link = None self.file_title = None - self.unindented_line_text = None self.line_text = None self.indentation_text = None self.summary = summary @@ -35,19 +33,17 @@ def __init__(self, page_log, index, summary, indent_level, show_in_overview=True self.flush() def compose_line(self, flush=False): - self.link_text = self.index_str + ' ' + self.summary + self.file_title = self.index_str + ' ' + self.summary self.indentation_text = "" for i in range(self.indent_level): self.indentation_text += "| " - self.file_title = self.link_text self.full_link = self.link_to_page_file() - self.unindented_line_text = self.full_link - self.line_text = self.indentation_text + self.unindented_line_text + self.line_text = self.indentation_text + self.full_link if flush: self.flush() def link_to_page_file(self): - return f'{self.link_text}' + return f'{self.file_title}' def add_lines(self, line, flush=False): # If the string 'line' consists of multiple lines, separate them into a list. @@ -100,8 +96,6 @@ def __init__(self, settings): self.page_stack = PageStack() self.pages = [] self.last_page_id = 0 - self.entry_lines = [] - self.exit_lines = [] self.name = "0 Overview" self.create_run_dir() self.flush() @@ -150,10 +144,15 @@ def add_page(self, summary, show_in_overview=True, final=True): if len(self.page_stack.stack) > 0: # Insert a link to the new page into the calling page. - self.page_stack.stack[-1].add_lines(page.unindented_line_text, flush=True) + self.add_lines('\n' + page.full_link, flush=True) return page + def add_lines(self, line, flush=False): + # Add lines to the current page (at the top of the page stack). + page = self.page_stack.top() + page.add_lines(line, flush=flush) + def message_source(self, message): source = "UNKNOWN" color = "black" @@ -233,18 +232,6 @@ def add_model_call(self, summary, input_messages, response): page.flush() return page - def prepend_entry_line(self, line): - self.entry_lines.insert(0, line) - - def append_entry_line(self, line): - self.entry_lines.append(line) - - def prepend_exit_line(self, line): - self.exit_lines.insert(0, line) - - def append_exit_line(self, line): - self.exit_lines.append(line) - def link_to_local_file(self, file_path): file_name = os.path.basename(file_path) link = f'{file_name}' @@ -255,16 +242,12 @@ def flush(self, final=False): overview_path = os.path.join(self.log_dir, self.name + ".html") with open(overview_path, "w") as f: f.write(self.html_opening("0 Overview", final=final)) - f.write(f"
<h3>{self.name}</h3>\n")
-            for line in self.entry_lines:
-                f.write(line + "\n")
+            f.write(f"<h3>{self.name}</h3>
") f.write("\n") for page in self.pages: if page.show_in_overview: f.write(page.line_text + "\n") f.write("\n") - for line in self.exit_lines: - f.write(line + "\n") f.write(self.html_closing()) time.sleep(0.1) @@ -283,7 +266,7 @@ def finish_page(self, page): # Perform a set of logging actions that are often performed at the end of a caller's method. page = self.page_stack.top() page.final = True - page.add_lines("LEAVE {}".format(page.summary), flush=True) + page.add_lines("\nLEAVE {}".format(page.summary), flush=True) self.page_stack.pop() From 884716842a0ef577ae1e0045fa2aaa14a3a0b1a7 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 21 Jan 2025 15:21:51 -0800 Subject: [PATCH 43/93] simplify page_log --- .../ame/src/ame/clients/_client_wrapper.py | 18 ++++++++++-------- .../packages/ame/src/ame/settings/check.yaml | 2 +- .../src/autogen_ext/apprentice/_page_log.py | 13 ++----------- 3 files changed, 13 insertions(+), 20 deletions(-) diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index e90bffd008e4..143aa783190f 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -79,7 +79,7 @@ def check_and_replay_one_turn(self, messages): # Compare the messages to the recorded messages, and return the recorded response. # Get the next recorded turn. if self.next_item_index >= len(self.recorded_items): - error_str = "No more recorded items to check." + error_str = "\nNo more recorded items to check." self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_turn = self.recorded_items[self.next_item_index] @@ -87,13 +87,13 @@ def check_and_replay_one_turn(self, messages): # Check the current message list against the recorded message list. if "messages" not in recorded_turn: - error_str = "Recorded turn doesn't contain a messages field. Perhaps a result was recorded instead." + error_str = "\nRecorded turn doesn't contain a messages field. Perhaps a result was recorded instead." self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_messages = recorded_turn["messages"] current_messages = self.convert_messages(messages) if current_messages != recorded_messages: - error_str = "Current message list doesn't match the recorded message list." + error_str = "\nCurrent message list doesn't match the recorded message list." self.page_log.add_message_content(recorded_messages, "recorded message list") self.page_log.add_message_content(current_messages, "current message list") self.page_log.add_lines(error_str, flush=True) @@ -121,28 +121,30 @@ def record_result(self, result: Any) -> None: def check_result(self, result: Any) -> None: # Check a result. if self.next_item_index >= len(self.recorded_items): - error_str = "No more recorded items to check." + error_str = "\nNo more recorded items to check." self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) recorded_result = self.recorded_items[self.next_item_index] self.next_item_index += 1 if "result" not in recorded_result: - error_str = "Recorded turn doesn't contain a result field. Perhaps a turn was recorded instead." + error_str = "\nRecorded turn doesn't contain a result field. Perhaps a turn was recorded instead." 
self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) if result != recorded_result["result"]: - error_str = "Recorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) + error_str = "\nRecorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) self.page_log.add_lines(error_str, flush=True) raise ValueError(error_str) def finalize(self) -> None: + page = self.page_log.begin_page(summary="ClientWrapper.finalize") self.report_result("Total items = " + str(self.next_item_index)) if self.mode == "record": self.save() - self.page_log.add_lines("Recorded session was saved to: " + self.path_to_output_file) + self.page_log.add_lines("\nRecorded session was saved to: " + self.path_to_output_file) elif self.mode == "check-replay": - self.page_log.add_lines("Recorded session was fully replayed and checked.") + self.page_log.add_lines("\nRecorded session was fully replayed and checked.") + self.page_log.finish_page(page) def save(self) -> None: # Save the recorded messages and responses to disk. diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index ff447386ff81..a03c24c99980 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -1,7 +1,7 @@ Evaluator: PageLog: - path: ~/pagelogs/temp6 + path: ~/pagelogs/temp7 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py index 816c1df99038..dbe3c05444b0 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py @@ -20,27 +20,18 @@ class Page: def __init__(self, page_log, index, summary, indent_level, show_in_overview=True, final=True): self.page_log = page_log self.index_str = str(index) - self.full_link = None - self.file_title = None - self.line_text = None - self.indentation_text = None self.summary = summary self.indent_level = indent_level self.show_in_overview = show_in_overview self.final = final - self.compose_line() - self.lines = [] - self.flush() - - def compose_line(self, flush=False): self.file_title = self.index_str + ' ' + self.summary self.indentation_text = "" for i in range(self.indent_level): self.indentation_text += "| " self.full_link = self.link_to_page_file() self.line_text = self.indentation_text + self.full_link - if flush: - self.flush() + self.lines = [] + self.flush() def link_to_page_file(self): return f'{self.file_title}' From 4091ab3e88894efaf0a67a2bc4ca731cb0b25689 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 21 Jan 2025 17:06:39 -0800 Subject: [PATCH 44/93] conventional logging terminology --- .../ame/src/ame/clients/_client_creator.py | 14 +- .../ame/src/ame/clients/_client_wrapper.py | 41 +++-- python/packages/ame/src/ame/eval.py | 44 ++--- .../eval_learning_from_demonstration.py | 20 +-- .../ame/eval_functions/eval_self_teaching.py | 20 +-- .../ame/eval_functions/eval_teachability.py | 28 +-- .../eval_functions/eval_without_learning.py | 12 +- .../ame/src/ame/settings/baseline.yaml | 2 +- .../packages/ame/src/ame/settings/check.yaml | 4 +- python/packages/ame/src/ame/settings/m1.yaml | 2 +- .../src/autogen_ext/apprentice/__init__.py | 4 +- .../autogen_ext/apprentice/_agent_wrapper.py | 26 +-- 
.../apprentice/_agentic_memory_bank.py | 18 +- .../apprentice/_agentic_memory_controller.py | 168 +++++++++--------- .../src/autogen_ext/apprentice/_grader.py | 14 +- .../{_page_log.py => _page_logger.py} | 68 ++++--- .../src/autogen_ext/apprentice/_prompter.py | 6 +- .../src/autogen_ext/apprentice/apprentice.py | 24 +-- 18 files changed, 261 insertions(+), 254 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/apprentice/{_page_log.py => _page_logger.py} (81%) diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py index e6dd13c75783..d61a4927ae06 100644 --- a/python/packages/ame/src/ame/clients/_client_creator.py +++ b/python/packages/ame/src/ame/clients/_client_creator.py @@ -5,12 +5,12 @@ class ClientCreator: - def __init__(self, settings, page_log): + def __init__(self, settings, logger): self.settings = settings - self.page_log = page_log + self.logger = logger def create_client(self): - page = self.page_log.begin_page(summary="ClientCreator.create_client") + self.logger.begin_page(summary="ClientCreator.create_client") # A few args are shared by all clients. args = {} @@ -37,8 +37,8 @@ def create_client(self): assert False, "Invalid client provider" # Log some details. - page.add_lines("Client: {}".format(client._resolved_model)) - page.add_lines(source) + self.logger.info("Client: {}".format(client._resolved_model)) + self.logger.info(source) # Check if the client should be wrapped. if "ClientWrapper" in self.settings: @@ -46,9 +46,9 @@ def create_client(self): if wrapper_settings["enabled"]: # Wrap the client. client = ClientWrapper( - client, wrapper_settings["mode"], wrapper_settings["session_name"], self.page_log) + client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger) - self.page_log.finish_page(page) + self.logger.finish_page() return client def create_oai_client(self, args): diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index 143aa783190f..0a6e3a21d7be 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -9,7 +9,7 @@ RequestUsage, ) from autogen_core.tools import Tool, ToolSchema -from autogen_ext.apprentice import PageLog +from autogen_ext.apprentice import PageLogger class ClientWrapper: @@ -17,27 +17,26 @@ class ClientWrapper: Wraps a client object to record messages and responses (in record mode) or check the messages and replay the responses (in check-replay mode). """ - def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, page_log: PageLog) -> None: - page = page_log.begin_page(summary="ClientWrapper.__init__") + def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, logger: PageLogger) -> None: + self.logger = logger + self.logger.begin_page(summary="ClientWrapper.__init__") self.base_client = base_client self.mode = mode - self.page_log = page_log self.next_item_index = 0 self.model_info = {"family": self.base_client.model_info["family"]} self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".yaml") - if page_log is not None: - page.add_lines("Wrapping the base client in a ClientWrapper.") + self.logger.info("Wrapping the base client in a ClientWrapper.") if self.mode == "record": # Prepare to record the messages and responses. 
- page.add_lines("Recording mode enabled.\nRecording session to: " + self.path_to_output_file) + self.logger.info("Recording mode enabled.\nRecording session to: " + self.path_to_output_file) self.recorded_items = [] elif self.mode == "check-replay": # Load the recorded messages and responses from disk. - page.add_lines("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) + self.logger.info("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) self.recorded_items = self.load() - self.page_log.finish_page(page) + self.logger.finish_page() async def create( self, @@ -80,7 +79,7 @@ def check_and_replay_one_turn(self, messages): # Get the next recorded turn. if self.next_item_index >= len(self.recorded_items): error_str = "\nNo more recorded items to check." - self.page_log.add_lines(error_str, flush=True) + self.logger.error(error_str) raise ValueError(error_str) recorded_turn = self.recorded_items[self.next_item_index] self.next_item_index += 1 @@ -88,15 +87,15 @@ def check_and_replay_one_turn(self, messages): # Check the current message list against the recorded message list. if "messages" not in recorded_turn: error_str = "\nRecorded turn doesn't contain a messages field. Perhaps a result was recorded instead." - self.page_log.add_lines(error_str, flush=True) + self.logger.error(error_str) raise ValueError(error_str) recorded_messages = recorded_turn["messages"] current_messages = self.convert_messages(messages) if current_messages != recorded_messages: error_str = "\nCurrent message list doesn't match the recorded message list." - self.page_log.add_message_content(recorded_messages, "recorded message list") - self.page_log.add_message_content(current_messages, "current message list") - self.page_log.add_lines(error_str, flush=True) + self.logger.add_message_content(recorded_messages, "recorded message list") + self.logger.add_message_content(current_messages, "current message list") + self.logger.error(error_str) raise ValueError(error_str) assert current_messages == recorded_messages @@ -122,29 +121,29 @@ def check_result(self, result: Any) -> None: # Check a result. if self.next_item_index >= len(self.recorded_items): error_str = "\nNo more recorded items to check." - self.page_log.add_lines(error_str, flush=True) + self.logger.error(error_str) raise ValueError(error_str) recorded_result = self.recorded_items[self.next_item_index] self.next_item_index += 1 if "result" not in recorded_result: error_str = "\nRecorded turn doesn't contain a result field. Perhaps a turn was recorded instead." 
- self.page_log.add_lines(error_str, flush=True) + self.logger.error(error_str) raise ValueError(error_str) if result != recorded_result["result"]: error_str = "\nRecorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) - self.page_log.add_lines(error_str, flush=True) + self.logger.error(error_str) raise ValueError(error_str) def finalize(self) -> None: - page = self.page_log.begin_page(summary="ClientWrapper.finalize") + self.logger.begin_page(summary="ClientWrapper.finalize") self.report_result("Total items = " + str(self.next_item_index)) if self.mode == "record": self.save() - self.page_log.add_lines("\nRecorded session was saved to: " + self.path_to_output_file) + self.logger.error("\nRecorded session was saved to: " + self.path_to_output_file) elif self.mode == "check-replay": - self.page_log.add_lines("\nRecorded session was fully replayed and checked.") - self.page_log.finish_page(page) + self.logger.error("\nRecorded session was fully replayed and checked.") + self.logger.finish_page() def save(self) -> None: # Save the recorded messages and responses to disk. diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index fd98c0fa1ccd..15f660304c7d 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -3,13 +3,13 @@ import asyncio import importlib from typing import Tuple -from autogen_ext.apprentice import PageLog, Grader +from autogen_ext.apprentice import PageLogger, Grader from ame.clients._client_creator import ClientCreator class Evaluator: def __init__(self): - self.page_log = None + self.logger = None def get_task_description_and_answer_from_file(self, task_filename): path_to_this_file = os.path.abspath(__file__) @@ -36,36 +36,36 @@ def get_demo_from_file(self, demo_filename): return demo_dict["demo"] async def test_fast_learner(self, fast_learner, task_description, expected_answer, num_trials, - use_memory, client, page_log) -> Tuple[int, int]: - page = page_log.begin_page(summary="Evaluator.test_fast_learner") + use_memory, client, logger) -> Tuple[int, int]: + logger.begin_page(summary="Evaluator.test_fast_learner") - page.add_lines("Testing the fast learner on the given task.\n", flush=True) + self.logger.info("Testing the fast learner on the given task.\n") - grader = Grader(client, page_log) + grader = Grader(client, logger) num_successes = 0 for trial in range(num_trials): - page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) - page.add_lines("Try to solve the task.\n", flush=True) + self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) + self.logger.info("Try to solve the task.\n") response = await fast_learner.assign_task(task_description, use_memory=use_memory) response_is_correct, extracted_answer = await grader.is_response_correct( task_description, response, expected_answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) + self.logger.info("Answer is CORRECT.\n") num_successes += 1 else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + self.logger.info("Answer is INCORRECT.\n") - page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) - page_log.finish_page(page) + self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) + logger.finish_page() return 
num_successes, num_trials - async def perform_evaluations(self, settings, page_log): - page = self.page_log.begin_page(summary="Evaluator.perform_evaluations") + async def perform_evaluations(self, settings): + self.logger.begin_page(summary="Evaluator.perform_evaluations") # Create the client, passed to both the fast_learner and the evaluator. - client_creator = ClientCreator(settings=settings["client"], page_log=self.page_log) + client_creator = ClientCreator(settings=settings["client"], logger=self.logger) client = client_creator.create_client() # Create the specified fast_learner implementation. @@ -83,7 +83,7 @@ async def perform_evaluations(self, settings, page_log): print('Failed to import {}.{}'.format(module_path, class_name)) raise try: - fast_learner = fast_learner_class(fast_learner_settings, self, client, self.page_log) + fast_learner = fast_learner_class(fast_learner_settings, self, client, self.logger) except Exception as err: print("Error creating \"{}\": {}".format(fast_learner_class, err)) raise @@ -107,24 +107,24 @@ async def perform_evaluations(self, settings, page_log): # Call the eval function for each listed run. for run_dict in evaluation_settings["runs"]: - await eval_function(fast_learner, self, client, self.page_log, function_settings, run_dict) + await eval_function(fast_learner, self, client, self.logger, function_settings, run_dict) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. client.finalize() - self.page_log.flush(final=True) # Finalize the page log - self.page_log.finish_page(page) + self.logger.flush(final=True) # Finalize the page log + self.logger.finish_page() async def run(self, settings_filepath): # Load the settings from yaml. with open(settings_filepath, "r") as file: settings = yaml.load(file, Loader=yaml.FullLoader) evaluator_settings = settings["Evaluator"] - self.page_log = PageLog(evaluator_settings["PageLog"]) + self.logger = PageLogger(evaluator_settings["PageLogger"]) # Perform the evaluations. - await self.perform_evaluations(settings, self.page_log) + await self.perform_evaluations(settings) if __name__ == "__main__": diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 7d4f776daf35..f06829179ecc 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -1,7 +1,7 @@ -async def eval_learning_from_demonstration(fast_learner, evaluator, client, page_log, settings, run_dict): +async def eval_learning_from_demonstration(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - page = page_log.begin_page(summary="eval_learning_from_demonstration") + logger.begin_page(summary="eval_learning_from_demonstration") num_trials = settings["num_trials"] @@ -16,24 +16,24 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, page demo = evaluator.get_demo_from_file(demo_2_file) # Start by clearing memory then running a baseline test. 
- page.add_lines("To get a baseline, clear memory, then assign the task.") + logger.info("To get a baseline, clear memory, then assign the task.") fast_learner.reset_memory() num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_trials, use_memory=True, client=client, page_log=page_log) + num_trials=num_trials, use_memory=True, client=client, logger=logger) success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) + logger.info("\nSuccess rate: {}%\n".format(success_rate)) # Provide a demonstration for a similar but different task. - page.add_lines("Demonstrate a solution to a similar task.") + logger.info("Demonstrate a solution to a similar task.") await fast_learner.learn_from_demonstration(demo_task, demo) # Now test again to see if the demonstration (retrieved from memory) helps. - page.add_lines("Assign the task again to see if the demonstration helps.") + logger.info("Assign the task again to see if the demonstration helps.") num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_trials, use_memory=True, client=client, page_log=page_log) + num_trials=num_trials, use_memory=True, client=client, logger=logger) success_rate = round((num_successes / num_trials) * 100) - page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True) + logger.info("\nSuccess rate: {}%\n".format(success_rate)) - page_log.finish_page(page) + logger.finish_page() diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index 719a3c70d8bf..a208dc9a67ea 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -1,7 +1,7 @@ -async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings, run_dict): +async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - page = page_log.begin_page(summary="eval_self_teaching") + logger.begin_page(summary="eval_self_teaching") num_loops = settings["num_loops"] num_final_test_trials = settings["num_final_test_trials"] @@ -27,23 +27,23 @@ async def eval_self_teaching(fast_learner, evaluator, client, page_log, settings # Test on the first task. num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_final_test_trials, use_memory=True, client=client, page_log=page_log) - page.add_lines("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100)), flush=True) + num_trials=num_final_test_trials, use_memory=True, client=client, logger=logger) + logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_1 += num_successes # Test on the second task. 
num_successes, num_trials = await evaluator.test_fast_learner( fast_learner=fast_learner, task_description=task_description_2, expected_answer=expected_answer_2, - num_trials=num_final_test_trials, use_memory=True, client=client, page_log=page_log) - page.add_lines("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100)), flush=True) + num_trials=num_final_test_trials, use_memory=True, client=client, logger=logger) + logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_2 += num_successes total_num_trials += num_final_test_trials - page.add_lines("") + logger.info("") overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100) overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100) - page.add_lines("\nOverall task 1 success rate (1): {}%".format(overall_success_rate_1), flush=True) - page.add_lines("Overall task 2 success rate (2): {}%".format(overall_success_rate_2), flush=True) + logger.info("\nOverall task 1 success rate (1): {}%".format(overall_success_rate_1)) + logger.info("Overall task 2 success rate (2): {}%".format(overall_success_rate_2)) - page_log.finish_page(page) + logger.finish_page() diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py index 90d537d0f586..4ca856ce3e1a 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py +++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py @@ -1,9 +1,9 @@ -from autogen_ext.apprentice import PageLog, Grader +from autogen_ext.apprentice import Grader -async def eval_teachability(fast_learner, evaluator, client, page_log, settings, run_dict): +async def eval_teachability(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - page = page_log.begin_page(summary="eval_teachability") + logger.begin_page(summary="eval_teachability") # This eval function needs 2 data strings for each run. task_file = run_dict["task_file"] # The task being tested. @@ -15,32 +15,32 @@ async def eval_teachability(fast_learner, evaluator, client, page_log, settings, # First test without memory. fast_learner.reset_memory() - page.add_lines("\nClear memory, then ask the question.") + logger.info("\nClear memory, then ask the question.") response = await fast_learner.handle_user_message(task_description) # Check the response. - grader = Grader(client, page_log) + grader = Grader(client, logger) response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) + logger.info("Answer is CORRECT.\n") else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + logger.info("Answer is INCORRECT.\n") # Give advice that should help solve this task. - page.add_lines("Give the advice.") + logger.info("Give the advice.") await fast_learner.handle_user_message(advice) # Now ask the question again to see if the advice helps. - page.add_lines("\nAsk the question again to see if the advice helps.") + logger.info("\nAsk the question again to see if the advice helps.") response = await fast_learner.handle_user_message(task_description) # Check the response. 
    response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer)
-    page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True)
+    logger.info("Extracted answer: {}".format(extracted_answer))
     if response_is_correct:
-        page.add_lines("Answer is CORRECT.\n", flush=True)
+        logger.info("Answer is CORRECT.\n")
     else:
-        page.add_lines("Answer is INCORRECT.\n", flush=True)
+        logger.info("Answer is INCORRECT.\n")
 
-    page_log.finish_page(page)
+    logger.finish_page()
diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
index 91d0a6905f56..e1ed5c036334 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
@@ -1,7 +1,7 @@
-async def eval_without_learning(fast_learner, evaluator, client, page_log, settings, run_dict):
+async def eval_without_learning(fast_learner, evaluator, client, logger, settings, run_dict):
     """An evaluation"""
-    page = page_log.begin_page(summary="eval_without_learning")
+    logger.begin_page(summary="eval_without_learning")
 
     num_trials = settings["num_trials"]
 
@@ -10,12 +10,12 @@ async def eval_without_learning(fast_learner, evaluator, client, page_log, setti
     task_description, expected_answer = evaluator.get_task_description_and_answer_from_file(task_file)
 
     # Clear memory then run a baseline test.
-    page.add_lines("To get a baseline, clear memory, then assign the task.")
+    logger.info("To get a baseline, clear memory, then assign the task.")
    fast_learner.reset_memory()
     num_successes, num_trials = await evaluator.test_fast_learner(
         fast_learner=fast_learner, task_description=task_description, expected_answer=expected_answer,
-        num_trials=num_trials, use_memory=True, client=client, page_log=page_log)
+        num_trials=num_trials, use_memory=True, client=client, logger=logger)
 
     success_rate = round((num_successes / num_trials) * 100)
-    page.add_lines("\nSuccess rate: {}%\n".format(success_rate), flush=True)
+    logger.info("\nSuccess rate: {}%\n".format(success_rate))
 
-    page_log.finish_page(page)
+    logger.finish_page()
diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml
index f6047f38fe1b..ea3e7a414a1e 100644
--- a/python/packages/ame/src/ame/settings/baseline.yaml
+++ b/python/packages/ame/src/ame/settings/baseline.yaml
@@ -1,6 +1,6 @@
 Evaluator:
 
-  PageLog:
+  PageLogger:
     path: ~/pagelogs/base
 
 client:
diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml
index a03c24c99980..b32de3887722 100644
--- a/python/packages/ame/src/ame/settings/check.yaml
+++ b/python/packages/ame/src/ame/settings/check.yaml
@@ -1,7 +1,7 @@
 Evaluator:
 
-  PageLog:
-    path: ~/pagelogs/temp7
+  PageLogger:
+    path: ~/pagelogs/temp8
 
 client:
   model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
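The rest of this patch applies the same rename and calling convention throughout: callers stop holding the Page object returned by page_log.begin_page() and stop writing through page.add_lines(..., flush=True); they talk to the logger directly instead. A minimal sketch of the new convention, using only the PageLogger API visible in these diffs (the settings path and summary string are illustrative, not taken from the patch):

    from autogen_ext.apprentice import PageLogger

    # Sketch only -- assumes the PageLogger API introduced in this patch.
    logger = PageLogger({"path": "~/pagelogs/example"})  # illustrative path
    logger.begin_page(summary="my_eval_function")  # pushes a new page onto the page stack
    logger.info("One line of progress output")     # appended to the page on top of the stack
    logger.finish_page()                           # marks the top page final and pops it
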
diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index 0240a80ce8fb..6b34fc05142c 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -1,6 +1,6 @@ Evaluator: - PageLog: + PageLogger: path: ~/pagelogs/m1 client: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py index 9426eb4bd11d..1e852b574c81 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py @@ -1,5 +1,5 @@ from .apprentice import Apprentice -from ._page_log import PageLog +from ._page_logger import PageLogger from ._grader import Grader -__all__ = ["Apprentice", "PageLog", "Grader"] +__all__ = ["Apprentice", "PageLogger", "Grader"] diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py index ac370a2e778b..d9db02f20bb3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py @@ -11,17 +11,17 @@ class AgentWrapper: - def __init__(self, settings, client, page_log): + def __init__(self, settings, client, logger): self.settings = settings self.client = client - self.page_log = page_log + self.logger = logger self.base_agent_name = self.settings["base_agent"] async def assign_task(self, task): """ Assigns a task to the base agent. """ - page = self.page_log.begin_page(summary="AgentWrapper.assign_task") + self.logger.begin_page(summary="AgentWrapper.assign_task") # Pass the task through to the base agent. if self.base_agent_name == "MagenticOneGroupChat": @@ -31,13 +31,13 @@ async def assign_task(self, task): else: assert False, "Invalid base agent" - self.page_log.finish_page(page) + self.logger.finish_page() return response, work_history async def assign_task_to_thin_agent(self, task): - page = self.page_log.begin_page(summary="AgentWrapper.assign_task_to_thin_agent") + self.logger.begin_page(summary="AgentWrapper.assign_task_to_thin_agent") - page.add_lines(task) + self.logger.info(task) system_message_content = """You are a helpful and thoughtful assistant. In responding to every user message, you follow the same multi-step process given here: @@ -61,20 +61,20 @@ async def assign_task_to_thin_agent(self, task): response_str = response.content # Log the model call - self.page_log.add_model_call(summary="Ask the model to complete the task", + self.logger.add_model_call(summary="Ask the model to complete the task", input_messages=input_messages, response=response) - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) # Use the response as the work history as well. 
work_history = response_str - self.page_log.finish_page(page) + self.logger.finish_page() return response_str, work_history async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: - page = self.page_log.begin_page(summary="AgentWrapper.assign_task_to_magentic_one") + self.logger.begin_page(summary="AgentWrapper.assign_task_to_magentic_one") - page.add_lines(task) + self.logger.info(task) general_agent = AssistantAgent( "general_agent", @@ -99,10 +99,10 @@ async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: stream = team.run_stream(task=task) task_result = await Console(stream) response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - page.add_lines("\n----- RESPONSE -----\n\n{}\n".format(response_str), flush=True) + self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) # MagenticOne's response is the chat history, which we use here as the work history. work_history = response_str - self.page_log.finish_page(page) + self.logger.finish_page() return response_str, work_history diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index f2972c9570ff..59d73b8d9e9e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -21,42 +21,40 @@ def __init__(self, settings: Dict, verbosity: Optional[int] = 0, reset: Optional[bool] = False, - page_log=None, + logger=None, ): """ Args: - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists. - reset (Optional, bool): True to clear the DB before starting. Default False - - page_log (Optional, PageLog): the PageLog object to use for logging. + - logger (Optional, PageLogger): the PageLogger object to use for logging. """ - page = page_log.begin_page(summary="AgenticMemoryBank.__init__") + self.logger = logger + self.logger.begin_page(summary="AgenticMemoryBank.__init__") self.settings = settings memory_dir_path = os.path.expanduser(self.settings["path"]) path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") - self.page_log = page_log - page.add_lines("Creating AgenticMemoryBank", flush=True) - self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir) # Load or create the associated insight dict on disk. self.uid_insight_dict = {} self.last_insight_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - page.add_lines("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict)) - page.add_lines(" Location = {}".format(self.path_to_dict)) + self.logger.info("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict)) + self.logger.info(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_insight_dict = pickle.load(f) self.last_insight_id = len(self.uid_insight_dict) - page.add_lines("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict))) + self.logger.info("\n{} INSIGHTS LOADED".format(len(self.uid_insight_dict))) # Clear the DB if requested. 
if reset: self.reset_insights() - self.page_log.finish_page(page) + self.logger.finish_page() def reset(self): self.string_map.reset_db() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py index 1c483df6e609..f85dbf47d2e6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py @@ -5,19 +5,19 @@ class AgenticMemoryController: - def __init__(self, settings, agent, reset, client, page_log): - page = page_log.begin_page(summary="AgenticMemoryController.__init__") + def __init__(self, settings, agent, reset, client, logger): + self.logger = logger + self.logger.begin_page(summary="AgenticMemoryController.__init__") self.settings = settings self.agent = agent self.client = client - self.page_log = page_log - self.prompter = Prompter(client, page_log) + self.prompter = Prompter(client, logger) self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], - verbosity=0, reset=reset, page_log=page_log) - self.grader = Grader(client, page_log) + verbosity=0, reset=reset, logger=logger) + self.grader = Grader(client, logger) - self.page_log.finish_page(page) + self.logger.finish_page() def reset_memory(self): self.memory_bank.reset() @@ -29,70 +29,70 @@ async def train_on_task(self, """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ - page = self.page_log.begin_page(summary="AgenticMemoryController.train_on_task") + self.logger.begin_page(summary="AgenticMemoryController.train_on_task") # Attempt to create useful new memories. - page.add_lines("Iterate on the task, possibly discovering a useful new insight.\n", flush=True) + self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") _, insight = await self._iterate_on_task(task, expected_answer, self.settings["max_train_trials"], self.settings["max_test_trials"]) if insight is None: - page.add_lines("No useful insight was discovered.\n", flush=True) + self.logger.info("No useful insight was discovered.\n") else: - page.add_lines("A new insight was created:\n{}".format(insight), flush=True) + self.logger.info("A new insight was created:\n{}".format(insight)) # Add this insight to memory. await self.add_insight_to_memory(task, insight) - self.page_log.finish_page(page) + self.logger.finish_page() async def test_on_task(self, task: str, expected_answer: str, num_trials=1): """ Assigns a task to the completion agent, along with any relevant insights/memories. """ - page = self.page_log.begin_page(summary="AgenticMemoryController.test_on_task") + self.logger.begin_page(summary="AgenticMemoryController.test_on_task") response = None num_successes = 0 for trial in range(num_trials): - page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) + self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) task_plus_insights = task # Try to retrieve any relevant memories from the DB. 
filtered_insights = await self.retrieve_relevant_insights(task) if len(filtered_insights) > 0: - page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) + self.logger.info("Relevant insights were retrieved from memory.\n") memory_section = self.format_memory_section(filtered_insights) if len(memory_section) > 0: task_plus_insights = task + '\n\n' + memory_section # Attempt to solve the task. - page.add_lines("Try to solve the task.\n", flush=True) + self.logger.info("Try to solve the task.\n") response, _ = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) + self.logger.info("Answer is CORRECT.\n") num_successes += 1 else: - page.add_lines("Answer is INCORRECT.\n", flush=True) + self.logger.info("Answer is INCORRECT.\n") # Calculate the success rate as a percentage, rounded to the nearest whole number. - page.add_lines("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100)), flush=True) + self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - self.page_log.finish_page(page) + self.logger.finish_page() return response, num_successes, num_trials async def add_insight_to_memory(self, task: str, insight: str): # Adds an insight to the DB. - page = self.page_log.begin_page(summary="AgenticMemoryController.add_insight_to_memory") + self.logger.begin_page(summary="AgenticMemoryController.add_insight_to_memory") - page.add_lines("\nGIVEN TASK:") - page.add_lines(task) + self.logger.info("\nGIVEN TASK:") + self.logger.info(task) - page.add_lines("\nGIVEN INSIGHT:") - page.add_lines(insight) + self.logger.info("\nGIVEN INSIGHT:") + self.logger.info(insight) # Generalize the task. generalized_task = await self.prompter.generalize_task(task) @@ -100,56 +100,56 @@ async def add_insight_to_memory(self, task: str, insight: str): # Get a combined list of topics from the task and insight. task_plus_insight = generalized_task.strip() + "\n(Hint: " + insight + ")" topics = await self.prompter.find_index_topics(task_plus_insight) - page.add_lines("\nTOPICS EXTRACTED FROM TASK AND INSIGHT COMBINED:") - page.add_lines("\n".join(topics)) - page.add_lines("") + self.logger.info("\nTOPICS EXTRACTED FROM TASK AND INSIGHT COMBINED:") + self.logger.info("\n".join(topics)) + self.logger.info("") # Add the insight to the memory bank. self.memory_bank.add_insight(insight, generalized_task, topics) - self.page_log.finish_page(page) + self.logger.finish_page() async def add_insight_without_task_to_memory(self, insight: str): # Adds an insight to the DB. - page = self.page_log.begin_page(summary="AgenticMemoryController.add_insight_without_task_to_memory") + self.logger.begin_page(summary="AgenticMemoryController.add_insight_without_task_to_memory") - page.add_lines("\nGIVEN INSIGHT:") - page.add_lines(insight) + self.logger.info("\nGIVEN INSIGHT:") + self.logger.info(insight) # Get a list of topics from the insight. 
topics = await self.prompter.find_index_topics(insight) - page.add_lines("\nTOPICS EXTRACTED FROM INSIGHT:") - page.add_lines("\n".join(topics)) - page.add_lines("") + self.logger.info("\nTOPICS EXTRACTED FROM INSIGHT:") + self.logger.info("\n".join(topics)) + self.logger.info("") # Add the insight to the memory bank. self.memory_bank.add_insight(insight, None, topics) - self.page_log.finish_page(page) + self.logger.finish_page() async def retrieve_relevant_insights(self, task: str): # Retrieve insights from the DB that are relevant to the task. - page = self.page_log.begin_page(summary="AgenticMemoryController.retrieve_relevant_insights") + self.logger.begin_page(summary="AgenticMemoryController.retrieve_relevant_insights") if self.memory_bank.contains_insights(): - page.add_lines("\nCURRENT TASK:") - page.add_lines(task) + self.logger.info("\nCURRENT TASK:") + self.logger.info(task) # Generalize the task. generalized_task = await self.prompter.generalize_task(task) # Get a list of topics from the task. topics = await self.prompter.find_index_topics(generalized_task) - page.add_lines("\nTOPICS EXTRACTED FROM TASK:") - page.add_lines("\n".join(topics)) - page.add_lines("") + self.logger.info("\nTOPICS EXTRACTED FROM TASK:") + self.logger.info("\n".join(topics)) + self.logger.info("") # Retrieve relevant insights from the memory bank. relevant_insights_and_relevances = self.memory_bank.get_relevant_insights(topics=topics) relevant_insights = [] - page.add_lines("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) + self.logger.info("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) for insight, relevance in relevant_insights_and_relevances.items(): - page.add_lines("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) + self.logger.info("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) relevant_insights.append(insight) # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. @@ -158,14 +158,14 @@ async def retrieve_relevant_insights(self, task: str): if await self.prompter.validate_insight(insight, task): validated_insights.append(insight) - page.add_lines("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) + self.logger.info("\n{} VALIDATED INSIGHTS".format(len(validated_insights))) for insight in validated_insights: - page.add_lines("\n INSIGHT: {}".format(insight)) + self.logger.info("\n INSIGHT: {}".format(insight)) else: - page.add_lines("\nNO INSIGHTS WERE FOUND IN MEMORY") + self.logger.info("\nNO INSIGHTS WERE FOUND IN MEMORY") validated_insights = [] - self.page_log.finish_page(page) + self.logger.finish_page() return validated_insights def format_memory_section(self, memories): @@ -180,39 +180,39 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a """ Attempts to solve the given task multiple times to find a failure case to learn from. 
""" - page = self.page_log.begin_page(summary="AgenticMemoryController._test_for_failure") + self.logger.begin_page(summary="AgenticMemoryController._test_for_failure") - page.add_lines("\nTask description, including any insights: {}".format(task_plus_insights)) - page.add_lines("\nExpected answer: {}\n".format(expected_answer)) + self.logger.info("\nTask description, including any insights: {}".format(task_plus_insights)) + self.logger.info("\nExpected answer: {}\n".format(expected_answer)) failure_found = False response, work_history = None, None for trial in range(num_trials): - page.add_lines("\n----- TRIAL {} -----\n".format(trial + 1), flush=True) + self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) # Attempt to solve the task. - page.add_lines("Try to solve the task.", flush=True) + self.logger.info("Try to solve the task.") response, work_history = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer) - page.add_lines("Extracted answer: {}".format(extracted_answer), flush=True) + self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - page.add_lines("Answer is CORRECT.\n", flush=True) + self.logger.info("Answer is CORRECT.\n") else: - page.add_lines("Answer is INCORRECT.\n Stop testing, and return the details of the failure.\n", flush=True) + self.logger.info("Answer is INCORRECT.\n Stop testing, and return the details of the failure.\n") failure_found = True break - self.page_log.finish_page(page) + self.logger.finish_page() return failure_found, response, work_history async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int): - page = self.page_log.begin_page(summary="AgenticMemoryController._iterate_on_task") + self.logger.begin_page(summary="AgenticMemoryController._iterate_on_task") - page.add_lines("\nTask description: {}".format(task)) - page.add_lines("\nExpected answer: {}\n".format(expected_answer)) + self.logger.info("\nTask description: {}".format(task)) + self.logger.info("\nExpected answer: {}\n".format(expected_answer)) final_response = None old_insights = await self.retrieve_relevant_insights(task) @@ -223,7 +223,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Loop until success (or timeout) while learning from failures. for trial in range(1, max_train_trials + 1): - page.add_lines("\n----- TRAIN TRIAL {} -----\n".format(trial), flush=True) + self.logger.info("\n----- TRAIN TRIAL {} -----\n".format(trial)) task_plus_insights = task @@ -240,7 +240,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria task, task_plus_insights, expected_answer, max_test_trials) if not failure_found: # No. Time to exit the loop. - page.add_lines("\nResponse is CORRECT.\n Stop looking for insights.\n", flush=True) + self.logger.info("\nResponse is CORRECT.\n Stop looking for insights.\n") # Was this the first trial? if trial == 1: # Yes. We should return the successful response, and no insight. @@ -253,78 +253,78 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Will we try again? if trial == max_train_trials: # No. We're out of training trials. - page.add_lines("\nNo more trials will be attempted.\n", flush=True) + self.logger.info("\nNo more trials will be attempted.\n") break # Try to learn from this failure. - page.add_lines("\nResponse is INCORRECT. 
Try to learn from this failure.\n", flush=True) + self.logger.info("\nResponse is INCORRECT. Try to learn from this failure.\n") insight = await self.prompter.learn_from_failure( task, memory_section, response, expected_answer, work_history, new_insights) - page.add_lines("\nInsight: {}\n".format(insight), flush=True) + self.logger.info("\nInsight: {}\n".format(insight)) new_insights.append(insight) last_insight = insight # Return the answer from the last loop. - page.add_lines("\n{}\n".format(final_response), flush=True) - self.page_log.finish_page(page) + self.logger.info("\n{}\n".format(final_response)) + self.logger.finish_page() return final_response, successful_insight async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ - page = self.page_log.begin_page(summary="AgenticMemoryController.assign_task") + self.logger.begin_page(summary="AgenticMemoryController.assign_task") if use_memory: # Try to retrieve any relevant memories from the DB. filtered_insights = await self.retrieve_relevant_insights(task) if len(filtered_insights) > 0: - page.add_lines("Relevant insights were retrieved from memory.\n", flush=True) + self.logger.info("Relevant insights were retrieved from memory.\n") memory_section = self.format_memory_section(filtered_insights) task = task + '\n\n' + memory_section # if len(memory_section) > 0: # Best to include this condition, but it will require new recordings. # task = task + '\n\n' + memory_section # Attempt to solve the task. - page.add_lines("Try to solve the task.\n", flush=True) + self.logger.info("Try to solve the task.\n") if should_await: response, _ = await self.agent.assign_task(task) else: response, _ = self.agent.assign_task(task) - self.page_log.finish_page(page) + self.logger.finish_page() return response async def handle_user_message(self, text, should_await=True): - page = self.page_log.begin_page(summary="AgenticMemoryController.handle_user_message") + self.logger.begin_page(summary="AgenticMemoryController.handle_user_message") advice = await self.prompter.extract_advice(text) - page.add_lines("Advice: {}".format(advice), flush=True) + self.logger.info("Advice: {}".format(advice)) if advice is not None: await self.add_insight_without_task_to_memory(advice) response = await self.assign_task(text, use_memory=(advice is None), should_await=should_await) - self.page_log.finish_page(page) + self.logger.finish_page() return response async def learn_from_demonstration(self, task, demonstration): - page = self.page_log.begin_page(summary="AgenticMemoryController.learn_from_demonstration") + self.logger.begin_page(summary="AgenticMemoryController.learn_from_demonstration") - page.add_lines("\nEXAMPLE TASK:") - page.add_lines(task) + self.logger.info("\nEXAMPLE TASK:") + self.logger.info(task) - page.add_lines("\nEXAMPLE DEMONSTRATION:") - page.add_lines(demonstration) + self.logger.info("\nEXAMPLE DEMONSTRATION:") + self.logger.info(demonstration) # Get a list of topics from the task. topics = await self.prompter.find_index_topics(task.strip()) - page.add_lines("\nTOPICS EXTRACTED FROM TASK:") - page.add_lines("\n".join(topics)) - page.add_lines("") + self.logger.info("\nTOPICS EXTRACTED FROM TASK:") + self.logger.info("\n".join(topics)) + self.logger.info("") # Add the insight to the memory bank. 
self.memory_bank.add_demonstration(task, demonstration, topics) - self.page_log.finish_page(page) + self.logger.finish_page() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index 423ac25fa6ad..22a74d89e33f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -12,9 +12,9 @@ class Grader: - def __init__(self, client, page_log): + def __init__(self, client, logger): self.client = client - self.page_log = page_log + self.logger = logger # Check whether to report results to the client. self.report_results = hasattr(self.client, 'report_result') @@ -45,7 +45,7 @@ async def call_model(self, summary, user_content: UserContent = None, system_mes assert isinstance(response_message, AssistantMessage) # Log the model call - self.page_log.add_model_call(summary=summary, input_messages=input_messages, response=response) + self.logger.add_model_call(summary=summary, input_messages=input_messages, response=response) # Manage the chat history if keep_these_messages: @@ -64,7 +64,7 @@ def clear_history(self): async def is_response_correct(self, task_description, response_to_be_graded, correct_answer): # Returns only the insights that the client verifies are relevant to the task. - page = self.page_log.begin_page(summary="Grader.is_response_correct") + self.logger.begin_page(summary="Grader.is_response_correct") sys_message = """You are a helpful and thoughtful assistant.""" @@ -82,7 +82,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor self.clear_history() extracted_answer = await self.call_model(summary="Ask the model to extract the answer", system_message_content=sys_message, user_content=user_message) - page.add_lines("Extracted answer: " + extracted_answer) + self.logger.info("Extracted answer: " + extracted_answer) user_message = ["""Your job is to decide whether a given answer to a task is correct or not. - You will be given the task description and the correct, gold-standard answer, along with the answer to be graded. 
@@ -101,9 +101,9 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor self.clear_history() decision = await self.call_model(summary="Ask the model to check the answer for correctness", system_message_content=sys_message, user_content=user_message) - page.add_lines("Decision: " + decision) + self.logger.info("Decision: " + decision) - self.page_log.finish_page(page) + self.logger.finish_page() if self.report_results: self.client.report_result(decision) return decision == "1", extracted_answer diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py similarity index 81% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py rename to python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py index dbe3c05444b0..515cc0b627f7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_log.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py @@ -17,8 +17,8 @@ class Page: - def __init__(self, page_log, index, summary, indent_level, show_in_overview=True, final=True): - self.page_log = page_log + def __init__(self, page_logger, index, summary, indent_level, show_in_overview=True, final=True): + self.page_logger = page_logger self.index_str = str(index) self.summary = summary self.indent_level = indent_level @@ -36,7 +36,7 @@ def __init__(self, page_log, index, summary, indent_level, show_in_overview=True def link_to_page_file(self): return f'{self.file_title}' - def add_lines(self, line, flush=False): + def _add_lines(self, line, flush=False): # If the string 'line' consists of multiple lines, separate them into a list. lines_to_add = [] if "\n" in line: @@ -59,16 +59,16 @@ def add_link_to_image(self, description, source_image_path): # Copy the image to the run directory. # Remove every character from the string 'description' that is not alphanumeric or a space. description = ''.join(e for e in description if e.isalnum() or e.isspace()) - target_image_filename = (str(self.page_log.get_next_page_id()) + ' - ' + description) - local_image_path = os.path.join(self.page_log.log_dir, target_image_filename) + target_image_filename = (str(self.page_logger.get_next_page_id()) + ' - ' + description) + local_image_path = os.path.join(self.page_logger.log_dir, target_image_filename) shutil.copyfile(source_image_path, local_image_path) - self.add_lines('\n' + description) - self.add_lines(self.link_to_image(target_image_filename, description), flush=True) + self._add_lines('\n' + description) + self._add_lines(self.link_to_image(target_image_filename, description), flush=True) def flush(self): - page_path = os.path.join(self.page_log.log_dir, self.index_str + ".html") + page_path = os.path.join(self.page_logger.log_dir, self.index_str + ".html") with open(page_path, "w") as f: - f.write(self.page_log.html_opening(self.file_title, final=self.final)) + f.write(self.page_logger.html_opening(self.file_title, final=self.final)) f.write(f"
<h3>{self.file_title}</h3>
\n") for line in self.lines: # Call f.write in a try block to catch any UnicodeEncodeErrors. @@ -76,12 +76,12 @@ def flush(self): f.write(f"{line}\n") except UnicodeEncodeError: f.write(f"UnicodeEncodeError in this line.\n") - f.write(self.page_log.html_closing()) + f.write(self.page_logger.html_closing()) f.flush() time.sleep(0.1) -class PageLog: +class PageLogger: def __init__(self, settings): self.log_dir = os.path.expanduser(settings["path"]) self.page_stack = PageStack() @@ -124,7 +124,7 @@ def html_closing(self): def add_page(self, summary, show_in_overview=True, final=True): # Add a page to the log. - page = Page(page_log=self, + page = Page(page_logger=self, index=self.get_next_page_id(), summary=summary, indent_level=len(self.page_stack.stack), @@ -135,14 +135,24 @@ def add_page(self, summary, show_in_overview=True, final=True): if len(self.page_stack.stack) > 0: # Insert a link to the new page into the calling page. - self.add_lines('\n' + page.full_link, flush=True) + self._add_lines('\n' + page.full_link, flush=True) return page - def add_lines(self, line, flush=False): + def _add_lines(self, line, flush=False): # Add lines to the current page (at the top of the page stack). page = self.page_stack.top() - page.add_lines(line, flush=flush) + page._add_lines(line, flush=flush) + + def info(self, line): + # Add lines to the current page (at the top of the page stack). + page = self.page_stack.top() + page._add_lines(line, flush=True) + + def error(self, line): + # Add lines to the current page (at the top of the page stack). + page = self.page_stack.top() + page._add_lines(line, flush=True) def message_source(self, message): source = "UNKNOWN" @@ -206,20 +216,20 @@ def add_message_content(self, message_content, summary): # Add a page containing a message's content. page = self.add_page(summary=summary, show_in_overview=False) self.page_stack.write_stack_to_page(page) - page.add_lines(self.message_content(page, message_content=message_content)) + page._add_lines(self.message_content(page, message_content=message_content)) page.flush() def add_model_call(self, summary, input_messages, response): # Add a model call to the log. 
page = self.add_page(summary=summary, show_in_overview=False) self.page_stack.write_stack_to_page(page) - page.add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) - page.add_lines("{} completion tokens".format(response.usage.completion_tokens)) + page._add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) + page._add_lines("{} completion tokens".format(response.usage.completion_tokens)) for i, m in enumerate(input_messages): - page.add_lines('\n' + self.message_source(m)) - page.add_lines(self.message_content(page, message=m)) - page.add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) - page.add_lines(self.message_content(page, message=response)) + page._add_lines('\n' + self.message_source(m)) + page._add_lines(self.message_content(page, message=m)) + page._add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) + page._add_lines(self.message_content(page, message=response)) page.flush() return page @@ -250,14 +260,14 @@ def begin_page(self, summary, show_in_overview=True): self.page_stack.push(page) self.page_stack.write_stack_to_page(page) - page.add_lines("\nENTER {}".format(summary), flush=True) + page._add_lines("\nENTER {}".format(summary), flush=True) return page - def finish_page(self, page): + def finish_page(self): # Perform a set of logging actions that are often performed at the end of a caller's method. page = self.page_stack.top() page.final = True - page.add_lines("\nLEAVE {}".format(page.summary), flush=True) + page._add_lines("\nLEAVE {}".format(page.summary), flush=True) self.page_stack.pop() @@ -279,9 +289,9 @@ def top(self): def write_stack_to_page(self, page): # Log a properly indented string showing the current state of the call stack. - page.add_lines("\nCALL STACK") + page._add_lines("\nCALL STACK") for stack_page in self.stack: - page.add_lines(stack_page.line_text) - page.add_lines("") - page.add_lines("") + page._add_lines(stack_page.line_text) + page._add_lines("") + page._add_lines("") page.flush() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index eeedc39aabf9..cdb2d7fe0539 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -15,9 +15,9 @@ class Prompter: - def __init__(self, client, page_log): + def __init__(self, client, logger): self.client = client - self.page_log = page_log + self.logger = logger self.default_system_message_content = "You are a helpful assistant." self.time_spent_in_model_calls = 0. 
self.num_model_calls = 0 @@ -63,7 +63,7 @@ async def call_model(self, summary, user_content: UserContent = None, system_mes self.num_model_calls += 1 # Log the model call - self.page_log.add_model_call(summary=summary, input_messages=input_messages, response=response) + self.logger.add_model_call(summary=summary, input_messages=input_messages, response=response) # Manage the chat history if keep_these_messages: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index cbb55d039bbc..fa9a758ca32a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -3,15 +3,15 @@ class Apprentice: - def __init__(self, settings, evaluator, client, page_log): + def __init__(self, settings, evaluator, client, logger): self.settings = settings self.evaluator = evaluator self.client = client - self.page_log = page_log + self.logger = logger # Create the agent wrapper, which creates the base agent. self.agent_settings = settings["AgentWrapper"] - self.agent = AgentWrapper(settings=self.agent_settings, client=self.client, page_log=self.page_log) + self.agent = AgentWrapper(settings=self.agent_settings, client=self.client, logger=self.logger) # Create the AgenticMemoryController, which creates the AgenticMemoryBank. self.memory_controller = AgenticMemoryController( @@ -19,7 +19,7 @@ def __init__(self, settings, evaluator, client, page_log): agent=self.agent, reset=False, client=self.client, - page_log=self.page_log + logger=self.logger ) def reset_memory(self): @@ -28,40 +28,40 @@ def reset_memory(self): async def handle_user_message(self, text, should_await=True): """A foreground operation, intended for immediate response to the user.""" - page = self.page_log.begin_page(summary="Apprentice.handle_user_message") + self.logger.begin_page(summary="Apprentice.handle_user_message") # Pass the user message through to the memory controller. response = await self.memory_controller.handle_user_message(text, should_await) - self.page_log.finish_page(page) + self.logger.finish_page() return response async def learn_from_demonstration(self, task, demonstration): """A foreground operation, assuming that the task and demonstration are already known.""" - page = self.page_log.begin_page(summary="Apprentice.learn_from_demonstration") + self.logger.begin_page(summary="Apprentice.learn_from_demonstration") # Pass the task and demonstration through to the memory controller. await self.memory_controller.learn_from_demonstration(task, demonstration) - self.page_log.finish_page(page) + self.logger.finish_page() async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ - page = self.page_log.begin_page(summary="Apprentice.assign_task") + self.logger.begin_page(summary="Apprentice.assign_task") # Pass the task through to the memory controller. response = await self.memory_controller.assign_task(task, use_memory, should_await) - self.page_log.finish_page(page) + self.logger.finish_page() return response async def train_on_task(self, task, expected_answer): """A background operation, not intended for immediate response.""" - page = self.page_log.begin_page(summary="Apprentice.train_on_task") + self.logger.begin_page(summary="Apprentice.train_on_task") # Pass the task through to the memory controller. 
await self.memory_controller.train_on_task(task, expected_answer) - self.page_log.finish_page(page) + self.logger.finish_page() From 3865cffcf4ea49615ae97e14f73de3568455d51e Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 21 Jan 2025 17:55:58 -0800 Subject: [PATCH 45/93] control logger enabling --- python/packages/ame/src/ame/eval.py | 3 ++- .../eval_learning_from_demonstration.py | 7 +++++-- .../src/ame/eval_functions/eval_self_teaching.py | 8 ++++++-- .../src/ame/eval_functions/eval_teachability.py | 11 +++++++---- .../packages/ame/src/ame/settings/baseline.yaml | 1 + python/packages/ame/src/ame/settings/check.yaml | 3 ++- python/packages/ame/src/ame/settings/m1.yaml | 1 + .../src/autogen_ext/apprentice/_page_logger.py | 15 +++++++++++++++ 8 files changed, 39 insertions(+), 10 deletions(-) diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 15f660304c7d..501e5f71886b 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -107,7 +107,8 @@ async def perform_evaluations(self, settings): # Call the eval function for each listed run. for run_dict in evaluation_settings["runs"]: - await eval_function(fast_learner, self, client, self.logger, function_settings, run_dict) + results = await eval_function(fast_learner, self, client, self.logger, function_settings, run_dict) + print(results) if hasattr(client, "finalize"): # If this is a client wrapper, it needs to be finalized. diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index f06829179ecc..6cb831900fa2 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -22,7 +22,8 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, num_trials=num_trials, use_memory=True, client=client, logger=logger) success_rate = round((num_successes / num_trials) * 100) - logger.info("\nSuccess rate: {}%\n".format(success_rate)) + results_str_1 = "Baseline success rate: {}%".format(success_rate) + logger.info('\n' + results_str_1) # Provide a demonstration for a similar but different task. 
logger.info("Demonstrate a solution to a similar task.") @@ -34,6 +35,8 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, num_trials=num_trials, use_memory=True, client=client, logger=logger) success_rate = round((num_successes / num_trials) * 100) - logger.info("\nSuccess rate: {}%\n".format(success_rate)) + results_str_2 = "Success rate after demonstration: {}%".format(success_rate) + logger.info('\n' + results_str_2) logger.finish_page() + return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index a208dc9a67ea..405bd79c4c5f 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -43,7 +43,11 @@ async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100) overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100) - logger.info("\nOverall task 1 success rate (1): {}%".format(overall_success_rate_1)) - logger.info("Overall task 2 success rate (2): {}%".format(overall_success_rate_2)) + + results_str_1 = "Overall task 1 success rate: {}%".format(overall_success_rate_1) + results_str_2 = "Overall task 2 success rate: {}%".format(overall_success_rate_2) + logger.info('\n' + results_str_1) + logger.info(results_str_2) logger.finish_page() + return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py index 4ca856ce3e1a..66fbba6cd60e 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py +++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py @@ -23,9 +23,10 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, r response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer) logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - logger.info("Answer is CORRECT.\n") + results_str_1 = "Answer before teaching is CORRECT." else: - logger.info("Answer is INCORRECT.\n") + results_str_1 = "Answer before teaching is INCORRECT." + logger.info(results_str_1 + "\n") # Give advice that should help solve this task. logger.info("Give the advice.") @@ -39,8 +40,10 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, r response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer) logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: - logger.info("Answer is CORRECT.\n") + results_str_2 = "Answer after teaching is CORRECT." else: - logger.info("Answer is INCORRECT.\n") + results_str_2 = "Answer after teaching is INCORRECT." 
+ logger.info(results_str_2 + "\n") logger.finish_page() + return "\neval_teachability\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml index ea3e7a414a1e..83b926b6ba12 100644 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ b/python/packages/ame/src/ame/settings/baseline.yaml @@ -1,6 +1,7 @@ Evaluator: PageLogger: + enabled: 1 path: ~/pagelogs/base client: diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index b32de3887722..b2fa03e9f52b 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -1,7 +1,8 @@ Evaluator: PageLogger: - path: ~/pagelogs/temp8 + enabled: 1 + path: ~/pagelogs/temp9 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index 6b34fc05142c..c71be8e95a8c 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -1,6 +1,7 @@ Evaluator: PageLogger: + enabled: 1 path: ~/pagelogs/m1 client: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py index 515cc0b627f7..904cfaca8d3d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py @@ -83,6 +83,9 @@ def flush(self): class PageLogger: def __init__(self, settings): + self.enabled = settings["enabled"] + if not self.enabled: + return self.log_dir = os.path.expanduser(settings["path"]) self.page_stack = PageStack() self.pages = [] @@ -145,11 +148,15 @@ def _add_lines(self, line, flush=False): page._add_lines(line, flush=flush) def info(self, line): + if not self.enabled: + return # Add lines to the current page (at the top of the page stack). page = self.page_stack.top() page._add_lines(line, flush=True) def error(self, line): + if not self.enabled: + return # Add lines to the current page (at the top of the page stack). page = self.page_stack.top() page._add_lines(line, flush=True) @@ -220,6 +227,8 @@ def add_message_content(self, message_content, summary): page.flush() def add_model_call(self, summary, input_messages, response): + if not self.enabled: + return # Add a model call to the log. page = self.add_page(summary=summary, show_in_overview=False) self.page_stack.write_stack_to_page(page) @@ -239,6 +248,8 @@ def link_to_local_file(self, file_path): return link def flush(self, final=False): + if not self.enabled: + return # Create an overview of the log. overview_path = os.path.join(self.log_dir, self.name + ".html") with open(overview_path, "w") as f: @@ -253,6 +264,8 @@ def flush(self, final=False): time.sleep(0.1) def begin_page(self, summary, show_in_overview=True): + if not self.enabled: + return assert show_in_overview # Perform a set of logging actions that are often performed at the beginning of a caller's method. page = self.add_page(summary=summary, show_in_overview=show_in_overview, final=False) @@ -264,6 +277,8 @@ def begin_page(self, summary, show_in_overview=True): return page def finish_page(self): + if not self.enabled: + return # Perform a set of logging actions that are often performed at the end of a caller's method. 
page = self.page_stack.top() page.final = True From 6c7367433c3a3690dae82762cbdf3819a828bf34 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 21 Jan 2025 18:11:24 -0800 Subject: [PATCH 46/93] add logging to string map --- .../packages/ame/src/ame/settings/check.yaml | 2 +- .../apprentice/_agentic_memory_bank.py | 3 +- .../apprentice/_agentic_memory_controller.py | 2 +- .../apprentice/_string_similarity_map.py | 33 ++++++++++++------- 4 files changed, 25 insertions(+), 15 deletions(-) diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index b2fa03e9f52b..65c1441e4210 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp9 + path: ~/pagelogs/temp10 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index 59d73b8d9e9e..c4337222bdef 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -37,7 +37,8 @@ def __init__(self, path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") - self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir) + self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir, + logger=self.logger) # Load or create the associated insight dict on disk. self.uid_insight_dict = {} diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py index f85dbf47d2e6..ce0d156dc690 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py @@ -14,7 +14,7 @@ def __init__(self, settings, agent, reset, client, logger): self.client = client self.prompter = Prompter(client, logger) self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], - verbosity=0, reset=reset, logger=logger) + verbosity=3, reset=reset, logger=logger) self.grader = Grader(client, logger) self.logger.finish_page() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py index 9b54d161f772..811af5385193 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py @@ -16,16 +16,20 @@ class StringSimilarityMap: def __init__( self, - verbosity: Optional[int] = 0, + verbosity: Optional[int] = 3, reset: Optional[bool] = False, path_to_db_dir: Optional[str] = None, + logger=None, ): """ Args: - - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists. + - verbosity (Optional, int): 1 to log memory operations, 0 to omit them. 3+ to log string-pair lists. - reset (Optional, bool): True to clear the DB before starting. Default False. 
- path_to_db_dir (Optional, str): path to the directory where the DB is stored. + - logger (Optional, PageLogger): the PageLogger object to use for logging. """ + self.logger = logger + self.logger.begin_page(summary="StringSimilarityMap.__init__") self.verbosity = verbosity self.path_to_db_dir = path_to_db_dir @@ -42,42 +46,45 @@ def __init__( self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): if self.verbosity >= 1: - print("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) - print(" Location = {}".format(self.path_to_dict)) + self.logger.info("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) + self.logger.info(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) self.last_string_pair_id = len(self.uid_text_dict) if self.verbosity >= 1: - print("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) + self.logger.info("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) if self.verbosity >= 3: self.list_string_pairs() # Clear the DB if requested. if reset: self.reset_db() + self.logger.finish_page() def list_string_pairs(self): """Prints the string-pair contents.""" - print("LIST OF STRING PAIRS") + self.logger.info("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text - print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + self.logger.info(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) def save_string_pairs_to_text_files(self): """Saves the contents to text files.""" + self.logger.begin_page(summary="StringSimilarityMap.save_string_pairs_to_text_files") # Delete all files in mem_text dir. for file in os.listdir("mem_text"): os.remove(os.path.join("mem_text", file)) if self.verbosity >= 1: - print("LIST OF STRING PAIRS") + self.logger.info("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text if self.verbosity >= 1: - print(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + self.logger.info(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. 
with open("mem_text/{}.txt".format(uid), "w") as file: file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + self.logger.finish_page() def save_string_pairs(self): """Saves self.uid_text_dict to disk.""" @@ -86,12 +93,14 @@ def save_string_pairs(self): def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" + self.logger.begin_page(summary="StringSimilarityMap.reset_db") if self.verbosity >= 1: - print("\nCLEARING STRING-PAIR MAP") + self.logger.info("\nCLEARING STRING-PAIR MAP") self.db_client.delete_collection("string-pairs") self.vec_db = self.db_client.create_collection("string-pairs") self.uid_text_dict = {} self.save_string_pairs() + self.logger.finish_page() def add_input_output_pair(self, input_text: str, output_text: str): """Adds an input-output pair to the vector DB.""" @@ -99,7 +108,7 @@ def add_input_output_pair(self, input_text: str, output_text: str): self.vec_db.add(documents=[input_text], ids=[str(self.last_string_pair_id)]) self.uid_text_dict[str(self.last_string_pair_id)] = input_text, output_text if self.verbosity >= 1: - print("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( + self.logger.info("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( self.last_string_pair_id, input_text, output_text)) if self.verbosity >= 3: self.list_string_pairs() @@ -120,7 +129,7 @@ def get_related_string_pairs(self, query_text: str, n_results: int, threshold: U input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 if self.verbosity >= 1: - print("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + self.logger.info("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( input_text, output_text, distance)) string_pairs.append((input_text, output_text, distance)) return string_pairs From db5e07b2821607e95993a8c74eb9c0cbba4833bd Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 22 Jan 2025 11:41:39 -0800 Subject: [PATCH 47/93] simplify logging --- .../ame/src/ame/clients/_client_creator.py | 4 +- .../ame/src/ame/clients/_client_wrapper.py | 8 ++-- python/packages/ame/src/ame/eval.py | 8 ++-- .../eval_learning_from_demonstration.py | 4 +- .../ame/eval_functions/eval_self_teaching.py | 4 +- .../ame/eval_functions/eval_teachability.py | 4 +- .../eval_functions/eval_without_learning.py | 4 +- .../autogen_ext/apprentice/_agent_wrapper.py | 12 ++--- .../apprentice/_agentic_memory_bank.py | 4 +- .../apprentice/_agentic_memory_controller.py | 44 +++++++++---------- .../src/autogen_ext/apprentice/_grader.py | 4 +- .../autogen_ext/apprentice/_page_logger.py | 27 +++++++++--- .../apprentice/_string_similarity_map.py | 12 ++--- .../src/autogen_ext/apprentice/apprentice.py | 16 +++---- 14 files changed, 85 insertions(+), 70 deletions(-) diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py index d61a4927ae06..3c3aaba25046 100644 --- a/python/packages/ame/src/ame/clients/_client_creator.py +++ b/python/packages/ame/src/ame/clients/_client_creator.py @@ -10,7 +10,7 @@ def __init__(self, settings, logger): self.logger = logger def create_client(self): - self.logger.begin_page(summary="ClientCreator.create_client") + self.logger.enter_function() # A few args are shared by all clients. 
args = {} @@ -48,7 +48,7 @@ def create_client(self): client = ClientWrapper( client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger) - self.logger.finish_page() + self.logger.leave_function() return client def create_oai_client(self, args): diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index 0a6e3a21d7be..b7e2a7b6ac4e 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -19,7 +19,7 @@ class ClientWrapper: """ def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, logger: PageLogger) -> None: self.logger = logger - self.logger.begin_page(summary="ClientWrapper.__init__") + self.logger.enter_function() self.base_client = base_client self.mode = mode @@ -36,7 +36,7 @@ def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, sess self.logger.info("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) self.recorded_items = self.load() - self.logger.finish_page() + self.logger.leave_function() async def create( self, @@ -136,14 +136,14 @@ def check_result(self, result: Any) -> None: raise ValueError(error_str) def finalize(self) -> None: - self.logger.begin_page(summary="ClientWrapper.finalize") + self.logger.enter_function() self.report_result("Total items = " + str(self.next_item_index)) if self.mode == "record": self.save() self.logger.error("\nRecorded session was saved to: " + self.path_to_output_file) elif self.mode == "check-replay": self.logger.error("\nRecorded session was fully replayed and checked.") - self.logger.finish_page() + self.logger.leave_function() def save(self) -> None: # Save the recorded messages and responses to disk. diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 501e5f71886b..7916a82fe951 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -37,7 +37,7 @@ def get_demo_from_file(self, demo_filename): async def test_fast_learner(self, fast_learner, task_description, expected_answer, num_trials, use_memory, client, logger) -> Tuple[int, int]: - logger.begin_page(summary="Evaluator.test_fast_learner") + logger.enter_function() self.logger.info("Testing the fast learner on the given task.\n") @@ -58,11 +58,11 @@ async def test_fast_learner(self, fast_learner, task_description, expected_answe self.logger.info("Answer is INCORRECT.\n") self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - logger.finish_page() + logger.leave_function() return num_successes, num_trials async def perform_evaluations(self, settings): - self.logger.begin_page(summary="Evaluator.perform_evaluations") + self.logger.enter_function() # Create the client, passed to both the fast_learner and the evaluator. client_creator = ClientCreator(settings=settings["client"], logger=self.logger) @@ -115,7 +115,7 @@ async def perform_evaluations(self, settings): client.finalize() self.logger.flush(final=True) # Finalize the page log - self.logger.finish_page() + self.logger.leave_function() async def run(self, settings_filepath): # Load the settings from yaml. 
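Every call site converted in this patch follows the same bracketing convention. A minimal usage sketch of the new pattern (illustrative only, not code from this PR; PageLogger construction is assumed to happen elsewhere):

    from autogen_ext.apprentice import PageLogger

    class Worker:
        def __init__(self, logger: PageLogger) -> None:
            self.logger = logger

        async def do_task(self, task: str) -> str:
            self.logger.enter_function()  # opens a log page titled "Worker.do_task"
            self.logger.info("Task received: " + task)
            result = task.upper()  # placeholder for real work
            self.logger.leave_function()  # closes the page on the way out
            return result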
diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 6cb831900fa2..dd39116efb4d 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -1,7 +1,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - logger.begin_page(summary="eval_learning_from_demonstration") + logger.enter_function() num_trials = settings["num_trials"] @@ -38,5 +38,5 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg results_str_2 = "Success rate after demonstration: {}%".format(success_rate) logger.info('\n' + results_str_2) - logger.finish_page() + logger.leave_function() return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index 405bd79c4c5f..d893a4e8112a 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -1,7 +1,7 @@ async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - logger.begin_page(summary="eval_self_teaching") + logger.enter_function() num_loops = settings["num_loops"] num_final_test_trials = settings["num_final_test_trials"] @@ -49,5 +49,5 @@ async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, logger.info('\n' + results_str_1) logger.info(results_str_2) - logger.finish_page() + logger.leave_function() return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py index 66fbba6cd60e..ca0846a2b635 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py +++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py @@ -3,7 +3,7 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - logger.begin_page(summary="eval_teachability") + logger.enter_function() # This eval function needs 2 data strings for each run. task_file = run_dict["task_file"] # The task being tested. @@ -45,5 +45,5 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, r results_str_2 = "Answer after teaching is INCORRECT." 
logger.info(results_str_2 + "\n") - logger.finish_page() + logger.leave_function() return "\neval_teachability\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py index e1ed5c036334..fe0c84705755 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py +++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py @@ -1,7 +1,7 @@ async def eval_without_learning(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" - logger.begin_page(summary="eval_without_learning") + logger.enter_function() num_trials = settings["num_trials"] @@ -18,4 +18,4 @@ async def eval_without_learning(fast_learner, evaluator, client, logger, setting success_rate = round((num_successes / num_trials) * 100) logger.info("\nSuccess rate: {}%\n".format(success_rate), flush=True) - logger.finish_page() + logger.leave_function() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py index d9db02f20bb3..6b6557dbabd1 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py @@ -21,7 +21,7 @@ async def assign_task(self, task): """ Assigns a task to the base agent. """ - self.logger.begin_page(summary="AgentWrapper.assign_task") + self.logger.enter_function() # Pass the task through to the base agent. if self.base_agent_name == "MagenticOneGroupChat": @@ -31,11 +31,11 @@ async def assign_task(self, task): else: assert False, "Invalid base agent" - self.logger.finish_page() + self.logger.leave_function() return response, work_history async def assign_task_to_thin_agent(self, task): - self.logger.begin_page(summary="AgentWrapper.assign_task_to_thin_agent") + self.logger.enter_function() self.logger.info(task) @@ -68,11 +68,11 @@ async def assign_task_to_thin_agent(self, task): # Use the response as the work history as well. work_history = response_str - self.logger.finish_page() + self.logger.leave_function() return response_str, work_history async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: - self.logger.begin_page(summary="AgentWrapper.assign_task_to_magentic_one") + self.logger.enter_function() self.logger.info(task) @@ -104,5 +104,5 @@ async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: # MagenticOne's response is the chat history, which we use here as the work history. work_history = response_str - self.logger.finish_page() + self.logger.leave_function() return response_str, work_history diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index c4337222bdef..cfb38c77c371 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -30,7 +30,7 @@ def __init__(self, - logger (Optional, PageLogger): the PageLogger object to use for logging. 
""" self.logger = logger - self.logger.begin_page(summary="AgenticMemoryBank.__init__") + self.logger.enter_function() self.settings = settings memory_dir_path = os.path.expanduser(self.settings["path"]) @@ -55,7 +55,7 @@ def __init__(self, if reset: self.reset_insights() - self.logger.finish_page() + self.logger.leave_function() def reset(self): self.string_map.reset_db() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py index ce0d156dc690..406673619b6d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py @@ -7,7 +7,7 @@ class AgenticMemoryController: def __init__(self, settings, agent, reset, client, logger): self.logger = logger - self.logger.begin_page(summary="AgenticMemoryController.__init__") + self.logger.enter_function() self.settings = settings self.agent = agent @@ -17,7 +17,7 @@ def __init__(self, settings, agent, reset, client, logger): verbosity=3, reset=reset, logger=logger) self.grader = Grader(client, logger) - self.logger.finish_page() + self.logger.leave_function() def reset_memory(self): self.memory_bank.reset() @@ -29,7 +29,7 @@ async def train_on_task(self, """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ - self.logger.begin_page(summary="AgenticMemoryController.train_on_task") + self.logger.enter_function() # Attempt to create useful new memories. self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") @@ -42,13 +42,13 @@ async def train_on_task(self, # Add this insight to memory. await self.add_insight_to_memory(task, insight) - self.logger.finish_page() + self.logger.leave_function() async def test_on_task(self, task: str, expected_answer: str, num_trials=1): """ Assigns a task to the completion agent, along with any relevant insights/memories. """ - self.logger.begin_page(summary="AgenticMemoryController.test_on_task") + self.logger.enter_function() response = None num_successes = 0 @@ -81,12 +81,12 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1): # Calculate the success rate as a percentage, rounded to the nearest whole number. self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - self.logger.finish_page() + self.logger.leave_function() return response, num_successes, num_trials async def add_insight_to_memory(self, task: str, insight: str): # Adds an insight to the DB. - self.logger.begin_page(summary="AgenticMemoryController.add_insight_to_memory") + self.logger.enter_function() self.logger.info("\nGIVEN TASK:") self.logger.info(task) @@ -107,11 +107,11 @@ async def add_insight_to_memory(self, task: str, insight: str): # Add the insight to the memory bank. self.memory_bank.add_insight(insight, generalized_task, topics) - self.logger.finish_page() + self.logger.leave_function() async def add_insight_without_task_to_memory(self, insight: str): # Adds an insight to the DB. - self.logger.begin_page(summary="AgenticMemoryController.add_insight_without_task_to_memory") + self.logger.enter_function() self.logger.info("\nGIVEN INSIGHT:") self.logger.info(insight) @@ -125,11 +125,11 @@ async def add_insight_without_task_to_memory(self, insight: str): # Add the insight to the memory bank. 
self.memory_bank.add_insight(insight, None, topics) - self.logger.finish_page() + self.logger.leave_function() async def retrieve_relevant_insights(self, task: str): # Retrieve insights from the DB that are relevant to the task. - self.logger.begin_page(summary="AgenticMemoryController.retrieve_relevant_insights") + self.logger.enter_function() if self.memory_bank.contains_insights(): self.logger.info("\nCURRENT TASK:") @@ -165,7 +165,7 @@ async def retrieve_relevant_insights(self, task: str): self.logger.info("\nNO INSIGHTS WERE FOUND IN MEMORY") validated_insights = [] - self.logger.finish_page() + self.logger.leave_function() return validated_insights def format_memory_section(self, memories): @@ -180,7 +180,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a """ Attempts to solve the given task multiple times to find a failure case to learn from. """ - self.logger.begin_page(summary="AgenticMemoryController._test_for_failure") + self.logger.enter_function() self.logger.info("\nTask description, including any insights: {}".format(task_plus_insights)) self.logger.info("\nExpected answer: {}\n".format(expected_answer)) @@ -205,11 +205,11 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a failure_found = True break - self.logger.finish_page() + self.logger.leave_function() return failure_found, response, work_history async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int): - self.logger.begin_page(summary="AgenticMemoryController._iterate_on_task") + self.logger.enter_function() self.logger.info("\nTask description: {}".format(task)) self.logger.info("\nExpected answer: {}\n".format(expected_answer)) @@ -266,14 +266,14 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Return the answer from the last loop. self.logger.info("\n{}\n".format(final_response)) - self.logger.finish_page() + self.logger.leave_function() return final_response, successful_insight async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ - self.logger.begin_page(summary="AgenticMemoryController.assign_task") + self.logger.enter_function() if use_memory: # Try to retrieve any relevant memories from the DB. @@ -292,11 +292,11 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo else: response, _ = self.agent.assign_task(task) - self.logger.finish_page() + self.logger.leave_function() return response async def handle_user_message(self, text, should_await=True): - self.logger.begin_page(summary="AgenticMemoryController.handle_user_message") + self.logger.enter_function() advice = await self.prompter.extract_advice(text) self.logger.info("Advice: {}".format(advice)) @@ -306,11 +306,11 @@ async def handle_user_message(self, text, should_await=True): response = await self.assign_task(text, use_memory=(advice is None), should_await=should_await) - self.logger.finish_page() + self.logger.leave_function() return response async def learn_from_demonstration(self, task, demonstration): - self.logger.begin_page(summary="AgenticMemoryController.learn_from_demonstration") + self.logger.enter_function() self.logger.info("\nEXAMPLE TASK:") self.logger.info(task) @@ -327,4 +327,4 @@ async def learn_from_demonstration(self, task, demonstration): # Add the insight to the memory bank. 
         self.memory_bank.add_demonstration(task, demonstration, topics)
 
-        self.logger.finish_page()
+        self.logger.leave_function()
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
index 22a74d89e33f..2b3bd9fac350 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py
@@ -64,7 +64,7 @@ def clear_history(self):
 
     async def is_response_correct(self, task_description, response_to_be_graded, correct_answer):
         # Determines whether the response is correct, returning the decision and the answer extracted from the response.
-        self.logger.begin_page(summary="Grader.is_response_correct")
+        self.logger.enter_function()
 
         sys_message = """You are a helpful and thoughtful assistant."""
 
@@ -103,7 +103,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor
             system_message_content=sys_message, user_content=user_message)
 
         self.logger.info("Decision: " + decision)
-        self.logger.finish_page()
+        self.logger.leave_function()
         if self.report_results:
             self.client.report_result(decision)
         return decision == "1", extracted_answer
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
index 904cfaca8d3d..096408c99d49 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
@@ -2,6 +2,7 @@
 import shutil
 import time
 import json
+import inspect
 from typing import List, Dict
 
 from autogen_core import Image
@@ -263,20 +264,34 @@ def flush(self, final=False):
             f.write(self.html_closing())
         time.sleep(0.1)
 
-    def begin_page(self, summary, show_in_overview=True):
+    def enter_function(self):
+        # Perform a set of logging actions that are often performed at the beginning of a caller's method.
         if not self.enabled:
             return
-        assert show_in_overview
-        # Perform a set of logging actions that are often performed at the beginning of a caller's method.
-        page = self.add_page(summary=summary, show_in_overview=show_in_overview, final=False)
+        frame = inspect.currentframe().f_back  # Get the calling frame
+
+        # Check if it's a method by looking for 'self' or 'cls' in f_locals
+        if 'self' in frame.f_locals:
+            class_name = type(frame.f_locals['self']).__name__
+        elif 'cls' in frame.f_locals:
+            class_name = frame.f_locals['cls'].__name__
+        else:
+            class_name = None  # Not part of a class
+
+        if class_name is None:  # Not part of a class
+            caller_name = frame.f_code.co_name
+        else:
+            caller_name = class_name + '.' + frame.f_code.co_name
+
+        # Create a new page for this function.
+        page = self.add_page(summary=caller_name, show_in_overview=True, final=False)
         self.page_stack.push(page)
         self.page_stack.write_stack_to_page(page)
-        page._add_lines("\nENTER {}".format(summary), flush=True)
+        page._add_lines("\nENTER {}".format(caller_name), flush=True)
         return page
 
-    def finish_page(self):
+    def leave_function(self):
         if not self.enabled:
             return
         # Perform a set of logging actions that are often performed at the end of a caller's method.
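The enter_function shown above derives that page title from the caller's stack frame, so call sites no longer pass a summary string. The frame-introspection trick in isolation, as a small runnable sketch (CPython-specific, since inspect.currentframe may return None on other implementations):

    import inspect

    def caller_name() -> str:
        frame = inspect.currentframe().f_back  # the frame of whoever called us
        if "self" in frame.f_locals:  # bound method: prefix the class name
            return type(frame.f_locals["self"]).__name__ + "." + frame.f_code.co_name
        if "cls" in frame.f_locals:  # classmethod
            return frame.f_locals["cls"].__name__ + "." + frame.f_code.co_name
        return frame.f_code.co_name  # plain function

    class Demo:
        def work(self):
            print(caller_name())  # prints "Demo.work"

    Demo().work()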
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py index 811af5385193..ded2bce1f1d5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py @@ -29,7 +29,7 @@ def __init__( - logger (Optional, PageLogger): the PageLogger object to use for logging. """ self.logger = logger - self.logger.begin_page(summary="StringSimilarityMap.__init__") + self.logger.enter_function() self.verbosity = verbosity self.path_to_db_dir = path_to_db_dir @@ -59,7 +59,7 @@ def __init__( # Clear the DB if requested. if reset: self.reset_db() - self.logger.finish_page() + self.logger.leave_function() def list_string_pairs(self): """Prints the string-pair contents.""" @@ -70,7 +70,7 @@ def list_string_pairs(self): def save_string_pairs_to_text_files(self): """Saves the contents to text files.""" - self.logger.begin_page(summary="StringSimilarityMap.save_string_pairs_to_text_files") + self.logger.enter_function() # Delete all files in mem_text dir. for file in os.listdir("mem_text"): os.remove(os.path.join("mem_text", file)) @@ -84,7 +84,7 @@ def save_string_pairs_to_text_files(self): # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. with open("mem_text/{}.txt".format(uid), "w") as file: file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) - self.logger.finish_page() + self.logger.leave_function() def save_string_pairs(self): """Saves self.uid_text_dict to disk.""" @@ -93,14 +93,14 @@ def save_string_pairs(self): def reset_db(self): """Forces immediate deletion of the DB's contents, in memory and on disk.""" - self.logger.begin_page(summary="StringSimilarityMap.reset_db") + self.logger.enter_function() if self.verbosity >= 1: self.logger.info("\nCLEARING STRING-PAIR MAP") self.db_client.delete_collection("string-pairs") self.vec_db = self.db_client.create_collection("string-pairs") self.uid_text_dict = {} self.save_string_pairs() - self.logger.finish_page() + self.logger.leave_function() def add_input_output_pair(self, input_text: str, output_text: str): """Adds an input-output pair to the vector DB.""" diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index fa9a758ca32a..d4c86bd5cc18 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -28,40 +28,40 @@ def reset_memory(self): async def handle_user_message(self, text, should_await=True): """A foreground operation, intended for immediate response to the user.""" - self.logger.begin_page(summary="Apprentice.handle_user_message") + self.logger.enter_function() # Pass the user message through to the memory controller. response = await self.memory_controller.handle_user_message(text, should_await) - self.logger.finish_page() + self.logger.leave_function() return response async def learn_from_demonstration(self, task, demonstration): """A foreground operation, assuming that the task and demonstration are already known.""" - self.logger.begin_page(summary="Apprentice.learn_from_demonstration") + self.logger.enter_function() # Pass the task and demonstration through to the memory controller. 
await self.memory_controller.learn_from_demonstration(task, demonstration) - self.logger.finish_page() + self.logger.leave_function() async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): """ Assigns a task to the agent, along with any relevant insights/memories. """ - self.logger.begin_page(summary="Apprentice.assign_task") + self.logger.enter_function() # Pass the task through to the memory controller. response = await self.memory_controller.assign_task(task, use_memory, should_await) - self.logger.finish_page() + self.logger.leave_function() return response async def train_on_task(self, task, expected_answer): """A background operation, not intended for immediate response.""" - self.logger.begin_page(summary="Apprentice.train_on_task") + self.logger.enter_function() # Pass the task through to the memory controller. await self.memory_controller.train_on_task(task, expected_answer) - self.logger.finish_page() + self.logger.leave_function() From 07cb3f094961f6def210953de205a7bd7f044e17 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 22 Jan 2025 12:16:02 -0800 Subject: [PATCH 48/93] simplify logging --- .../autogen-ext/src/autogen_ext/apprentice/_page_logger.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py index 096408c99d49..1db5f974e293 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py @@ -29,14 +29,11 @@ def __init__(self, page_logger, index, summary, indent_level, show_in_overview=T self.indentation_text = "" for i in range(self.indent_level): self.indentation_text += "| " - self.full_link = self.link_to_page_file() + self.full_link = f'{self.file_title}' self.line_text = self.indentation_text + self.full_link self.lines = [] self.flush() - def link_to_page_file(self): - return f'{self.file_title}' - def _add_lines(self, line, flush=False): # If the string 'line' consists of multiple lines, separate them into a list. 
         lines_to_add = []

From 9b3f77da3d2f181ee33a17932834b5f81859b233 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Wed, 22 Jan 2025 16:13:31 -0800
Subject: [PATCH 49/93] merge from main

---
 .../ame/src/ame/eval_functions/eval_without_learning.py | 2 +-
 python/packages/autogen-ext/pyproject.toml              | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
index fe0c84705755..b98d21b6474a 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
@@ -16,6 +16,6 @@ async def eval_without_learning(fast_learner, evaluator, client, logger, setting
         fast_learner=fast_learner, task_description=task_description, expected_answer=expected_answer,
         num_trials=num_trials, use_memory=True, client=client, logger=logger)
     success_rate = round((num_successes / num_trials) * 100)
-    logger.info("\nSuccess rate: {}%\n".format(success_rate), flush=True)
+    logger.info("\nSuccess rate: {}%\n".format(success_rate))
 
     logger.leave_function()

diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml
index 20579c99baec..b01dccee0e88 100644
--- a/python/packages/autogen-ext/pyproject.toml
+++ b/python/packages/autogen-ext/pyproject.toml
@@ -60,6 +60,7 @@ jupyter-executor = [
     "ipykernel>=6.29.5",
     "nbclient>=0.10.2",
 ]
+apprentice = ["chromadb"]
 
 semantic-kernel-core = [
     "semantic-kernel>=1.17.1",

From a0dee67270c1b10fe23ce9fc1f89cfff3127dd50 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Wed, 22 Jan 2025 17:27:40 -0800
Subject: [PATCH 50/93] Changes made by poe check.

---
 python/packages/ame/pyproject.toml            |  12 +-
 .../ame/src/ame/clients/_client_creator.py    |  66 +--
 .../ame/src/ame/clients/_client_wrapper.py    |  34 +-
 python/packages/ame/src/ame/eval.py           |  24 +-
 .../eval_learning_from_demonstration.py       |  25 +-
 .../ame/eval_functions/eval_self_teaching.py  |  23 +-
 .../ame/eval_functions/eval_teachability.py   |   8 +-
 .../eval_functions/eval_without_learning.py   |  11 +-
 .../src/autogen_ext/apprentice/__init__.py    |   4 +-
 .../autogen_ext/apprentice/_agent_wrapper.py  |  16 +-
 .../apprentice/_agentic_memory_bank.py        |  22 +-
 .../apprentice/_agentic_memory_controller.py  |  42 +-
 .../src/autogen_ext/apprentice/_grader.py     |  32 +-
 .../autogen_ext/apprentice/_page_logger.py    |  48 +-
 .../src/autogen_ext/apprentice/_prompter.py   | 127 ++++--
 .../apprentice/_string_similarity_map.py      |  21 +-
 .../src/autogen_ext/apprentice/_utils.py      |   1 +
 .../src/autogen_ext/apprentice/apprentice.py  |   4 +-
 python/uv.lock                                | 426 +++++++++++++++++-
 19 files changed, 747 insertions(+), 199 deletions(-)

diff --git a/python/packages/ame/pyproject.toml b/python/packages/ame/pyproject.toml
index aee909668c3c..49b5eea467c2 100644
--- a/python/packages/ame/pyproject.toml
+++ b/python/packages/ame/pyproject.toml
@@ -25,6 +25,16 @@ packages = ["src/ame"]
 extend = "../../pyproject.toml"
 include = ["src/**", "tests/*.py"]
 
+[tool.ruff.lint]
+# Allow prints in this package
+ignore = ["T20"]
+
 [tool.pyright]
 extends = "../../pyproject.toml"
-include = ["src", "tests"]
+include = ["src"]
+
+[tool.poe]
+include = "../../shared_tasks.toml"
+
+[tool.poe.tasks]
+mypy = "mypy --config-file ../../pyproject.toml src"
diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py
index d61a4927ae06..8d04cdf27175 100644
--- a/python/packages/ame/src/ame/clients/_client_creator.py
+++ b/python/packages/ame/src/ame/clients/_client_creator.py
@@ -45,8 +45,7 @@ def create_client(self):
         wrapper_settings = self.settings["ClientWrapper"]
         if wrapper_settings["enabled"]:
             # Wrap the client.
-            client = ClientWrapper(
-                client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger)
+            client = ClientWrapper(client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger)
 
         self.logger.leave_function()
         return client
@@ -57,23 +56,26 @@ def create_oai_client(self, args):
         client = OpenAIChatCompletionClient(**args)
         return client, " created through OpenAI"
 
-
     def create_aoai_client(self, args):
         # Create an Azure OpenAI client
-        token_provider = get_bearer_token_provider(DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default")
+        token_provider = get_bearer_token_provider(
+            DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default"
+        )
         model = self.settings["model"]
         if model == "gpt-4o-2024-08-06":
-            azure_deployment = 'gpt-4o-2024-08-06-eval'  # This is DeploymentName in the table at https://aka.ms/trapi/models
+            azure_deployment = (
+                "gpt-4o-2024-08-06-eval"  # This is DeploymentName in the table at https://aka.ms/trapi/models
+            )
             azure_endpoint = "https://agentic2.openai.azure.com/"
         elif model == "gpt-4o-2024-05-13":
-            azure_deployment = 'gpt-4o-2024-05-13-eval'
+            azure_deployment = "gpt-4o-2024-05-13-eval"
             azure_endpoint = "https://agentic1.openai.azure.com/"
         elif model == "o1-preview":
-            azure_deployment = 'o1-preview-2024-09-12-eval'
+            azure_deployment = "o1-preview-2024-09-12-eval"
             azure_endpoint = "https://agentic1.openai.azure.com/"
         else:
             assert False, "Unsupported model"
-        api_version = '2024-12-01-preview'  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
+        api_version = "2024-12-01-preview"  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
         args["azure_ad_token_provider"] = token_provider
         args["azure_deployment"] = azure_deployment
         args["azure_endpoint"] = azure_endpoint
@@ -81,37 +83,41 @@ def create_aoai_client(self, args):
         client = AzureOpenAIChatCompletionClient(**args)
         return client, " created through Azure OpenAI"
 
-
     def create_trapi_client(self, args):
         # Create an Azure OpenAI client through TRAPI
-        token_provider = get_bearer_token_provider(ChainedTokenCredential(
-            AzureCliCredential(),
-            DefaultAzureCredential(
-                exclude_cli_credential=True,
-                # Exclude other credentials we are not interested in.
-                exclude_environment_credential=True,
-                exclude_shared_token_cache_credential=True,
-                exclude_developer_cli_credential=True,
-                exclude_powershell_credential=True,
-                exclude_interactive_browser_credential=True,
-                exclude_visual_studio_code_credentials=True,
-                # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"),  # See the TRAPI docs
-            )
-        ), "api://trapi/.default")
+        token_provider = get_bearer_token_provider(
+            ChainedTokenCredential(
+                AzureCliCredential(),
+                DefaultAzureCredential(
+                    exclude_cli_credential=True,
+                    # Exclude other credentials we are not interested in.
+                    exclude_environment_credential=True,
+                    exclude_shared_token_cache_credential=True,
+                    exclude_developer_cli_credential=True,
+                    exclude_powershell_credential=True,
+                    exclude_interactive_browser_credential=True,
+                    exclude_visual_studio_code_credential=True,
+                    # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"),  # See the TRAPI docs
+                ),
+            ),
+            "api://trapi/.default",
+        )
         model = self.settings["model"]
         if model == "gpt-4o-2024-08-06":
-            azure_deployment = 'gpt-4o_2024-08-06'  # This is DeploymentName in the table at https://aka.ms/trapi/models
+            azure_deployment = "gpt-4o_2024-08-06"  # This is DeploymentName in the table at https://aka.ms/trapi/models
        elif model == "gpt-4o-2024-05-13":
-            azure_deployment = 'gpt-4o_2024-05-13'
+            azure_deployment = "gpt-4o_2024-05-13"
         elif model == "o1-preview":
-            azure_deployment = 'o1-preview_2024-09-12'
+            azure_deployment = "o1-preview_2024-09-12"
         elif model == "o1":
-            azure_deployment = 'o1_2024-12-17'
+            azure_deployment = "o1_2024-12-17"
         else:
             assert False, "Unsupported model"
-        trapi_suffix = 'msraif/shared'  # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models
-        endpoint = f'https://trapi.research.microsoft.com/{trapi_suffix}'
-        api_version = '2024-12-01-preview'  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
+        trapi_suffix = (
+            "msraif/shared"  # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models
+        )
+        endpoint = f"https://trapi.research.microsoft.com/{trapi_suffix}"
+        api_version = "2024-12-01-preview"  # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release
         args["azure_ad_token_provider"] = token_provider
         args["azure_deployment"] = azure_deployment
         args["azure_endpoint"] = endpoint
diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py
index b7e2a7b6ac4e..c8b0542ec998 100644
--- a/python/packages/ame/src/ame/clients/_client_wrapper.py
+++ b/python/packages/ame/src/ame/clients/_client_wrapper.py
@@ -17,7 +17,10 @@ class ClientWrapper:
     Wraps a client object to record messages and responses (in record mode)
     or check the messages and replay the responses (in check-replay mode).
""" - def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, logger: PageLogger) -> None: + + def __init__( + self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, logger: PageLogger + ) -> None: self.logger = logger self.logger.enter_function() @@ -39,19 +42,23 @@ def __init__(self, base_client: AzureOpenAIChatCompletionClient, mode: str, sess self.logger.leave_function() async def create( - self, - messages: Sequence[LLMMessage], - tools: Sequence[Tool | ToolSchema] = [], - json_output: Optional[bool] = None, - extra_create_args: Mapping[str, Any] = {}, - cancellation_token: Optional[CancellationToken] = None, + self, + messages: Sequence[LLMMessage], + tools: Sequence[Tool | ToolSchema] = [], + json_output: Optional[bool] = None, + extra_create_args: Mapping[str, Any] = {}, + cancellation_token: Optional[CancellationToken] = None, ) -> CreateResult: response = None if self.mode == "pass-through": - response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) + response = await self.base_client.create( + messages, tools, json_output, extra_create_args, cancellation_token + ) elif self.mode == "record": - response = await self.base_client.create(messages, tools, json_output, extra_create_args, cancellation_token) + response = await self.base_client.create( + messages, tools, json_output, extra_create_args, cancellation_token + ) self.record_one_turn(messages, response) elif self.mode == "check-replay": response = self.check_and_replay_one_turn(messages) @@ -63,7 +70,10 @@ async def create( def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str]]: converted_messages = [] for message in messages: - turn = {"content": message.content, "source": 'System' if message.type == "SystemMessage" else message.source} + turn = { + "content": message.content, + "source": "System" if message.type == "SystemMessage" else message.source, + } converted_messages.append(turn) return converted_messages @@ -131,7 +141,9 @@ def check_result(self, result: Any) -> None: self.logger.error(error_str) raise ValueError(error_str) if result != recorded_result["result"]: - error_str = "\nRecorded result ({}) doesn't match the current result ({}).".format(recorded_result["result"], result) + error_str = "\nRecorded result ({}) doesn't match the current result ({}).".format( + recorded_result["result"], result + ) self.logger.error(error_str) raise ValueError(error_str) diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 7916a82fe951..4168cb6988e5 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -14,7 +14,7 @@ def __init__(self): def get_task_description_and_answer_from_file(self, task_filename): path_to_this_file = os.path.abspath(__file__) dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, 'task_data', 'tasks', task_filename + '.yaml') + task_filepath = os.path.join(dir_of_this_file, "task_data", "tasks", task_filename + ".yaml") with open(task_filepath, "r") as file: task_details = yaml.load(file, Loader=yaml.FullLoader) return task_details["task_description"], task_details["expected_answer"] @@ -22,7 +22,7 @@ def get_task_description_and_answer_from_file(self, task_filename): def get_advice_from_file(self, advice_filename): path_to_this_file = os.path.abspath(__file__) dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = 
os.path.join(dir_of_this_file, 'task_data', 'advice', advice_filename + '.yaml') + task_filepath = os.path.join(dir_of_this_file, "task_data", "advice", advice_filename + ".yaml") with open(task_filepath, "r") as file: advice_dict = yaml.load(file, Loader=yaml.FullLoader) return advice_dict["advice"] @@ -30,13 +30,14 @@ def get_advice_from_file(self, advice_filename): def get_demo_from_file(self, demo_filename): path_to_this_file = os.path.abspath(__file__) dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, 'task_data', 'demos', demo_filename + '.yaml') + task_filepath = os.path.join(dir_of_this_file, "task_data", "demos", demo_filename + ".yaml") with open(task_filepath, "r") as file: demo_dict = yaml.load(file, Loader=yaml.FullLoader) return demo_dict["demo"] - async def test_fast_learner(self, fast_learner, task_description, expected_answer, num_trials, - use_memory, client, logger) -> Tuple[int, int]: + async def test_fast_learner( + self, fast_learner, task_description, expected_answer, num_trials, use_memory, client, logger + ) -> Tuple[int, int]: logger.enter_function() self.logger.info("Testing the fast learner on the given task.\n") @@ -49,7 +50,8 @@ async def test_fast_learner(self, fast_learner, task_description, expected_answe self.logger.info("Try to solve the task.\n") response = await fast_learner.assign_task(task_description, use_memory=use_memory) response_is_correct, extracted_answer = await grader.is_response_correct( - task_description, response, expected_answer) + task_description, response, expected_answer + ) self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: self.logger.info("Answer is CORRECT.\n") @@ -74,18 +76,18 @@ async def perform_evaluations(self, settings): try: module = importlib.import_module(module_path) except ModuleNotFoundError: - print('Failed to import {}'.format(module_path)) + print("Failed to import {}".format(module_path)) raise class_name = fast_learner_settings["class_name"] try: fast_learner_class = getattr(module, class_name) except AttributeError: - print('Failed to import {}.{}'.format(module_path, class_name)) + print("Failed to import {}.{}".format(module_path, class_name)) raise try: fast_learner = fast_learner_class(fast_learner_settings, self, client, self.logger) except Exception as err: - print("Error creating \"{}\": {}".format(fast_learner_class, err)) + print('Error creating "{}": {}'.format(fast_learner_class, err)) raise # Execute each evaluation. @@ -96,13 +98,13 @@ async def perform_evaluations(self, settings): try: module = importlib.import_module(module_path) except ModuleNotFoundError: - print('Failed to import {}'.format(module_path)) + print("Failed to import {}".format(module_path)) raise function_name = function_settings["function_name"] try: eval_function = getattr(module, function_name) except AttributeError: - print('Failed to import {}.{}'.format(module_path, function_name)) + print("Failed to import {}.{}".format(module_path, function_name)) raise # Call the eval function for each listed run. 
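perform_evaluations resolves the fast learner and each eval function dynamically by module path and name, so the YAML settings file only has to point at them. A hypothetical sketch of the shape those lookups imply, written as the Python dict that yaml.load would produce; the top-level key names and all values here are illustrative assumptions, not taken from this PR:

    # Hypothetical settings, as loaded from the YAML file. Only the nested keys
    # ("model", "ClientWrapper", "module_path", "class_name", "function_name",
    # "task_file", "advice_file") appear in the code above; the rest is assumed.
    settings = {
        "client": {
            "model": "gpt-4o-2024-08-06",
            "ClientWrapper": {"enabled": False, "mode": "pass-through", "session_name": "example"},
        },
        "fast_learner": {"module_path": "autogen_ext.apprentice", "class_name": "Apprentice"},
        "evaluations": [
            {
                "module_path": "ame.eval_functions.eval_teachability",
                "function_name": "eval_teachability",
                "runs": [{"task_file": "example_task", "advice_file": "example_advice"}],
            }
        ],
    }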
diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index dd39116efb4d..9ffce8c1598d 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -1,4 +1,3 @@ - async def eval_learning_from_demonstration(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" logger.enter_function() @@ -19,11 +18,17 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg logger.info("To get a baseline, clear memory, then assign the task.") fast_learner.reset_memory() num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_trials, use_memory=True, client=client, logger=logger) + fast_learner=fast_learner, + task_description=task_description_1, + expected_answer=expected_answer_1, + num_trials=num_trials, + use_memory=True, + client=client, + logger=logger, + ) success_rate = round((num_successes / num_trials) * 100) results_str_1 = "Baseline success rate: {}%".format(success_rate) - logger.info('\n' + results_str_1) + logger.info("\n" + results_str_1) # Provide a demonstration for a similar but different task. logger.info("Demonstrate a solution to a similar task.") @@ -32,11 +37,17 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg # Now test again to see if the demonstration (retrieved from memory) helps. logger.info("Assign the task again to see if the demonstration helps.") num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_trials, use_memory=True, client=client, logger=logger) + fast_learner=fast_learner, + task_description=task_description_1, + expected_answer=expected_answer_1, + num_trials=num_trials, + use_memory=True, + client=client, + logger=logger, + ) success_rate = round((num_successes / num_trials) * 100) results_str_2 = "Success rate after demonstration: {}%".format(success_rate) - logger.info('\n' + results_str_2) + logger.info("\n" + results_str_2) logger.leave_function() return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index d893a4e8112a..5bd0213e5389 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -1,4 +1,3 @@ - async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" logger.enter_function() @@ -26,15 +25,27 @@ async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, # Test on the first task. 
num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_description=task_description_1, expected_answer=expected_answer_1, - num_trials=num_final_test_trials, use_memory=True, client=client, logger=logger) + fast_learner=fast_learner, + task_description=task_description_1, + expected_answer=expected_answer_1, + num_trials=num_final_test_trials, + use_memory=True, + client=client, + logger=logger, + ) logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_1 += num_successes # Test on the second task. num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_description=task_description_2, expected_answer=expected_answer_2, - num_trials=num_final_test_trials, use_memory=True, client=client, logger=logger) + fast_learner=fast_learner, + task_description=task_description_2, + expected_answer=expected_answer_2, + num_trials=num_final_test_trials, + use_memory=True, + client=client, + logger=logger, + ) logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_2 += num_successes @@ -46,7 +57,7 @@ async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, results_str_1 = "Overall task 1 success rate: {}%".format(overall_success_rate_1) results_str_2 = "Overall task 2 success rate: {}%".format(overall_success_rate_2) - logger.info('\n' + results_str_1) + logger.info("\n" + results_str_1) logger.info(results_str_2) logger.leave_function() diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py index ca0846a2b635..8fda2cd3d49c 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py +++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py @@ -20,7 +20,9 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, r # Check the response. grader = Grader(client, logger) - response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer) + response_is_correct, extracted_answer = await grader.is_response_correct( + task_description, response, expected_answer + ) logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: results_str_1 = "Answer before teaching is CORRECT." @@ -37,7 +39,9 @@ async def eval_teachability(fast_learner, evaluator, client, logger, settings, r response = await fast_learner.handle_user_message(task_description) # Check the response. - response_is_correct, extracted_answer = await grader.is_response_correct(task_description, response, expected_answer) + response_is_correct, extracted_answer = await grader.is_response_correct( + task_description, response, expected_answer + ) logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: results_str_2 = "Answer after teaching is CORRECT." 
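eval_teachability above reduces to a before/after comparison around a single piece of advice. A condensed sketch using only calls that appear in this patch; constructing the fast_learner, client, and logger is assumed to happen elsewhere, and delivering advice through handle_user_message follows the memory controller's advice-extraction path shown earlier:

    from autogen_ext.apprentice import Grader

    async def teachability_probe(fast_learner, client, logger, task, expected_answer, advice):
        grader = Grader(client, logger)
        fast_learner.reset_memory()  # baseline: no memories
        response = await fast_learner.handle_user_message(task)
        ok_before, _ = await grader.is_response_correct(task, response, expected_answer)
        await fast_learner.handle_user_message(advice)  # teach once, via plain text
        response = await fast_learner.handle_user_message(task)  # retry with memory
        ok_after, _ = await grader.is_response_correct(task, response, expected_answer)
        return ok_before, ok_after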
diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py index b98d21b6474a..f1cf9095039d 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py +++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py @@ -1,4 +1,3 @@ - async def eval_without_learning(fast_learner, evaluator, client, logger, settings, run_dict): """An evaluation""" logger.enter_function() @@ -13,8 +12,14 @@ async def eval_without_learning(fast_learner, evaluator, client, logger, setting logger.info("To get a baseline, clear memory, then assign the task.") fast_learner.reset_memory() num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, task_description=task_description, expected_answer=expected_answer, - num_trials=num_trials, use_memory=True, client=client, logger=logger) + fast_learner=fast_learner, + task_description=task_description, + expected_answer=expected_answer, + num_trials=num_trials, + use_memory=True, + client=client, + logger=logger, + ) success_rate = round((num_successes / num_trials) * 100) logger.info("\nSuccess rate: {}%\n".format(success_rate)) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py index 1e852b574c81..a086019e85e5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py @@ -1,5 +1,5 @@ -from .apprentice import Apprentice -from ._page_logger import PageLogger from ._grader import Grader +from ._page_logger import PageLogger +from .apprentice import Apprentice __all__ = ["Apprentice", "PageLogger", "Grader"] diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py index 6b6557dbabd1..5151356ddb8a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py @@ -1,13 +1,15 @@ +from typing import Tuple + from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str from autogen_agentchat.ui._console import Console from autogen_core.models import ( SystemMessage, UserMessage, ) -from typing import Tuple + +from autogen_ext.agents.web_surfer import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str class AgentWrapper: @@ -61,8 +63,9 @@ async def assign_task_to_thin_agent(self, task): response_str = response.content # Log the model call - self.logger.add_model_call(summary="Ask the model to complete the task", - input_messages=input_messages, response=response) + self.logger.add_model_call( + summary="Ask the model to complete the task", input_messages=input_messages, response=response + ) self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) # Use the response as the work history as well. 
@@ -79,7 +82,8 @@ async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: general_agent = AssistantAgent( "general_agent", self.client, - description="A general GPT-4o AI assistant capable of performing a variety of tasks.", ) + description="A general GPT-4o AI assistant capable of performing a variety of tasks.", + ) web_surfer = MultimodalWebSurfer( name="web_surfer", diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index cfb38c77c371..fb97e665b5f2 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -1,7 +1,8 @@ import os -from dataclasses import dataclass import pickle -from typing import Dict, Optional, Union, List +from dataclasses import dataclass +from typing import Dict, List, Optional, Union + from ._string_similarity_map import StringSimilarityMap @@ -17,7 +18,9 @@ class AgenticMemoryBank: """ Stores task-completion insights in a vector DB for later retrieval. """ - def __init__(self, + + def __init__( + self, settings: Dict, verbosity: Optional[int] = 0, reset: Optional[bool] = False, @@ -37,8 +40,9 @@ def __init__(self, path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") - self.string_map = StringSimilarityMap(verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir, - logger=self.logger) + self.string_map = StringSimilarityMap( + verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger + ) # Load or create the associated insight dict on disk. self.uid_insight_dict = {} @@ -88,12 +92,16 @@ def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics: def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional[List[str]] = None): """Returns any insights from the memory bank that are relevant to the given task or topics.""" - assert (task_str is not None) or (topics is not None), "Either the task string or the topics list must be provided." + assert (task_str is not None) or ( + topics is not None + ), "Either the task string or the topics list must be provided." assert topics is not None, "For now, the topics list is always required, because it won't be generated." # Build a dict of insight-relevance pairs. insight_relevance_dict = {} - relevance_conversion_threshold = 1.7 # The approximate borderline between relevant and irrelevant topic matches. + relevance_conversion_threshold = ( + 1.7 # The approximate borderline between relevant and irrelevant topic matches. + ) # Process the matching topics. 
matches = [] # Each match is a tuple: (topic, insight, distance) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py index 406673619b6d..0b14042a156d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py @@ -1,7 +1,8 @@ from typing import Callable, List -from ._prompter import Prompter + from ._agentic_memory_bank import AgenticMemoryBank from ._grader import Grader +from ._prompter import Prompter class AgenticMemoryController: @@ -13,8 +14,9 @@ def __init__(self, settings, agent, reset, client, logger): self.agent = agent self.client = client self.prompter = Prompter(client, logger) - self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], - verbosity=3, reset=reset, logger=logger) + self.memory_bank = AgenticMemoryBank( + self.settings["AgenticMemoryBank"], verbosity=3, reset=reset, logger=logger + ) self.grader = Grader(client, logger) self.logger.leave_function() @@ -22,10 +24,11 @@ def __init__(self, settings, agent, reset, client, logger): def reset_memory(self): self.memory_bank.reset() - async def train_on_task(self, - task: str, # The task to be completed. - expected_answer: str, # The expected answer to the task. - ): + async def train_on_task( + self, + task: str, # The task to be completed. + expected_answer: str, # The expected answer to the task. + ): """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ @@ -33,8 +36,9 @@ async def train_on_task(self, # Attempt to create useful new memories. self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") - _, insight = await self._iterate_on_task(task, expected_answer, - self.settings["max_train_trials"], self.settings["max_test_trials"]) + _, insight = await self._iterate_on_task( + task, expected_answer, self.settings["max_train_trials"], self.settings["max_test_trials"] + ) if insight is None: self.logger.info("No useful insight was discovered.\n") else: @@ -63,14 +67,15 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1): self.logger.info("Relevant insights were retrieved from memory.\n") memory_section = self.format_memory_section(filtered_insights) if len(memory_section) > 0: - task_plus_insights = task + '\n\n' + memory_section + task_plus_insights = task + "\n\n" + memory_section # Attempt to solve the task. 
self.logger.info("Try to solve the task.\n") response, _ = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( - task, response, expected_answer) + task, response, expected_answer + ) self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: self.logger.info("Answer is CORRECT.\n") @@ -173,7 +178,7 @@ def format_memory_section(self, memories): if len(memories) > 0: memory_section = "## Important insights that may help solve tasks like this\n" for mem in memories: - memory_section += ('- ' + mem + '\n') + memory_section += "- " + mem + "\n" return memory_section async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int): @@ -196,7 +201,8 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a response, work_history = await self.agent.assign_task(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( - task, response, expected_answer) + task, response, expected_answer + ) self.logger.info("Extracted answer: {}".format(extracted_answer)) if response_is_correct: self.logger.info("Answer is CORRECT.\n") @@ -233,11 +239,12 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria else: memory_section = self.format_memory_section(old_insights) if len(memory_section) > 0: - task_plus_insights += '\n\n' + memory_section + task_plus_insights += "\n\n" + memory_section # Can we find a failure case to learn from? failure_found, response, work_history = await self._test_for_failure( - task, task_plus_insights, expected_answer, max_test_trials) + task, task_plus_insights, expected_answer, max_test_trials + ) if not failure_found: # No. Time to exit the loop. self.logger.info("\nResponse is CORRECT.\n Stop looking for insights.\n") @@ -259,7 +266,8 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Try to learn from this failure. self.logger.info("\nResponse is INCORRECT. Try to learn from this failure.\n") insight = await self.prompter.learn_from_failure( - task, memory_section, response, expected_answer, work_history, new_insights) + task, memory_section, response, expected_answer, work_history, new_insights + ) self.logger.info("\nInsight: {}\n".format(insight)) new_insights.append(insight) last_insight = insight @@ -281,7 +289,7 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo if len(filtered_insights) > 0: self.logger.info("Relevant insights were retrieved from memory.\n") memory_section = self.format_memory_section(filtered_insights) - task = task + '\n\n' + memory_section + task = task + "\n\n" + memory_section # if len(memory_section) > 0: # Best to include this condition, but it will require new recordings. # task = task + '\n\n' + memory_section diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index 2b3bd9fac350..3d2a1b9f5f29 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -2,10 +2,10 @@ from autogen_core.models import ( AssistantMessage, + CreateResult, LLMMessage, SystemMessage, UserMessage, - CreateResult, ) from ._utils import UserContent @@ -17,12 +17,14 @@ def __init__(self, client, logger): self.logger = logger # Check whether to report results to the client. 
- self.report_results = hasattr(self.client, 'report_result')
+ self.report_results = hasattr(self.client, "report_result")

 # Create the chat history
 self._chat_history: List[LLMMessage] = []

- async def call_model(self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True):
+ async def call_model(
+ self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True
+ ):
 # Prepare the input message list
 if system_message_content is None:
 system_message_content = "You are a helpful assistant."
@@ -68,30 +70,35 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor

 sys_message = """You are a helpful and thoughtful assistant."""

- user_message = ["""Your job is to extract a possible answer to the following question from the given text.
+ user_message = [
+ """Your job is to extract a possible answer to the following question from the given text.
- First review the following task.
- Then review the text that follows, which may contain an answer, plus reasoning that led to the answer.
- Do not attempt to actually solve the task yourself.
- Don't try to judge whether the reasoning steps were correct.
- Simply respond by summarizing the answer described in the text, omitting any other parts of the text.
-- If no answer can be extracted from the text, simply reply "None"."""]
+- If no answer can be extracted from the text, simply reply "None"."""
+ ]
 user_message.append("\n# Task description")
 user_message.append(task_description)
 user_message.append("\n# Text that may contain an answer")
 user_message.append(response_to_be_graded)
 self.clear_history()
- extracted_answer = await self.call_model(summary="Ask the model to extract the answer",
- system_message_content=sys_message, user_content=user_message)
+ extracted_answer = await self.call_model(
+ summary="Ask the model to extract the answer", system_message_content=sys_message, user_content=user_message
+ )
 self.logger.info("Extracted answer: " + extracted_answer)

- user_message = ["""Your job is to decide whether a given answer to a task is correct or not.
+ user_message = [
+ """Your job is to decide whether a given answer to a task is correct or not.
- You will be given the task description and the correct, gold-standard answer, along with the answer to be graded.
- In general, an answer is correct if it is equivalent to the correct answer.
- Specifically, the given answer must contain the important information from the correct answer, and must not in any way contradict the correct answer.
- Ignore any differences of grammar, spelling mistakes, punctuation, capitalization, formatting, or extra commentary.
- An answer should be considered correct if it omits information that is clearly inferred.
- For instance, if the correct answer is "Paris, France", the answer "Paris" should be considered correct.
-- Respond with a single character: '1' if the answer to be graded is correct, '0' if not."""]
+- Respond with a single character: '1' if the answer to be graded is correct, '0' if not."""
+ ]
 user_message.append("\n# Task description")
 user_message.append(task_description)
 user_message.append("\n# Correct answer")
@@ -99,8 +106,11 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor
 user_message.append("\n# Answer to be graded")
 user_message.append(extracted_answer)
 self.clear_history()
- decision = await self.call_model(summary="Ask the model to check the answer for correctness",
- system_message_content=sys_message, user_content=user_message)
+ decision = await self.call_model(
+ summary="Ask the model to check the answer for correctness",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )
 self.logger.info("Decision: " + decision)

 self.logger.leave_function()
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
index 1db5f974e293..d91a196dd7f9 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py
@@ -1,19 +1,18 @@
+import inspect
+import json
 import os
 import shutil
 import time
-import json
-import inspect
-from typing import List, Dict
+from typing import Dict, List

 from autogen_core import Image
 from autogen_core.models import (
 AssistantMessage,
 ChatCompletionClient,
+ FunctionExecutionResultMessage,
 LLMMessage,
 SystemMessage,
 UserMessage,
- AssistantMessage,
- FunctionExecutionResultMessage,
 )

@@ -25,7 +24,7 @@ def __init__(self, page_logger, index, summary, indent_level, show_in_overview=T
 self.indent_level = indent_level
 self.show_in_overview = show_in_overview
 self.final = final
- self.file_title = self.index_str + ' ' + self.summary
+ self.file_title = self.index_str + " " + self.summary
 self.indentation_text = ""
 for i in range(self.indent_level):
 self.indentation_text += "| "
@@ -56,11 +55,11 @@ def link_to_image(self, image_path, description):
 def add_link_to_image(self, description, source_image_path):
 # Copy the image to the run directory.
 # Remove every character from the string 'description' that is not alphanumeric or a space.
- description = ''.join(e for e in description if e.isalnum() or e.isspace())
- target_image_filename = (str(self.page_logger.get_next_page_id()) + ' - ' + description)
+ description = "".join(e for e in description if e.isalnum() or e.isspace())
+ target_image_filename = str(self.page_logger.get_next_page_id()) + " - " + description
 local_image_path = os.path.join(self.page_logger.log_dir, target_image_filename)
 shutil.copyfile(source_image_path, local_image_path)
- self._add_lines('\n' + description)
+ self._add_lines("\n" + description)
 self._add_lines(self.link_to_image(target_image_filename, description), flush=True)

 def flush(self):
@@ -73,7 +72,7 @@ def flush(self):
 try:
 f.write(f"{line}\n")
 except UnicodeEncodeError:
- f.write(f"UnicodeEncodeError in this line.\n")
+ f.write("UnicodeEncodeError in this line.\n")
 f.write(self.page_logger.html_closing())
 f.flush()
 time.sleep(0.1)
@@ -125,18 +124,20 @@ def html_closing(self):

 def add_page(self, summary, show_in_overview=True, final=True):
 # Add a page to the log.
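# Rough call flow for page creation, a sketch for orientation using only names
# defined in this file (indent_level tracks the current page_stack depth):
#
#   logger.enter_function()    # derives "Class.method" from the caller's frame
#     -> add_page(summary=caller_name, show_in_overview=True, final=False)
#          -> Page(...) is created and appended to self.pages
#          -> a link to the new page is written into the calling page, if any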
- page = Page(page_logger=self, - index=self.get_next_page_id(), - summary=summary, - indent_level=len(self.page_stack.stack), - show_in_overview=show_in_overview, - final=final) + page = Page( + page_logger=self, + index=self.get_next_page_id(), + summary=summary, + indent_level=len(self.page_stack.stack), + show_in_overview=show_in_overview, + final=final, + ) self.pages.append(page) self.flush() if len(self.page_stack.stack) > 0: # Insert a link to the new page into the calling page. - self._add_lines('\n' + page.full_link, flush=True) + self._add_lines("\n" + page.full_link, flush=True) return page @@ -233,7 +234,7 @@ def add_model_call(self, summary, input_messages, response): page._add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) page._add_lines("{} completion tokens".format(response.usage.completion_tokens)) for i, m in enumerate(input_messages): - page._add_lines('\n' + self.message_source(m)) + page._add_lines("\n" + self.message_source(m)) page._add_lines(self.message_content(page, message=m)) page._add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) page._add_lines(self.message_content(page, message=response)) @@ -268,17 +269,17 @@ def enter_function(self): frame = inspect.currentframe().f_back # Get the calling frame # Check if it's a method by looking for 'self' or 'cls' in f_locals - if 'self' in frame.f_locals: - class_name = type(frame.f_locals['self']).__name__ - elif 'cls' in frame.f_locals: - class_name = frame.f_locals['cls'].__name__ + if "self" in frame.f_locals: + class_name = type(frame.f_locals["self"]).__name__ + elif "cls" in frame.f_locals: + class_name = frame.f_locals["cls"].__name__ else: class_name = None # Not part of a class if class_name is None: # Not part of a class caller_name = frame.f_code.co_name else: - caller_name = class_name + '.' + frame.f_code.co_name + caller_name = class_name + "." + frame.f_code.co_name # Create a new page for this function. page = self.add_page(summary=caller_name, show_in_overview=True, final=False) @@ -302,6 +303,7 @@ class PageStack: """ A call stack containing a list of currently active tasks and policies in the order they called each other. """ + def __init__(self): self.stack = [] diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index cdb2d7fe0539..b75fa9bd1aad 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -1,17 +1,16 @@ import time from typing import List +from autogen_core import FunctionCall, Image from autogen_core.models import ( AssistantMessage, + CreateResult, LLMMessage, SystemMessage, UserMessage, - CreateResult, ) -from autogen_core import FunctionCall, Image - -from ._utils import message_content_to_str, UserContent, text_from_user_content, single_image_from_user_content +from ._utils import UserContent, message_content_to_str, single_image_from_user_content, text_from_user_content class Prompter: @@ -19,15 +18,16 @@ def __init__(self, client, logger): self.client = client self.logger = logger self.default_system_message_content = "You are a helpful assistant." - self.time_spent_in_model_calls = 0. 
+ self.time_spent_in_model_calls = 0.0 self.num_model_calls = 0 self.start_time = time.time() # Create the chat history self._chat_history: List[LLMMessage] = [] - async def call_model(self, summary, user_content: UserContent = None, system_message_content=None, - keep_these_messages=True): + async def call_model( + self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True + ): # Prepare the input message list if system_message_content is None: system_message_content = self.default_system_message_content @@ -47,7 +47,9 @@ async def call_model(self, summary, user_content: UserContent = None, system_mes if part is None: print("part is None") print("message = ", message) - assert isinstance(part, str) or isinstance(part, Image), "Invalid message content type: {}".format(type(part)) + assert isinstance(part, str) or isinstance(part, Image), "Invalid message content type: {}".format( + type(part) + ) # Call the model start_time = time.time() @@ -76,8 +78,9 @@ async def call_model(self, summary, user_content: UserContent = None, system_mes def clear_history(self): self._chat_history = [] - async def learn_from_failure(self, task_description, memory_section, final_response, expected_answer, - work_history, insights): + async def learn_from_failure( + self, task_description, memory_section, final_response, expected_answer, work_history, insights + ): # Try to create an insight to help avoid this failure in the future. sys_message = """- You are a patient and thorough teacher. @@ -102,19 +105,32 @@ async def learn_from_failure(self, task_description, memory_section, final_respo user_message.append("\n**----- END OF STUDENTS' WORK -----**\n\n") user_message.append( - "# Now carefully review the students' work above, explaining in detail what the students did right and what they did wrong.\n") + "# Now carefully review the students' work above, explaining in detail what the students did right and what they did wrong.\n" + ) self.clear_history() - await self.call_model(summary="Ask the model to learn from this failure", - system_message_content=sys_message, user_content=user_message) + await self.call_model( + summary="Ask the model to learn from this failure", + system_message_content=sys_message, + user_content=user_message, + ) user_message = [ - "Now put yourself in the mind of the students. What misconception led them to their incorrect answer?"] - await self.call_model(summary="Ask the model to state the misconception", - system_message_content=sys_message, user_content=user_message) + "Now put yourself in the mind of the students. What misconception led them to their incorrect answer?" + ] + await self.call_model( + summary="Ask the model to state the misconception", + system_message_content=sys_message, + user_content=user_message, + ) - user_message = ["Please express your key insights in the form of short, general advice that will be given to the students. Just one or two sentences, or they won't bother to read it."] - insight = await self.call_model(summary="Ask the model to formulate a concise insight", - system_message_content=sys_message, user_content=user_message) + user_message = [ + "Please express your key insights in the form of short, general advice that will be given to the students. Just one or two sentences, or they won't bother to read it." 
+ ]
+ insight = await self.call_model(
+ summary="Ask the model to formulate a concise insight",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )
 return insight

 async def find_index_topics(self, input_string):
@@ -135,8 +151,9 @@
 user_message.append(input_string)
 self.clear_history()
- topics = await self.call_model(summary="Ask the model to extract topics",
- system_message_content=sys_message, user_content=user_message)
+ topics = await self.call_model(
+ summary="Ask the model to extract topics", system_message_content=sys_message, user_content=user_message
+ )

 # Parse the topics into a python list.
 topic_list = []
@@ -151,21 +168,36 @@ async def generalize_task(self, task_description):

 sys_message = """You are a helpful and thoughtful assistant."""

- user_message = ["We have been given a task description. Our job is not to complete the task, but merely rephrase the task in simpler, more general terms, if possible. Please read through the following task description, then explain your understanding of the task in detail, as a single, flat list of all the important points."]
+ user_message = [
+ "We have been given a task description. Our job is not to complete the task, but merely rephrase the task in simpler, more general terms, if possible. Please read through the following task description, then explain your understanding of the task in detail, as a single, flat list of all the important points."
+ ]
 user_message.append("\n# Task description")
 user_message.append(task_description)
 self.clear_history()
- await self.call_model(summary="Ask the model to rephrase the task in a list of important points",
- system_message_content=sys_message, user_content=user_message)
+ await self.call_model(
+ summary="Ask the model to rephrase the task in a list of important points",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )

- user_message = ["Do you see any parts of this list that are irrelevant to actually solving the task? If so, explain which items are irrelevant."]
+ user_message = [
+ "Do you see any parts of this list that are irrelevant to actually solving the task? If so, explain which items are irrelevant."
+ ]
- await self.call_model(summary="Ask the model to identify irrelevant points",
- system_message_content=sys_message, user_content=user_message)
+ await self.call_model(
+ summary="Ask the model to identify irrelevant points",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )

- user_message = ["Revise your original list to include only the most general terms, those that are critical to solving the task, removing any themes or descriptions that are not essential to the solution. Your final list may be shorter, but do not leave out any part of the task that is needed for solving the task. Do not add any additional commentary either before or after the list."]
- generalized_task = await self.call_model(summary="Ask the model to make a final list of general terms",
- system_message_content=sys_message, user_content=user_message)
+ user_message = [
+ "Revise your original list to include only the most general terms, those that are critical to solving the task, removing any themes or descriptions that are not essential to the solution. Your final list may be shorter, but do not leave out any part of the task that is needed for solving the task. Do not add any additional commentary either before or after the list."
+ ]
+ generalized_task = await self.call_model(
+ summary="Ask the model to make a final list of general terms",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )
 return generalized_task

 async def validate_insight(self, insight, task_description):
@@ -173,43 +205,54 @@

 sys_message = """You are a helpful and thoughtful assistant."""

- user_message = ["""We have been given a potential insight that may or may not be useful for solving a given task.
+ user_message = [
+ """We have been given a potential insight that may or may not be useful for solving a given task.
- First review the following task.
- Then review the insight that follows, and consider whether it might help solve the given task.
- Do not attempt to actually solve the task.
-- Reply with a single character, '1' if the insight may be useful, or '0' if it is not."""]
+- Reply with a single character, '1' if the insight may be useful, or '0' if it is not."""
+ ]
 user_message.append("\n# Task description")
 user_message.append(task_description)
 user_message.append("\n# Possibly useful insight")
 user_message.append(insight)
 self.clear_history()
- response = await self.call_model(summary="Ask the model to validate the insight",
- system_message_content=sys_message, user_content=user_message)
+ response = await self.call_model(
+ summary="Ask the model to validate the insight",
+ system_message_content=sys_message,
+ user_content=user_message,
+ )
 return response == "1"

 async def extract_task(self, text):
 # Returns a task from the given text, or None if none is found.
 sys_message = """You are a helpful and thoughtful assistant."""

- user_message = ["""Does the following text contain a question or some task we are being asked to perform?
+ user_message = [
+ """Does the following text contain a question or some task we are being asked to perform?
- If so, please reply with the full question or task description, along with any supporting information, but without adding extra commentary or formatting.
- If the task is just to remember something, that doesn't count as a task, so don't include it.
-- If there is no question or task in the text, simply write "None" with no punctuation."""]
+- If there is no question or task in the text, simply write "None" with no punctuation."""
+ ]
 user_message.append("\n# Text to analyze")
 user_message.append(text)
 self.clear_history()
- response = await self.call_model(summary="Ask the model to extract a task",
- system_message_content=sys_message, user_content=user_message)
+ response = await self.call_model(
+ summary="Ask the model to extract a task", system_message_content=sys_message, user_content=user_message
+ )
 return response if response != "None" else None

 async def extract_advice(self, text):
 # Returns advice from the given text, or None if none is found.
 sys_message = """You are a helpful and thoughtful assistant."""

- user_message = ["""Does the following text contain any information or advice that might be useful later?
+ user_message = [
+ """Does the following text contain any information or advice that might be useful later?
- If so, please copy the information or advice, adding no extra commentary or formatting.
-- If there is no potentially useful information or advice at all, simply write "None" with no punctuation."""] +- If there is no potentially useful information or advice at all, simply write "None" with no punctuation.""" + ] user_message.append("\n# Text to analyze") user_message.append(text) self.clear_history() - response = await self.call_model(summary="Ask the model to extract advice", - system_message_content=sys_message, user_content=user_message) + response = await self.call_model( + summary="Ask the model to extract advice", system_message_content=sys_message, user_content=user_message + ) return response if response != "None" else None diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py index ded2bce1f1d5..e5e4c929c933 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py @@ -1,8 +1,9 @@ import os import pickle +from typing import Optional, Union + import chromadb from chromadb.config import Settings -from typing import Optional, Union class StringSimilarityMap: @@ -80,7 +81,9 @@ def save_string_pairs_to_text_files(self): for uid, text in self.uid_text_dict.items(): input_text, output_text = text if self.verbosity >= 1: - self.logger.info(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + self.logger.info( + " ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text) + ) # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. with open("mem_text/{}.txt".format(uid), "w") as file: file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) @@ -108,8 +111,11 @@ def add_input_output_pair(self, input_text: str, output_text: str): self.vec_db.add(documents=[input_text], ids=[str(self.last_string_pair_id)]) self.uid_text_dict[str(self.last_string_pair_id)] = input_text, output_text if self.verbosity >= 1: - self.logger.info("\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( - self.last_string_pair_id, input_text, output_text)) + self.logger.info( + "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( + self.last_string_pair_id, input_text, output_text + ) + ) if self.verbosity >= 3: self.list_string_pairs() @@ -129,7 +135,10 @@ def get_related_string_pairs(self, query_text: str, n_results: int, threshold: U input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 if self.verbosity >= 1: - self.logger.info("\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance)) + self.logger.info( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ) + ) string_pairs.append((input_text, output_text, distance)) return string_pairs diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py index 7073f0d9c079..974cad1a5fc5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py @@ -1,4 +1,5 @@ from typing import 
Any, Dict, List, Union + from autogen_core import FunctionCall, Image from autogen_core.models import FunctionExecutionResult, LLMMessage diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index d4c86bd5cc18..0531fdf55555 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -1,5 +1,5 @@ -from ._agentic_memory_controller import AgenticMemoryController from ._agent_wrapper import AgentWrapper +from ._agentic_memory_controller import AgenticMemoryController class Apprentice: @@ -19,7 +19,7 @@ def __init__(self, settings, evaluator, client, logger): agent=self.agent, reset=False, client=self.client, - logger=self.logger + logger=self.logger, ) def reset_memory(self): diff --git a/python/uv.lock b/python/uv.lock index e8a07804e93d..1db3e099521a 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -18,6 +18,7 @@ resolution-markers = [ [manifest] members = [ "agbench", + "ame", "autogen-agentchat", "autogen-core", "autogen-ext", @@ -89,7 +90,6 @@ wheels = [ [[package]] name = "agbench" -version = "0.0.1a1" source = { editable = "packages/agbench" } dependencies = [ { name = "azure-identity" }, @@ -262,6 +262,17 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/cb/06/8b505aea3d77021b18dcbd8133aa1418f1a1e37e432a465b14c46b2c0eaa/alembic-1.14.0-py3-none-any.whl", hash = "sha256:99bd884ca390466db5e27ffccff1d179ec5c05c965cfefc0607e69f9e411cb25", size = 233482 }, ] +[[package]] +name = "ame" +version = "0.1.1" +source = { editable = "packages/ame" } +dependencies = [ + { name = "autogen-core" }, +] + +[package.metadata] +requires-dist = [{ name = "autogen-core", editable = "packages/autogen-core" }] + [[package]] name = "annotated-types" version = "0.7.0" @@ -338,6 +349,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", size = 66419 }, ] +[[package]] +name = "asgiref" +version = "3.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/38/b3395cc9ad1b56d2ddac9970bc8f4141312dbaec28bc7c218b0dfafd0f42/asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590", size = 35186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/e3/893e8757be2612e6c266d9bb58ad2e3651524b5b40cf56761e985a28b13e/asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47", size = 23828 }, +] + [[package]] name = "asttokens" version = "2.4.1" @@ -560,6 +583,9 @@ dependencies = [ ] [package.optional-dependencies] +apprentice = [ + { name = "chromadb" }, +] azure = [ { name = "azure-core" }, { name = "azure-identity" }, @@ -664,6 +690,7 @@ requires-dist = [ { name = "autogen-core", editable = "packages/autogen-core" }, { name = "azure-core", marker = "extra == 'azure'" }, { name = "azure-identity", marker = "extra == 'azure'" }, + { name = "chromadb", marker = "extra == 'apprentice'" }, { name = "diskcache", marker = "extra == 'diskcache'", specifier = ">=5.6.3" }, { name = "docker", marker = "extra == 'docker'", specifier = "~=7.0" }, { name = 
"ffmpeg-python", marker = "extra == 'video-surfer'" }, @@ -792,7 +819,6 @@ requires-dist = [ [[package]] name = "autogenstudio" -version = "0.4.0" source = { editable = "packages/autogen-studio" } dependencies = [ { name = "aiofiles" }, @@ -965,6 +991,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 }, +] + +[[package]] +name = "bcrypt" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/8c/dd696962612e4cd83c40a9e6b3db77bfe65a830f4b9af44098708584686c/bcrypt-4.2.1.tar.gz", hash = "sha256:6765386e3ab87f569b276988742039baab087b2cdb01e809d74e74503c2faafe", size = 24427 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bc/ca/e17b08c523adb93d5f07a226b2bd45a7c6e96b359e31c1e99f9db58cb8c3/bcrypt-4.2.1-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:1340411a0894b7d3ef562fb233e4b6ed58add185228650942bdc885362f32c17", size = 489982 }, + { url = "https://files.pythonhosted.org/packages/6a/be/e7c6e0fd6087ee8fc6d77d8d9e817e9339d879737509019b9a9012a1d96f/bcrypt-4.2.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ee315739bc8387aa36ff127afc99120ee452924e0df517a8f3e4c0187a0f5f", size = 273108 }, + { url = "https://files.pythonhosted.org/packages/d6/53/ac084b7d985aee1a5f2b086d501f550862596dbf73220663b8c17427e7f2/bcrypt-4.2.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dbd0747208912b1e4ce730c6725cb56c07ac734b3629b60d4398f082ea718ad", size = 278733 }, + { url = "https://files.pythonhosted.org/packages/8e/ab/b8710a3d6231c587e575ead0b1c45bb99f5454f9f579c9d7312c17b069cc/bcrypt-4.2.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:aaa2e285be097050dba798d537b6efd9b698aa88eef52ec98d23dcd6d7cf6fea", size = 273856 }, + { url = "https://files.pythonhosted.org/packages/9d/e5/2fd1ea6395358ffdfd4afe370d5b52f71408f618f781772a48971ef3b92b/bcrypt-4.2.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:76d3e352b32f4eeb34703370e370997065d28a561e4a18afe4fef07249cb4396", size = 279067 }, + { url = "https://files.pythonhosted.org/packages/4e/ef/f2cb7a0f7e1ed800a604f8ab256fb0afcf03c1540ad94ff771ce31e794aa/bcrypt-4.2.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7703ede632dc945ed1172d6f24e9f30f27b1b1a067f32f68bf169c5f08d0425", size = 306851 }, + { url = "https://files.pythonhosted.org/packages/de/cb/578b0023c6a5ca16a177b9044ba6bd6032277bd3ef020fb863eccd22e49b/bcrypt-4.2.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:89df2aea2c43be1e1fa066df5f86c8ce822ab70a30e4c210968669565c0f4685", size = 310793 }, + { url = 
"https://files.pythonhosted.org/packages/98/bc/9d501ee9d754f63d4b1086b64756c284facc3696de9b556c146279a124a5/bcrypt-4.2.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:04e56e3fe8308a88b77e0afd20bec516f74aecf391cdd6e374f15cbed32783d6", size = 320957 }, + { url = "https://files.pythonhosted.org/packages/a1/25/2ec4ce5740abc43182bfc064b9acbbf5a493991246985e8b2bfe231ead64/bcrypt-4.2.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cfdf3d7530c790432046c40cda41dfee8c83e29482e6a604f8930b9930e94139", size = 339958 }, + { url = "https://files.pythonhosted.org/packages/6d/64/fd67788f64817727897d31e9cdeeeba3941eaad8540733c05c7eac4aa998/bcrypt-4.2.1-cp37-abi3-win32.whl", hash = "sha256:adadd36274510a01f33e6dc08f5824b97c9580583bd4487c564fc4617b328005", size = 160912 }, + { url = "https://files.pythonhosted.org/packages/00/8f/fe834eaa54abbd7cab8607e5020fa3a0557e929555b9e4ca404b4adaab06/bcrypt-4.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:8c458cd103e6c5d1d85cf600e546a639f234964d0228909d8f8dbeebff82d526", size = 152981 }, + { url = "https://files.pythonhosted.org/packages/4a/57/23b46933206daf5384b5397d9878746d2249fe9d45efaa8e1467c87d3048/bcrypt-4.2.1-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8ad2f4528cbf0febe80e5a3a57d7a74e6635e41af1ea5675282a33d769fba413", size = 489842 }, + { url = "https://files.pythonhosted.org/packages/fd/28/3ea8a39ddd4938b6c6b6136816d72ba5e659e2d82b53d843c8c53455ac4d/bcrypt-4.2.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909faa1027900f2252a9ca5dfebd25fc0ef1417943824783d1c8418dd7d6df4a", size = 272500 }, + { url = "https://files.pythonhosted.org/packages/77/7f/b43622999f5d4de06237a195ac5501ac83516adf571b907228cd14bac8fe/bcrypt-4.2.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde78d385d5e93ece5479a0a87f73cd6fa26b171c786a884f955e165032b262c", size = 278368 }, + { url = "https://files.pythonhosted.org/packages/50/68/f2e3959014b4d8874c747e6e171d46d3e63a3a39aaca8417a8d837eda0a8/bcrypt-4.2.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:533e7f3bcf2f07caee7ad98124fab7499cb3333ba2274f7a36cf1daee7409d99", size = 273335 }, + { url = "https://files.pythonhosted.org/packages/d6/c3/4b4bad4da852924427c651589d464ad1aa624f94dd904ddda8493b0a35e5/bcrypt-4.2.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:687cf30e6681eeda39548a93ce9bfbb300e48b4d445a43db4298d2474d2a1e54", size = 278614 }, + { url = "https://files.pythonhosted.org/packages/6e/5a/ee107961e84c41af2ac201d0460f962b6622ff391255ffd46429e9e09dc1/bcrypt-4.2.1-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:041fa0155c9004eb98a232d54da05c0b41d4b8e66b6fc3cb71b4b3f6144ba837", size = 306464 }, + { url = "https://files.pythonhosted.org/packages/5c/72/916e14fa12d2b1d1fc6c26ea195337419da6dd23d0bf53ac61ef3739e5c5/bcrypt-4.2.1-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f85b1ffa09240c89aa2e1ae9f3b1c687104f7b2b9d2098da4e923f1b7082d331", size = 310674 }, + { url = "https://files.pythonhosted.org/packages/97/92/3dc76d8bfa23300591eec248e950f85bd78eb608c96bd4747ce4cc06acdb/bcrypt-4.2.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c6f5fa3775966cca251848d4d5393ab016b3afed251163c1436fefdec3b02c84", size = 320577 }, + { url = "https://files.pythonhosted.org/packages/5d/ab/a6c0da5c2cf86600f74402a72b06dfe365e1a1d30783b1bbeec460fd57d1/bcrypt-4.2.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:807261df60a8b1ccd13e6599c779014a362ae4e795f5c59747f60208daddd96d", size = 339836 }, + { url = 
"https://files.pythonhosted.org/packages/b4/b4/e75b6e9a72a030a04362034022ebe317c5b735d04db6ad79237101ae4a5c/bcrypt-4.2.1-cp39-abi3-win32.whl", hash = "sha256:b588af02b89d9fad33e5f98f7838bf590d6d692df7153647724a7f20c186f6bf", size = 160911 }, + { url = "https://files.pythonhosted.org/packages/76/b9/d51d34e6cd6d887adddb28a8680a1d34235cc45b9d6e238ce39b98199ca0/bcrypt-4.2.1-cp39-abi3-win_amd64.whl", hash = "sha256:e84e0e6f8e40a242b11bce56c313edc2be121cec3e0ec2d76fce01f6af33c07c", size = 153078 }, + { url = "https://files.pythonhosted.org/packages/4e/6e/7193067042de23af3d71882f898c8c0bd2b18e6ee44a4f76e395dfadb5a8/bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76132c176a6d9953cdc83c296aeaed65e1a708485fd55abf163e0d9f8f16ce0e", size = 270069 }, + { url = "https://files.pythonhosted.org/packages/3b/05/2546085c6dc07a45627460a39e6291b82382b434fff2bd0167ff3bc31eb1/bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e158009a54c4c8bc91d5e0da80920d048f918c61a581f0a63e4e93bb556d362f", size = 274652 }, +] + [[package]] name = "beartype" version = "0.18.5" @@ -1044,6 +1111,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0c/fe/c066e8cb069027c12dbcf9066a7a4f3e9d2a31b10c7b174a8455ef1d0f46/botocore-1.36.2-py3-none-any.whl", hash = "sha256:bc3b7e3b573a48af2bd7116b80fe24f9a335b0b67314dcb2697a327d009abf29", size = 13302324 }, ] +[[package]] +name = "build" +version = "1.2.2.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "(os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (os_name == 'nt' and sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, + { name = "packaging" }, + { name = "pyproject-hooks" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/46/aeab111f8e06793e4f0e421fcad593d547fb8313b50990f31681ee2fb1ad/build-1.2.2.post1.tar.gz", hash = "sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7", size = 46701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/c2/80633736cd183ee4a62107413def345f7e6e3c01563dbca1417363cf957e/build-1.2.2.post1-py3-none-any.whl", hash = "sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5", size = 22950 }, +] + [[package]] name = "cachetools" version = "5.5.0" @@ -1213,6 +1296,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/93/342cc62a70ab727e093ed98e02a725d85b746345f05d2b5e5034649f4ec8/chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443", size = 11595 }, ] +[[package]] +name = "chroma-hnswlib" +version = "0.7.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/09/10d57569e399ce9cbc5eee2134996581c957f63a9addfa6ca657daf006b8/chroma_hnswlib-0.7.6.tar.gz", hash = "sha256:4dce282543039681160259d29fcde6151cc9106c6461e0485f57cdccd83059b7", size = 32256 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/74/b9dde05ea8685d2f8c4681b517e61c7887e974f6272bb24ebc8f2105875b/chroma_hnswlib-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f35192fbbeadc8c0633f0a69c3d3e9f1a4eab3a46b65458bbcbcabdd9e895c36", size = 195821 }, + { url = 
"https://files.pythonhosted.org/packages/fd/58/101bfa6bc41bc6cc55fbb5103c75462a7bf882e1704256eb4934df85b6a8/chroma_hnswlib-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f007b608c96362b8f0c8b6b2ac94f67f83fcbabd857c378ae82007ec92f4d82", size = 183854 }, + { url = "https://files.pythonhosted.org/packages/17/ff/95d49bb5ce134f10d6aa08d5f3bec624eaff945f0b17d8c3fce888b9a54a/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:456fd88fa0d14e6b385358515aef69fc89b3c2191706fd9aee62087b62aad09c", size = 2358774 }, + { url = "https://files.pythonhosted.org/packages/3a/6d/27826180a54df80dbba8a4f338b022ba21c0c8af96fd08ff8510626dee8f/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dfaae825499c2beaa3b75a12d7ec713b64226df72a5c4097203e3ed532680da", size = 2392739 }, + { url = "https://files.pythonhosted.org/packages/d6/63/ee3e8b7a8f931918755faacf783093b61f32f59042769d9db615999c3de0/chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:2487201982241fb1581be26524145092c95902cb09fc2646ccfbc407de3328ec", size = 150955 }, + { url = "https://files.pythonhosted.org/packages/f5/af/d15fdfed2a204c0f9467ad35084fbac894c755820b203e62f5dcba2d41f1/chroma_hnswlib-0.7.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81181d54a2b1e4727369486a631f977ffc53c5533d26e3d366dda243fb0998ca", size = 196911 }, + { url = "https://files.pythonhosted.org/packages/0d/19/aa6f2139f1ff7ad23a690ebf2a511b2594ab359915d7979f76f3213e46c4/chroma_hnswlib-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4ab4e11f1083dd0a11ee4f0e0b183ca9f0f2ed63ededba1935b13ce2b3606f", size = 185000 }, + { url = "https://files.pythonhosted.org/packages/79/b1/1b269c750e985ec7d40b9bbe7d66d0a890e420525187786718e7f6b07913/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53db45cd9173d95b4b0bdccb4dbff4c54a42b51420599c32267f3abbeb795170", size = 2377289 }, + { url = "https://files.pythonhosted.org/packages/c7/2d/d5663e134436e5933bc63516a20b5edc08b4c1b1588b9680908a5f1afd04/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c093f07a010b499c00a15bc9376036ee4800d335360570b14f7fe92badcdcf9", size = 2411755 }, + { url = "https://files.pythonhosted.org/packages/3e/79/1bce519cf186112d6d5ce2985392a89528c6e1e9332d680bf752694a4cdf/chroma_hnswlib-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:0540b0ac96e47d0aa39e88ea4714358ae05d64bbe6bf33c52f316c664190a6a3", size = 151888 }, + { url = "https://files.pythonhosted.org/packages/93/ac/782b8d72de1c57b64fdf5cb94711540db99a92768d93d973174c62d45eb8/chroma_hnswlib-0.7.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e87e9b616c281bfbe748d01705817c71211613c3b063021f7ed5e47173556cb7", size = 197804 }, + { url = "https://files.pythonhosted.org/packages/32/4e/fd9ce0764228e9a98f6ff46af05e92804090b5557035968c5b4198bc7af9/chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec5ca25bc7b66d2ecbf14502b5729cde25f70945d22f2aaf523c2d747ea68912", size = 185421 }, + { url = "https://files.pythonhosted.org/packages/d9/3d/b59a8dedebd82545d873235ef2d06f95be244dfece7ee4a1a6044f080b18/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305ae491de9d5f3c51e8bd52d84fdf2545a4a2bc7af49765cda286b7bb30b1d4", size = 2389672 }, + { url = 
"https://files.pythonhosted.org/packages/74/1e/80a033ea4466338824974a34f418e7b034a7748bf906f56466f5caa434b0/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:822ede968d25a2c88823ca078a58f92c9b5c4142e38c7c8b4c48178894a0a3c5", size = 2436986 }, +] + +[[package]] +name = "chromadb" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bcrypt" }, + { name = "build" }, + { name = "chroma-hnswlib" }, + { name = "fastapi" }, + { name = "grpcio" }, + { name = "httpx" }, + { name = "importlib-resources" }, + { name = "kubernetes" }, + { name = "mmh3" }, + { name = "numpy" }, + { name = "onnxruntime" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-sdk" }, + { name = "orjson" }, + { name = "overrides" }, + { name = "posthog" }, + { name = "pydantic" }, + { name = "pypika" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/cd/f0f2de3f466ff514fb6b58271c14f6d22198402bb5b71b8d890231265946/chromadb-0.6.3.tar.gz", hash = "sha256:c8f34c0b704b9108b04491480a36d42e894a960429f87c6516027b5481d59ed3", size = 29297929 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/8e/5c186c77bf749b6fe0528385e507e463f1667543328d76fd00a49e1a4e6a/chromadb-0.6.3-py3-none-any.whl", hash = "sha256:4851258489a3612b558488d98d09ae0fe0a28d5cad6bd1ba64b96fdc419dc0e5", size = 611129 }, +] + [[package]] name = "chromedriver-autoinstaller" version = "0.6.4" @@ -1648,6 +1795,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] +[[package]] +name = "durationpy" +version = "0.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, +] + [[package]] name = "environs" version = "11.2.1" @@ -2462,6 +2618,35 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a8/6c/d2fbdaaa5959339d53ba38e94c123e4e84b8fbc4b84beb0e70d7c1608486/httplib2-0.22.0-py3-none-any.whl", hash = "sha256:14ae0a53c1ba8f3d37e9e27cf37eabb0fb9980f435ba405d546948b009dd64dc", size = 96854 }, ] +[[package]] +name = "httptools" +version = "0.6.4" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/a7/9a/ce5e1f7e131522e6d3426e8e7a490b3a01f39a6696602e1c4f33f9e94277/httptools-0.6.4.tar.gz", hash = "sha256:4e93eee4add6493b59a5c514da98c939b244fce4a0d8879cd3f466562f4b7d5c", size = 240639 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/3b/6f/972f8eb0ea7d98a1c6be436e2142d51ad2a64ee18e02b0e7ff1f62171ab1/httptools-0.6.4-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3c73ce323711a6ffb0d247dcd5a550b8babf0f757e86a52558fe5b86d6fefcc0", size = 198780 }, + { url = "https://files.pythonhosted.org/packages/6a/b0/17c672b4bc5c7ba7f201eada4e96c71d0a59fbc185e60e42580093a86f21/httptools-0.6.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:345c288418f0944a6fe67be8e6afa9262b18c7626c3ef3c28adc5eabc06a68da", size = 103297 }, + { url = "https://files.pythonhosted.org/packages/92/5e/b4a826fe91971a0b68e8c2bd4e7db3e7519882f5a8ccdb1194be2b3ab98f/httptools-0.6.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:deee0e3343f98ee8047e9f4c5bc7cedbf69f5734454a94c38ee829fb2d5fa3c1", size = 443130 }, + { url = "https://files.pythonhosted.org/packages/b0/51/ce61e531e40289a681a463e1258fa1e05e0be54540e40d91d065a264cd8f/httptools-0.6.4-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:ca80b7485c76f768a3bc83ea58373f8db7b015551117375e4918e2aa77ea9b50", size = 442148 }, + { url = "https://files.pythonhosted.org/packages/ea/9e/270b7d767849b0c96f275c695d27ca76c30671f8eb8cc1bab6ced5c5e1d0/httptools-0.6.4-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:90d96a385fa941283ebd231464045187a31ad932ebfa541be8edf5b3c2328959", size = 415949 }, + { url = "https://files.pythonhosted.org/packages/81/86/ced96e3179c48c6f656354e106934e65c8963d48b69be78f355797f0e1b3/httptools-0.6.4-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:59e724f8b332319e2875efd360e61ac07f33b492889284a3e05e6d13746876f4", size = 417591 }, + { url = "https://files.pythonhosted.org/packages/75/73/187a3f620ed3175364ddb56847d7a608a6fc42d551e133197098c0143eca/httptools-0.6.4-cp310-cp310-win_amd64.whl", hash = "sha256:c26f313951f6e26147833fc923f78f95604bbec812a43e5ee37f26dc9e5a686c", size = 88344 }, + { url = "https://files.pythonhosted.org/packages/7b/26/bb526d4d14c2774fe07113ca1db7255737ffbb119315839af2065abfdac3/httptools-0.6.4-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:f47f8ed67cc0ff862b84a1189831d1d33c963fb3ce1ee0c65d3b0cbe7b711069", size = 199029 }, + { url = "https://files.pythonhosted.org/packages/a6/17/3e0d3e9b901c732987a45f4f94d4e2c62b89a041d93db89eafb262afd8d5/httptools-0.6.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:0614154d5454c21b6410fdf5262b4a3ddb0f53f1e1721cfd59d55f32138c578a", size = 103492 }, + { url = "https://files.pythonhosted.org/packages/b7/24/0fe235d7b69c42423c7698d086d4db96475f9b50b6ad26a718ef27a0bce6/httptools-0.6.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f8787367fbdfccae38e35abf7641dafc5310310a5987b689f4c32cc8cc3ee975", size = 462891 }, + { url = "https://files.pythonhosted.org/packages/b1/2f/205d1f2a190b72da6ffb5f41a3736c26d6fa7871101212b15e9b5cd8f61d/httptools-0.6.4-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:40b0f7fe4fd38e6a507bdb751db0379df1e99120c65fbdc8ee6c1d044897a636", size = 459788 }, + { url = "https://files.pythonhosted.org/packages/6e/4c/d09ce0eff09057a206a74575ae8f1e1e2f0364d20e2442224f9e6612c8b9/httptools-0.6.4-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:40a5ec98d3f49904b9fe36827dcf1aadfef3b89e2bd05b0e35e94f97c2b14721", size = 433214 }, + { url = 
"https://files.pythonhosted.org/packages/3e/d2/84c9e23edbccc4a4c6f96a1b8d99dfd2350289e94f00e9ccc7aadde26fb5/httptools-0.6.4-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:dacdd3d10ea1b4ca9df97a0a303cbacafc04b5cd375fa98732678151643d4988", size = 434120 }, + { url = "https://files.pythonhosted.org/packages/d0/46/4d8e7ba9581416de1c425b8264e2cadd201eb709ec1584c381f3e98f51c1/httptools-0.6.4-cp311-cp311-win_amd64.whl", hash = "sha256:288cd628406cc53f9a541cfaf06041b4c71d751856bab45e3702191f931ccd17", size = 88565 }, + { url = "https://files.pythonhosted.org/packages/bb/0e/d0b71465c66b9185f90a091ab36389a7352985fe857e352801c39d6127c8/httptools-0.6.4-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:df017d6c780287d5c80601dafa31f17bddb170232d85c066604d8558683711a2", size = 200683 }, + { url = "https://files.pythonhosted.org/packages/e2/b8/412a9bb28d0a8988de3296e01efa0bd62068b33856cdda47fe1b5e890954/httptools-0.6.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:85071a1e8c2d051b507161f6c3e26155b5c790e4e28d7f236422dbacc2a9cc44", size = 104337 }, + { url = "https://files.pythonhosted.org/packages/9b/01/6fb20be3196ffdc8eeec4e653bc2a275eca7f36634c86302242c4fbb2760/httptools-0.6.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:69422b7f458c5af875922cdb5bd586cc1f1033295aa9ff63ee196a87519ac8e1", size = 508796 }, + { url = "https://files.pythonhosted.org/packages/f7/d8/b644c44acc1368938317d76ac991c9bba1166311880bcc0ac297cb9d6bd7/httptools-0.6.4-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:16e603a3bff50db08cd578d54f07032ca1631450ceb972c2f834c2b860c28ea2", size = 510837 }, + { url = "https://files.pythonhosted.org/packages/52/d8/254d16a31d543073a0e57f1c329ca7378d8924e7e292eda72d0064987486/httptools-0.6.4-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:ec4f178901fa1834d4a060320d2f3abc5c9e39766953d038f1458cb885f47e81", size = 485289 }, + { url = "https://files.pythonhosted.org/packages/5f/3c/4aee161b4b7a971660b8be71a92c24d6c64372c1ab3ae7f366b3680df20f/httptools-0.6.4-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:f9eb89ecf8b290f2e293325c646a211ff1c2493222798bb80a530c5e7502494f", size = 489779 }, + { url = "https://files.pythonhosted.org/packages/12/b7/5cae71a8868e555f3f67a50ee7f673ce36eac970f029c0c5e9d584352961/httptools-0.6.4-cp312-cp312-win_amd64.whl", hash = "sha256:db78cb9ca56b59b016e64b6031eda5653be0589dba2b1b43453f6e8b405a0970", size = 88634 }, +] + [[package]] name = "httpx" version = "0.28.1" @@ -2561,6 +2746,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c0/14/362d31bf1076b21e1bcdcb0dc61944822ff263937b804a79231df2774d28/importlib_metadata-8.4.0-py3-none-any.whl", hash = "sha256:66f342cc6ac9818fc6ff340576acd24d65ba0b3efabb2b4ac08b598965a4a2f1", size = 26269 }, ] +[[package]] +name = "importlib-resources" +version = "6.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -2924,6 
+3118,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762 }, ] +[[package]] +name = "kubernetes" +version = "31.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "durationpy" }, + { name = "google-auth" }, + { name = "oauthlib" }, + { name = "python-dateutil" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "requests-oauthlib" }, + { name = "six" }, + { name = "urllib3" }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7e/bd/ffcd3104155b467347cd9b3a64eb24182e459579845196b3a200569c8912/kubernetes-31.0.0.tar.gz", hash = "sha256:28945de906c8c259c1ebe62703b56a03b714049372196f854105afe4e6d014c0", size = 916096 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fb/a8/17f5e28cecdbd6d48127c22abdb794740803491f422a11905c4569d8e139/kubernetes-31.0.0-py2.py3-none-any.whl", hash = "sha256:bf141e2d380c8520eada8b351f4e319ffee9636328c137aa432bc486ca1200e1", size = 1857013 }, +] + [[package]] name = "lancedb" version = "0.17.0" @@ -3806,6 +4022,71 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/1a/b4/a76b6942b78383d5499f776d880a166296542383f6f952feeef96d0ea692/mistralai-1.3.1-py3-none-any.whl", hash = "sha256:35e74feadf835b7d2145095114b9cf3ba86c4cf1044f28f49b02cd6ddd0a5733", size = 261271 }, ] +[[package]] +name = "mmh3" +version = "5.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e2/08/04ad6419f072ea3f51f9a0f429dd30f5f0a0b02ead7ca11a831117b6f9e8/mmh3-5.0.1.tar.gz", hash = "sha256:7dab080061aeb31a6069a181f27c473a1f67933854e36a3464931f2716508896", size = 32008 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/fa/b9/9a91b0a0e330557cdbf51fc43ca0ba306633f2ec6d2b15e871e288592a32/mmh3-5.0.1-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:f0a4b4bf05778ed77d820d6e7d0e9bd6beb0c01af10e1ce9233f5d2f814fcafa", size = 52867 }, + { url = "https://files.pythonhosted.org/packages/da/28/6b37f0d6707872764e1af49f327b0940b6a3ad995d91b3839b90ba35f559/mmh3-5.0.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:ac7a391039aeab95810c2d020b69a94eb6b4b37d4e2374831e92db3a0cdf71c6", size = 38352 }, + { url = "https://files.pythonhosted.org/packages/76/84/a98f59a620b522f218876a0630b02fc345ecf078f6393595756ddb3aa0b5/mmh3-5.0.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3a2583b5521ca49756d8d8bceba80627a9cc295f255dcab4e3df7ccc2f09679a", size = 38214 }, + { url = "https://files.pythonhosted.org/packages/35/cb/4980c7eb6cd31f49d1913a4066562bc9e0af28526750f1232be9688a9cd4/mmh3-5.0.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:081a8423fe53c1ac94f87165f3e4c500125d343410c1a0c5f1703e898a3ef038", size = 93502 }, + { url = "https://files.pythonhosted.org/packages/65/f3/29726296fadeaf06134a6978f7c453dfa562cf2f0f1faf9ae28b9b8ef76e/mmh3-5.0.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:b8b4d72713799755dc8954a7d36d5c20a6c8de7b233c82404d122c7c7c1707cc", size = 98394 }, + { url = "https://files.pythonhosted.org/packages/35/fd/e181f4f4b250f7b63ee27a7d65e5e290a3ea0e26cc633f4bfd906f04558b/mmh3-5.0.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:389a6fd51efc76d3182d36ec306448559c1244f11227d2bb771bdd0e6cc91321", size = 98052 }, + { url = "https://files.pythonhosted.org/packages/61/5c/8a5d838da3eb3fb91035ef5eaaea469abab4e8e3fae55607c27a1a07d162/mmh3-5.0.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:39f4128edaa074bff721b1d31a72508cba4d2887ee7867f22082e1fe9d4edea0", size = 86320 }, + { url = "https://files.pythonhosted.org/packages/10/80/3f33a8f4de12cea322607da1a84d001513affb741b3c3cc1277ecb85d34b/mmh3-5.0.1-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1d5d23a94d91aabba3386b3769048d5f4210fdfef80393fece2f34ba5a7b466c", size = 93232 }, + { url = "https://files.pythonhosted.org/packages/9e/1c/d0ce5f498493be4de2e7e7596e1cbf63315a4c0bb8bb94e3c37c4fad965d/mmh3-5.0.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:16347d038361f8b8f24fd2b7ef378c9b68ddee9f7706e46269b6e0d322814713", size = 93590 }, + { url = "https://files.pythonhosted.org/packages/d9/66/770b5ad35b5a2eb7965f3fcaeaa76148e59543575d2e27b80690c1b0795c/mmh3-5.0.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6e299408565af7d61f2d20a5ffdd77cf2ed902460fe4e6726839d59ba4b72316", size = 88433 }, + { url = "https://files.pythonhosted.org/packages/14/58/e0d258b18749d8640233976493716a40aa27352dcb1cea941836357dac24/mmh3-5.0.1-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:42050af21ddfc5445ee5a66e73a8fc758c71790305e3ee9e4a85a8e69e810f94", size = 99339 }, + { url = "https://files.pythonhosted.org/packages/38/26/7267146122deb584cf377975b994d80c6d72c4c8d0e8eedff4d0cc5cd4c8/mmh3-5.0.1-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:2ae9b1f5ef27ec54659920f0404b7ceb39966e28867c461bfe83a05e8d18ddb0", size = 93944 }, + { url = "https://files.pythonhosted.org/packages/8d/6b/df60b14a2dd383d8848f6f35496c86c7003be3ffb236789e98d002c542c6/mmh3-5.0.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:50c2495a02045f3047d71d4ae9cdd7a15efc0bcbb7ff17a18346834a8e2d1d19", size = 92798 }, + { url = "https://files.pythonhosted.org/packages/0a/3f/d5fecf13915163a15b449e5cc89232a4df90e836ecad1c38121318119d27/mmh3-5.0.1-cp310-cp310-win32.whl", hash = "sha256:c028fa77cddf351ca13b4a56d43c1775652cde0764cadb39120b68f02a23ecf6", size = 39185 }, + { url = "https://files.pythonhosted.org/packages/74/8e/4bb5ade332a87de633cda21dae09d6002d69601f2b93e9f40302ab2d9acf/mmh3-5.0.1-cp310-cp310-win_amd64.whl", hash = "sha256:c5e741e421ec14400c4aae30890515c201f518403bdef29ae1e00d375bb4bbb5", size = 39766 }, + { url = "https://files.pythonhosted.org/packages/16/2b/cd5cfa4d7ad40a37655af491f9270909d63fc27bcf0558ec36000ee5347f/mmh3-5.0.1-cp310-cp310-win_arm64.whl", hash = "sha256:b17156d56fabc73dbf41bca677ceb6faed435cc8544f6566d72ea77d8a17e9d0", size = 36540 }, + { url = "https://files.pythonhosted.org/packages/fb/8a/f3b9cf8b7110fef0f130158d7602af6f5b09f2cf568130814b7c92e2507b/mmh3-5.0.1-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9a6d5a9b1b923f1643559ba1fc0bf7a5076c90cbb558878d3bf3641ce458f25d", size = 52867 }, + { url = "https://files.pythonhosted.org/packages/bf/06/f466e0da3c5bd6fbb1e047f70fd4e9e9563d0268aa56de511f363478dbf2/mmh3-5.0.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:3349b968be555f7334bbcce839da98f50e1e80b1c615d8e2aa847ea4a964a012", size = 38349 }, + { url = "https://files.pythonhosted.org/packages/13/f0/2d3daca276a4673f82af859e4b0b18befd4e6e54f1017ba48ea9735b2f1b/mmh3-5.0.1-cp311-cp311-macosx_11_0_arm64.whl", hash = 
"sha256:1bd3c94b110e55db02ab9b605029f48a2f7f677c6e58c09d44e42402d438b7e1", size = 38211 }, + { url = "https://files.pythonhosted.org/packages/e3/56/a2d203ca97702d4e045ac1a46a608393da1a1dddb24f81de664dae940518/mmh3-5.0.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d47ba84d48608f79adbb10bb09986b6dc33eeda5c2d1bd75d00820081b73bde9", size = 95104 }, + { url = "https://files.pythonhosted.org/packages/ec/45/c7c8ae64e3ae024776a0ce5377c16c6741a3359f3e9505fc35fc5012beb2/mmh3-5.0.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c0217987a8b8525c8d9170f66d036dec4ab45cfbd53d47e8d76125791ceb155e", size = 100049 }, + { url = "https://files.pythonhosted.org/packages/d5/74/681113776fe406c09870ab2152ffbd214a15bbc8f1d1da9ad73ce594b878/mmh3-5.0.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b2797063a34e78d1b61639a98b0edec1c856fa86ab80c7ec859f1796d10ba429", size = 99671 }, + { url = "https://files.pythonhosted.org/packages/bf/4f/dbb8be18ce9b6ff8df14bc14348c0404b3091fb51df9c673ebfcf5877db3/mmh3-5.0.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:8bba16340adcbd47853a2fbe5afdb397549e8f2e79324ff1dced69a3f8afe7c3", size = 87549 }, + { url = "https://files.pythonhosted.org/packages/5f/82/274d646f3f604c35b7e3d4eb7f3ff08b3bdc6a2c87d797709bb6f084a611/mmh3-5.0.1-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:282797957c9f60b51b9d768a602c25f579420cc9af46feb77d457a27823d270a", size = 94780 }, + { url = "https://files.pythonhosted.org/packages/c9/a1/f094ca8b8fb5e2ac53201070bda42b0fee80ceb92c153eb99a1453e3aed3/mmh3-5.0.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:e4fb670c29e63f954f9e7a2cdcd57b36a854c2538f579ef62681ccbaa1de2b69", size = 90430 }, + { url = "https://files.pythonhosted.org/packages/d9/23/4732ba68c6ef7242b69bb53b9e1bcb2ef065d68ed85fd26e829fb911ab5a/mmh3-5.0.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:8ee7d85438dc6aff328e19ab052086a3c29e8a9b632998a49e5c4b0034e9e8d6", size = 89451 }, + { url = "https://files.pythonhosted.org/packages/3c/c5/daea5d534fcf20b2399c2a7b1cd00a8d29d4d474247c15c2c94548a1a272/mmh3-5.0.1-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:b7fb5db231f3092444bc13901e6a8d299667126b00636ffbad4a7b45e1051e2f", size = 94703 }, + { url = "https://files.pythonhosted.org/packages/5e/4a/34d5691e7be7c63c34181387bc69bdcc0005ca93c8b562d68cb5775e0e78/mmh3-5.0.1-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:c100dd441703da5ec136b1d9003ed4a041d8a1136234c9acd887499796df6ad8", size = 91054 }, + { url = "https://files.pythonhosted.org/packages/5c/3a/ab31bb5e9e1a19a4a997593cbe6ce56710308218ff36c7f76d40ff9c8d2e/mmh3-5.0.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:71f3b765138260fd7a7a2dba0ea5727dabcd18c1f80323c9cfef97a7e86e01d0", size = 89571 }, + { url = "https://files.pythonhosted.org/packages/0b/79/b986bb067dbfcba6879afe6e723aad1bd53f223450532dd9a4606d0af389/mmh3-5.0.1-cp311-cp311-win32.whl", hash = "sha256:9a76518336247fd17689ce3ae5b16883fd86a490947d46a0193d47fb913e26e3", size = 39187 }, + { url = "https://files.pythonhosted.org/packages/48/69/97029eda3df0f84edde16a496a2e71bac508fc5d1f0a31e163da071e2670/mmh3-5.0.1-cp311-cp311-win_amd64.whl", hash = "sha256:336bc4df2e44271f1c302d289cc3d78bd52d3eed8d306c7e4bff8361a12bf148", size = 39766 }, + { url = 
"https://files.pythonhosted.org/packages/c7/51/538f2b8412303281d8ce2a9a5c4ea84ff81f06de98af0b7c72059727a3bb/mmh3-5.0.1-cp311-cp311-win_arm64.whl", hash = "sha256:af6522722fbbc5999aa66f7244d0986767a46f1fb05accc5200f75b72428a508", size = 36540 }, + { url = "https://files.pythonhosted.org/packages/75/c7/5b52d0882e7c0dccfaf8786a648e2b26c5307c594abe5cbe98c092607c97/mmh3-5.0.1-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:f2730bb263ed9c388e8860438b057a53e3cc701134a6ea140f90443c4c11aa40", size = 52907 }, + { url = "https://files.pythonhosted.org/packages/01/b5/9609fa353c27188292748db033323c206f3fc6fbfa124bccf6a42af0da08/mmh3-5.0.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:6246927bc293f6d56724536400b85fb85f5be26101fa77d5f97dd5e2a4c69bf2", size = 38389 }, + { url = "https://files.pythonhosted.org/packages/33/99/49bf3c86244857b3b250c2f54aff22a5a78ef12258af556fa39bb1e80699/mmh3-5.0.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:fbca322519a6e6e25b6abf43e940e1667cf8ea12510e07fb4919b48a0cd1c411", size = 38204 }, + { url = "https://files.pythonhosted.org/packages/f8/04/8860cab35b48aaefe40cf88344437e79ddc93cf7ff745dacd1cd56a2be1e/mmh3-5.0.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:eae8c19903ed8a1724ad9e67e86f15d198a7a1271a4f9be83d47e38f312ed672", size = 95091 }, + { url = "https://files.pythonhosted.org/packages/fa/e9/4ac56001a5bab6d26aa3dfabeddea6d7f037fd2972c76803259f51a5af75/mmh3-5.0.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:a09fd6cc72c07c0c07c3357714234b646d78052487c4a3bd5f7f6e08408cff60", size = 100055 }, + { url = "https://files.pythonhosted.org/packages/18/e8/7d5fd73f559c423ed5b72f940130c27803a406ee0ffc32ef5422f733df67/mmh3-5.0.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2ff8551fee7ae3b11c5d986b6347ade0dccaadd4670ffdb2b944dee120ffcc84", size = 99764 }, + { url = "https://files.pythonhosted.org/packages/54/d8/c0d89da6c729feec997a9b3b68698894cef12359ade0da95eba9e03b1d5d/mmh3-5.0.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:e39694c73a5a20c8bf36dfd8676ed351e5234d55751ba4f7562d85449b21ef3f", size = 87650 }, + { url = "https://files.pythonhosted.org/packages/dd/41/ec0ee3fd5124c83cb767dcea8569bb326f8981cc88c991e3e4e948a31e24/mmh3-5.0.1-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eba6001989a92f72a89c7cf382fda831678bd780707a66b4f8ca90239fdf2123", size = 94976 }, + { url = "https://files.pythonhosted.org/packages/8e/fa/e8059199fe6fbb2fd6494302904cb1209b2f8b6899d58059858a280e89a5/mmh3-5.0.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:0771f90c9911811cc606a5c7b7b58f33501c9ee896ed68a6ac22c7d55878ecc0", size = 90485 }, + { url = "https://files.pythonhosted.org/packages/3a/a0/eb9da5f93dea3f44b8e970f013279d1543ab210ccf63bb030830968682aa/mmh3-5.0.1-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:09b31ed0c0c0920363e96641fac4efde65b1ab62b8df86293142f35a254e72b4", size = 89554 }, + { url = "https://files.pythonhosted.org/packages/e7/e8/5803181eac4e015b4caf307af22fea74292dca48e580d93afe402dcdc138/mmh3-5.0.1-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:5cf4a8deda0235312db12075331cb417c4ba163770edfe789bde71d08a24b692", size = 94872 }, + { url = "https://files.pythonhosted.org/packages/ed/f9/4d55063f9dcaed41524f078a85989efdf1d335159af5e70af29942ebae67/mmh3-5.0.1-cp312-cp312-musllinux_1_2_s390x.whl", hash = 
"sha256:41f7090a95185ef20ac018581a99337f0cbc84a2135171ee3290a9c0d9519585", size = 91326 }, + { url = "https://files.pythonhosted.org/packages/80/75/0a5acab5291480acd939db80e94448ac937fc7fbfddc0a67b3e721ebfc9c/mmh3-5.0.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:b97b5b368fb7ff22194ec5854f5b12d8de9ab67a0f304728c7f16e5d12135b76", size = 89810 }, + { url = "https://files.pythonhosted.org/packages/9b/fd/eb1a3573cda74d4c2381d10ded62c128e869954ced1881c15e2bcd97a48f/mmh3-5.0.1-cp312-cp312-win32.whl", hash = "sha256:842516acf04da546f94fad52db125ee619ccbdcada179da51c326a22c4578cb9", size = 39206 }, + { url = "https://files.pythonhosted.org/packages/66/e8/542ed252924002b84c43a68a080cfd4facbea0d5df361e4f59637638d3c7/mmh3-5.0.1-cp312-cp312-win_amd64.whl", hash = "sha256:d963be0dbfd9fca209c17172f6110787ebf78934af25e3694fe2ba40e55c1e2b", size = 39799 }, + { url = "https://files.pythonhosted.org/packages/bd/25/ff2cd36c82a23afa57a05cdb52ab467a911fb12c055c8a8238c0d426cbf0/mmh3-5.0.1-cp312-cp312-win_arm64.whl", hash = "sha256:a5da292ceeed8ce8e32b68847261a462d30fd7b478c3f55daae841404f433c15", size = 36537 }, +] + +[[package]] +name = "monotonic" +version = "1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/ca/8e91948b782ddfbd194f323e7e7d9ba12e5877addf04fb2bf8fca38e86ac/monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7", size = 7615 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl", hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c", size = 8154 }, +] + [[package]] name = "more-itertools" version = "10.6.0" @@ -4213,7 +4494,6 @@ name = "nvidia-cublas-cu12" version = "12.4.5.8" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, ] @@ -4222,7 +4502,6 @@ name = "nvidia-cuda-cupti-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, ] @@ -4231,7 +4510,6 @@ name = "nvidia-cuda-nvrtc-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, ] @@ -4240,7 +4518,6 @@ name = "nvidia-cuda-runtime-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, ] @@ -4263,7 +4540,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, ] @@ -4272,7 +4548,6 @@ name = "nvidia-curand-cu12" version = "10.3.5.147" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, ] @@ -4286,7 +4561,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, ] @@ -4298,7 +4572,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and 
sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, ] @@ -4315,7 +4588,6 @@ name = "nvidia-nvjitlink-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, ] @@ -4324,10 +4596,18 @@ name = "nvidia-nvtx-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, ] +[[package]] +name = "oauthlib" +version = "3.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 }, +] + [[package]] name = "ollama" version = "0.4.6" @@ -4595,6 +4875,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/0a/7f/405c41d4f359121376c9d5117dcf68149b8122d3f6c718996d037bd4d800/opentelemetry_instrumentation-0.48b0-py3-none-any.whl", hash = "sha256:a69750dc4ba6a5c3eb67986a337185a25b739966d80479befe37b546fc870b44", size = 29449 }, ] +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/44/ac/fd3d40bab3234ec3f5c052a815100676baaae1832fa1067935f11e5c59c6/opentelemetry_instrumentation_asgi-0.48b0.tar.gz", hash = 
"sha256:04c32174b23c7fa72ddfe192dad874954968a6a924608079af9952964ecdf785", size = 23435 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/db/74/a0e0d38622856597dd8e630f2bd793760485eb165708e11b8be1696bbb5a/opentelemetry_instrumentation_asgi-0.48b0-py3-none-any.whl", hash = "sha256:ddb1b5fc800ae66e85a4e2eca4d9ecd66367a8c7b556169d9e7b57e10676e44d", size = 15958 }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/58/20/43477da5850ef2cd3792715d442aecd051e885e0603b6ee5783b2104ba8f/opentelemetry_instrumentation_fastapi-0.48b0.tar.gz", hash = "sha256:21a72563ea412c0b535815aeed75fc580240f1f02ebc72381cfab672648637a2", size = 18497 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ee/50/745ab075a3041b7a5f29a579d2c28eaad54f64b4589d8f9fd364c62cf0f3/opentelemetry_instrumentation_fastapi-0.48b0-py3-none-any.whl", hash = "sha256:afeb820a59e139d3e5d96619600f11ce0187658b8ae9e3480857dd790bc024f2", size = 11777 }, +] + [[package]] name = "opentelemetry-proto" version = "1.27.0" @@ -4634,6 +4946,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/b7/7a/4f0063dbb0b6c971568291a8bc19a4ca70d3c185db2d956230dd67429dfc/opentelemetry_semantic_conventions-0.48b0-py3-none-any.whl", hash = "sha256:a0de9f45c413a8669788a38569c7e0a11ce6ce97861a628cca785deecdc32a1f", size = 149685 }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.48b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d6/d7/185c494754340e0a3928fd39fde2616ee78f2c9d66253affaad62d5b7935/opentelemetry_util_http-0.48b0.tar.gz", hash = "sha256:60312015153580cc20f322e5cdc3d3ecad80a71743235bdb77716e742814623c", size = 7863 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/ad/2e/36097c0a4d0115b8c7e377c90bab7783ac183bc5cb4071308f8959454311/opentelemetry_util_http-0.48b0-py3-none-any.whl", hash = "sha256:76f598af93aab50328d2a69c786beaedc8b6a7770f7a818cc307eb353debfffb", size = 6946 }, +] + [[package]] name = "orjson" version = "3.10.14" @@ -4983,6 +5304,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, ] +[[package]] +name = "posthog" +version = "3.9.2" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "monotonic" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8e/2f/fa06445b4367c4101b3904f25a1a375c179fd3f2d3e1b7b5e66f34d80bb5/posthog-3.9.2.tar.gz", hash = "sha256:afa7c0a3f68a92d890a181688165d2aee3e5d235e7521252fbf5dd560b1185d5", size = 60057 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/10/77/b2f839325d968882f7aa0eb7d5d5361ee99eb67ff17617424e7d742b1f38/posthog-3.9.2-py2.py3-none-any.whl", hash = "sha256:6a7aaf1ec9ecb32788d675bc6544be114556286f1f65f5ae57d260f0a7c075ae", size = 70780 }, +] + [[package]] name = "pot" version = "0.9.5" @@ -5473,6 +5810,21 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/04/fc/6f52588ac1cb4400a7804ef88d0d4e00cfe57a7ac6793ec3b00de5a8758b/pypdf-5.1.0-py3-none-any.whl", hash = "sha256:3bd4f503f4ebc58bae40d81e81a9176c400cbbac2ba2d877367595fb524dfdfc", size = 297976 }, ] +[[package]] +name = "pypika" +version = "0.48.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/2c/94ed7b91db81d61d7096ac8f2d325ec562fc75e35f3baea8749c85b28784/PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378", size = 67259 } + +[[package]] +name = "pyproject-hooks" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216 }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -5872,6 +6224,19 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/d7/25/dd878a121fcfdf38f52850f11c512e13ec87c2ea72385933818e5b6c15ce/requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c", size = 4244 }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, +] + [[package]] name = "requests-toolbelt" version = "1.0.0" @@ -7377,6 +7742,43 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/26/59/fddd9df489fe27f492cc97626e03663fb3b9b6ef7ce8597a7cdc5f2cbbad/uvicorn-0.25.0-py3-none-any.whl", hash = "sha256:ce107f5d9bd02b4636001a77a4e74aab5e1e2b146868ebbad565237145af444c", size = 60303 }, ] +[package.optional-dependencies] +standard = [ + { name = "colorama", marker = "sys_platform == 'win32'" }, + { name = "httptools" }, + { name = "python-dotenv" }, + { name = "pyyaml" }, + { name = "uvloop", marker = "platform_python_implementation != 'PyPy' and sys_platform != 'cygwin' and sys_platform != 'win32'" }, + { name = "watchfiles" }, + { name = "websockets" }, +] + +[[package]] +name = "uvloop" +version = "0.21.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/af/c0/854216d09d33c543f12a44b393c402e89a920b1a0a7dc634c42de91b9cf6/uvloop-0.21.0.tar.gz", hash = "sha256:3bf12b0fda68447806a7ad847bfa591613177275d35b6724b1ee573faa3704e3", size = 2492741 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3d/76/44a55515e8c9505aa1420aebacf4dd82552e5e15691654894e90d0bd051a/uvloop-0.21.0-cp310-cp310-macosx_10_9_universal2.whl", hash = 
"sha256:ec7e6b09a6fdded42403182ab6b832b71f4edaf7f37a9a0e371a01db5f0cb45f", size = 1442019 }, + { url = "https://files.pythonhosted.org/packages/35/5a/62d5800358a78cc25c8a6c72ef8b10851bdb8cca22e14d9c74167b7f86da/uvloop-0.21.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:196274f2adb9689a289ad7d65700d37df0c0930fd8e4e743fa4834e850d7719d", size = 801898 }, + { url = "https://files.pythonhosted.org/packages/f3/96/63695e0ebd7da6c741ccd4489b5947394435e198a1382349c17b1146bb97/uvloop-0.21.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f38b2e090258d051d68a5b14d1da7203a3c3677321cf32a95a6f4db4dd8b6f26", size = 3827735 }, + { url = "https://files.pythonhosted.org/packages/61/e0/f0f8ec84979068ffae132c58c79af1de9cceeb664076beea86d941af1a30/uvloop-0.21.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87c43e0f13022b998eb9b973b5e97200c8b90823454d4bc06ab33829e09fb9bb", size = 3825126 }, + { url = "https://files.pythonhosted.org/packages/bf/fe/5e94a977d058a54a19df95f12f7161ab6e323ad49f4dabc28822eb2df7ea/uvloop-0.21.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:10d66943def5fcb6e7b37310eb6b5639fd2ccbc38df1177262b0640c3ca68c1f", size = 3705789 }, + { url = "https://files.pythonhosted.org/packages/26/dd/c7179618e46092a77e036650c1f056041a028a35c4d76945089fcfc38af8/uvloop-0.21.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:67dd654b8ca23aed0a8e99010b4c34aca62f4b7fce88f39d452ed7622c94845c", size = 3800523 }, + { url = "https://files.pythonhosted.org/packages/57/a7/4cf0334105c1160dd6819f3297f8700fda7fc30ab4f61fbf3e725acbc7cc/uvloop-0.21.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:c0f3fa6200b3108919f8bdabb9a7f87f20e7097ea3c543754cabc7d717d95cf8", size = 1447410 }, + { url = "https://files.pythonhosted.org/packages/8c/7c/1517b0bbc2dbe784b563d6ab54f2ef88c890fdad77232c98ed490aa07132/uvloop-0.21.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:0878c2640cf341b269b7e128b1a5fed890adc4455513ca710d77d5e93aa6d6a0", size = 805476 }, + { url = "https://files.pythonhosted.org/packages/ee/ea/0bfae1aceb82a503f358d8d2fa126ca9dbdb2ba9c7866974faec1cb5875c/uvloop-0.21.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b9fb766bb57b7388745d8bcc53a359b116b8a04c83a2288069809d2b3466c37e", size = 3960855 }, + { url = "https://files.pythonhosted.org/packages/8a/ca/0864176a649838b838f36d44bf31c451597ab363b60dc9e09c9630619d41/uvloop-0.21.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a375441696e2eda1c43c44ccb66e04d61ceeffcd76e4929e527b7fa401b90fb", size = 3973185 }, + { url = "https://files.pythonhosted.org/packages/30/bf/08ad29979a936d63787ba47a540de2132169f140d54aa25bc8c3df3e67f4/uvloop-0.21.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:baa0e6291d91649c6ba4ed4b2f982f9fa165b5bbd50a9e203c416a2797bab3c6", size = 3820256 }, + { url = "https://files.pythonhosted.org/packages/da/e2/5cf6ef37e3daf2f06e651aae5ea108ad30df3cb269102678b61ebf1fdf42/uvloop-0.21.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4509360fcc4c3bd2c70d87573ad472de40c13387f5fda8cb58350a1d7475e58d", size = 3937323 }, + { url = "https://files.pythonhosted.org/packages/8c/4c/03f93178830dc7ce8b4cdee1d36770d2f5ebb6f3d37d354e061eefc73545/uvloop-0.21.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:359ec2c888397b9e592a889c4d72ba3d6befba8b2bb01743f72fffbde663b59c", size = 1471284 }, + { url = 
"https://files.pythonhosted.org/packages/43/3e/92c03f4d05e50f09251bd8b2b2b584a2a7f8fe600008bcc4523337abe676/uvloop-0.21.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:f7089d2dc73179ce5ac255bdf37c236a9f914b264825fdaacaded6990a7fb4c2", size = 821349 }, + { url = "https://files.pythonhosted.org/packages/a6/ef/a02ec5da49909dbbfb1fd205a9a1ac4e88ea92dcae885e7c961847cd51e2/uvloop-0.21.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:baa4dcdbd9ae0a372f2167a207cd98c9f9a1ea1188a8a526431eef2f8116cc8d", size = 4580089 }, + { url = "https://files.pythonhosted.org/packages/06/a7/b4e6a19925c900be9f98bec0a75e6e8f79bb53bdeb891916609ab3958967/uvloop-0.21.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86975dca1c773a2c9864f4c52c5a55631038e387b47eaf56210f873887b6c8dc", size = 4693770 }, + { url = "https://files.pythonhosted.org/packages/ce/0c/f07435a18a4b94ce6bd0677d8319cd3de61f3a9eeb1e5f8ab4e8b5edfcb3/uvloop-0.21.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:461d9ae6660fbbafedd07559c6a2e57cd553b34b0065b6550685f6653a98c1cb", size = 4451321 }, + { url = "https://files.pythonhosted.org/packages/8f/eb/f7032be105877bcf924709c97b1bf3b90255b4ec251f9340cef912559f28/uvloop-0.21.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:183aef7c8730e54c9a3ee3227464daed66e37ba13040bb3f350bc2ddc040f22f", size = 4659022 }, +] + [[package]] name = "watchfiles" version = "0.20.0" From 7e359e97e12cb0983989fb3004f9d2336f025ad6 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 23 Jan 2025 15:50:48 -0800 Subject: [PATCH 51/93] docstrings etc. --- .../ame/src/ame/clients/_client_creator.py | 6 +- .../ame/src/ame/clients/_client_wrapper.py | 9 +- python/packages/ame/src/ame/eval.py | 11 +- .../packages/ame/src/ame/settings/check.yaml | 2 +- .../src/ame/task_data/advice/add_topic.yaml | 4 +- .../task_data/demos/cell_towers_2_demo.yaml | 16 +- .../autogen_ext/apprentice/_agent_wrapper.py | 4 +- .../src/autogen_ext/apprentice/_grader.py | 2 +- .../autogen_ext/apprentice/_page_logger.py | 396 +++++++++++------- .../src/autogen_ext/apprentice/_prompter.py | 5 +- 10 files changed, 268 insertions(+), 187 deletions(-) diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py index 8d04cdf27175..2d5ee5efbaf5 100644 --- a/python/packages/ame/src/ame/clients/_client_creator.py +++ b/python/packages/ame/src/ame/clients/_client_creator.py @@ -1,6 +1,6 @@ -from autogen_ext.models.openai import OpenAIChatCompletionClient -from autogen_ext.models.openai import AzureOpenAIChatCompletionClient -from azure.identity import DefaultAzureCredential, ChainedTokenCredential, AzureCliCredential, get_bearer_token_provider +from autogen_ext.models.openai import AzureOpenAIChatCompletionClient, OpenAIChatCompletionClient +from azure.identity import AzureCliCredential, ChainedTokenCredential, DefaultAzureCredential, get_bearer_token_provider + from ._client_wrapper import ClientWrapper diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index c8b0542ec998..3e582b6a81c2 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -1,7 +1,7 @@ import os +from typing import Any, Dict, List, Mapping, Optional, Sequence + import yaml -from typing import Any, List, Dict, Mapping, Optional, Sequence -from autogen_ext.models.openai import AzureOpenAIChatCompletionClient from 
autogen_core import CancellationToken from autogen_core.models import ( CreateResult, @@ -10,6 +10,7 @@ ) from autogen_core.tools import Tool, ToolSchema from autogen_ext.apprentice import PageLogger +from autogen_ext.models.openai import AzureOpenAIChatCompletionClient class ClientWrapper: @@ -103,8 +104,8 @@ def check_and_replay_one_turn(self, messages): current_messages = self.convert_messages(messages) if current_messages != recorded_messages: error_str = "\nCurrent message list doesn't match the recorded message list." - self.logger.add_message_content(recorded_messages, "recorded message list") - self.logger.add_message_content(current_messages, "current message list") + self.logger.log_message_content(recorded_messages, "recorded message list") + self.logger.log_message_content(current_messages, "current message list") self.logger.error(error_str) raise ValueError(error_str) assert current_messages == recorded_messages diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 4168cb6988e5..02e4ee57a159 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -1,9 +1,12 @@ -import sys, os -import yaml import asyncio import importlib +import os +import sys from typing import Tuple -from autogen_ext.apprentice import PageLogger, Grader + +import yaml +from autogen_ext.apprentice import Grader, PageLogger + from ame.clients._client_creator import ClientCreator @@ -116,7 +119,7 @@ async def perform_evaluations(self, settings): # If this is a client wrapper, it needs to be finalized. client.finalize() - self.logger.flush(final=True) # Finalize the page log + self.logger.flush(finished=True) self.logger.leave_function() async def run(self, settings_filepath): diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 65c1441e4210..30ab05ee62d4 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp10 + path: ~/pagelogs/temp11 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/ame/src/ame/task_data/advice/add_topic.yaml b/python/packages/ame/src/ame/task_data/advice/add_topic.yaml index c95f6703133b..e07dd8be08a5 100644 --- a/python/packages/ame/src/ame/task_data/advice/add_topic.yaml +++ b/python/packages/ame/src/ame/task_data/advice/add_topic.yaml @@ -1,6 +1,6 @@ # Advice created manually for solving the autogen_package task. advice: 'When somebody builds something on autogen and wants to contribute it to autogen, - instead of creating a new autogen package, it''s better for them to implement it in a separate github repo, - then just add the autogen-extension topic to that repo. + instead of creating a new autogen package, it''s better for them to implement it in a separate github repo, + then just add the autogen-extension topic to that repo. 
That way their contribution will be automatically discoverable through autogen''s page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html' diff --git a/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml b/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml index d94d87d9eb35..da626f9783b7 100644 --- a/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml +++ b/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml @@ -1,11 +1,11 @@ # Demo created manually for solving the cell_towers_2 task. -demo: 'Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. - Then start at one end and place the towers only where absolutely needed. - The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. - This obviously covers houses up to mile 7. - But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. - So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. - The next uncovered house would be at mile 12 (not 10), requiring a second tower. - It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), +demo: 'Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. + Then start at one end and place the towers only where absolutely needed. + The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. + This obviously covers houses up to mile 7. + But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. + So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. + The next uncovered house would be at mile 12 (not 10), requiring a second tower. + It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), covering the remaining houses. So 2 towers would be enough.' 
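Editor's aside on the demo just above: it spells out a greedy interval-covering argument, sort the houses, then put each tower as far right as it can go while still covering the leftmost uncovered house. A minimal Python sketch of that strategy, for illustration only (not part of the patch; the positions and the 4-mile radius are taken from the demo text):

    # Greedy interval covering, as described in the cell_towers_2 demo.
    def min_towers(houses: list[int], radius: int) -> int:
        houses = sorted(houses)
        towers = 0
        i = 0
        while i < len(houses):
            towers += 1
            position = houses[i] + radius  # place the tower as far right as still covers houses[i]
            while i < len(houses) and houses[i] <= position + radius:
                i += 1  # skip every house this tower covers
        return towers

    assert min_towers([3, 6, 10, 11, 12, 17, 19, 20], 4) == 2  # matches the demo's answer
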
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py index 5151356ddb8a..7321d9155abf 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py @@ -31,7 +31,7 @@ async def assign_task(self, task): elif self.base_agent_name == "thin_agent": response, work_history = await self.assign_task_to_thin_agent(task) else: - assert False, "Invalid base agent" + raise AssertionError("Invalid base agent") self.logger.leave_function() return response, work_history @@ -63,7 +63,7 @@ async def assign_task_to_thin_agent(self, task): response_str = response.content # Log the model call - self.logger.add_model_call( + self.logger.log_model_call( summary="Ask the model to complete the task", input_messages=input_messages, response=response ) self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index 3d2a1b9f5f29..e0ca7c5827c4 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -47,7 +47,7 @@ async def call_model( assert isinstance(response_message, AssistantMessage) # Log the model call - self.logger.add_model_call(summary=summary, input_messages=input_messages, response=response) + self.logger.log_model_call(summary=summary, input_messages=input_messages, response=response) # Manage the chat history if keep_these_messages: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py index d91a196dd7f9..9855c67d3043 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py @@ -3,83 +3,73 @@ import os import shutil import time -from typing import Dict, List +from typing import Dict, List, Union -from autogen_core import Image +from autogen_core import FunctionCall, Image from autogen_core.models import ( AssistantMessage, - ChatCompletionClient, + FunctionExecutionResult, FunctionExecutionResultMessage, LLMMessage, SystemMessage, UserMessage, ) +# Convenience types +UserContent = Union[str, List[Union[str, Image]]] +AssistantContent = Union[str, List[FunctionCall]] +FunctionExecutionContent = List[FunctionExecutionResult] +SystemContent = str +MessageContent = UserContent | AssistantContent | SystemContent | FunctionExecutionContent -class Page: - def __init__(self, page_logger, index, summary, indent_level, show_in_overview=True, final=True): - self.page_logger = page_logger - self.index_str = str(index) - self.summary = summary - self.indent_level = indent_level - self.show_in_overview = show_in_overview - self.final = final - self.file_title = self.index_str + " " + self.summary - self.indentation_text = "" - for i in range(self.indent_level): - self.indentation_text += "| " - self.full_link = f'{self.file_title}' - self.line_text = self.indentation_text + self.full_link - self.lines = [] - self.flush() - - def _add_lines(self, line, flush=False): - # If the string 'line' consists of multiple lines, separate them into a list. 
- lines_to_add = [] - if "\n" in line: - lines_to_add = line.split("\n") - else: - lines_to_add.append(line) - - self.lines.extend(lines_to_add) - - if flush: - self.flush() - - def link_to_image(self, image_path, description): - # Add a thumbnail that links to the image. - # If the following html string is indented, underscores appear to the left of thumbnails. - link = f"""{description}""" - return link - - def add_link_to_image(self, description, source_image_path): - # Copy the image to the run directory. - # Remove every character from the string 'description' that is not alphanumeric or a space. - description = "".join(e for e in description if e.isalnum() or e.isspace()) - target_image_filename = str(self.page_logger.get_next_page_id()) + " - " + description - local_image_path = os.path.join(self.page_logger.log_dir, target_image_filename) - shutil.copyfile(source_image_path, local_image_path) - self._add_lines("\n" + description) - self._add_lines(self.link_to_image(target_image_filename, description), flush=True) - def flush(self): - page_path = os.path.join(self.page_logger.log_dir, self.index_str + ".html") - with open(page_path, "w") as f: - f.write(self.page_logger.html_opening(self.file_title, final=self.final)) - f.write(f"
<h3>{self.file_title}</h3>
\n") - for line in self.lines: - # Call f.write in a try block to catch any UnicodeEncodeErrors. - try: - f.write(f"{line}\n") - except UnicodeEncodeError: - f.write("UnicodeEncodeError in this line.\n") - f.write(self.page_logger.html_closing()) - f.flush() - time.sleep(0.1) +def html_opening(file_title: str, finished: bool = False) -> str: + """ + Returns the opening text of a simple HTML file. + """ + refresh_tag = '' if not finished else "" + st = f""" + + + + {refresh_tag} + {file_title} + + + """ + return st + + +def html_closing() -> str: + """ + Return the closing text of a simple HTML file. + """ + return """""" class PageLogger: - def __init__(self, settings): + """ + Logs text and images to a set of HTML pages, one per function/method, linked to each other in a call tree. + + Args: + settings: A dictionary containing the following keys: + - enabled: A boolean indicating whether logging is enabled. + + Methods: + info: Adds text to the current page. + error: Adds text to the current page. + log_message_content: Adds a page containing the message's content, including any images. + log_model_call: Adds a page containing all messages to or from a model, including any images. + log_link_to_local_file: Returns a link to a local file in the log. + flush: Writes the current state of the log to disk. + enter_function: Adds a new page corresponding to the current function call. + leave_function: Finishes the page corresponding to the current function + """ + + def __init__(self, settings: Dict): self.enabled = settings["enabled"] if not self.enabled: return @@ -87,80 +77,62 @@ def __init__(self, settings): self.page_stack = PageStack() self.pages = [] self.last_page_id = 0 - self.name = "0 Overview" - self.create_run_dir() + self.name = "0 Call Tree" + self._create_run_dir() self.flush() - def get_next_page_id(self): + def _get_next_page_id(self) -> int: + """Returns the next page id and increments the counter.""" self.last_page_id += 1 return self.last_page_id - def create_run_dir(self): - # Create a fresh log directory. + def _create_run_dir(self) -> None: + """Creates a fresh log directory.""" if os.path.exists(self.log_dir): shutil.rmtree(self.log_dir) os.makedirs(self.log_dir) - def html_opening(self, file_title, final=False): - # Return the opening text of a simple HTML file. - refresh_tag = '' if not final else "" - st = f""" - - - - {refresh_tag} - {file_title} - - - """ - return st - - def html_closing(self): - # Return the closing text of a simple HTML file. - return """""" - - def add_page(self, summary, show_in_overview=True, final=True): - # Add a page to the log. + def _add_page(self, summary: str, show_in_call_tree: bool = True, finished: bool = True) -> "Page": + """ + Adds a new page to the log. + """ page = Page( page_logger=self, - index=self.get_next_page_id(), + index=self._get_next_page_id(), summary=summary, indent_level=len(self.page_stack.stack), - show_in_overview=show_in_overview, - final=final, + show_in_call_tree=show_in_call_tree, + finished=finished, ) self.pages.append(page) self.flush() - if len(self.page_stack.stack) > 0: # Insert a link to the new page into the calling page. - self._add_lines("\n" + page.full_link, flush=True) - + self.info("\n" + page.full_link) return page - def _add_lines(self, line, flush=False): - # Add lines to the current page (at the top of the page stack). - page = self.page_stack.top() - page._add_lines(line, flush=flush) - - def info(self, line): + def info(self, line: str) -> None: + """ + Adds text to the current page. 
+ """ if not self.enabled: return - # Add lines to the current page (at the top of the page stack). page = self.page_stack.top() - page._add_lines(line, flush=True) + page.add_lines(line, flush=True) - def error(self, line): + def error(self, line: str) -> None: + """ + Adds text to the current page. + """ if not self.enabled: return - # Add lines to the current page (at the top of the page stack). page = self.page_stack.top() - page._add_lines(line, flush=True) + page.add_lines(line, flush=True) - def message_source(self, message): + def _message_source(self, message: LLMMessage) -> str: + """ + Returns a string indicating the source of a message. + """ source = "UNKNOWN" color = "black" if isinstance(message, SystemMessage): @@ -175,16 +147,24 @@ def message_source(self, message): elif isinstance(message, FunctionExecutionResultMessage): source = "FUNCTION" color = "red" - return self.decorate_text(source, color, demarcate=True) + return self._decorate_text(source, color, demarcate=True) - def decorate_text(self, text, color, weight="bold", demarcate=False): + def _decorate_text(self, text: str, color: str, weight: str = "bold", demarcate: bool = False) -> str: + """ + Returns a string of text with HTML styling for weight and color. + """ if demarcate: text = f"<<<<< {text} >>>>>" return f'{text}' - def message_content(self, page, message=None, message_content=None): - # Format the message content for logging. Either message or message_content must not be None. + def _format_message_content( + self, page: "Page", message: LLMMessage | None = None, message_content: MessageContent | None = None + ) -> str: + """ + Formats the message content for logging. Either message or message_content must not be None. + """ # Start by converting the message content to a list of strings. + content = None content_list = [] if message_content is not None: content = message_content @@ -198,7 +178,7 @@ def message_content(self, page, message=None, message_content=None): content_list.append(item.rstrip()) elif isinstance(item, Image): # Save the image to disk. - image_filename = str(self.get_next_page_id()) + " image.jpg" + image_filename = str(self._get_next_page_id()) + " image.jpg" image_path = os.path.join(self.log_dir, image_filename) item.image.save(image_path) # Add a link to the image. @@ -212,60 +192,72 @@ def message_content(self, page, message=None, message_content=None): else: content_list.append("") - # Convert the list of strings to a single string with newline separators. + # Convert the list of strings to a single string containing newline separators. output = "" for item in content_list: output += f"\n{item}\n" return output - def add_message_content(self, message_content, summary): - # Add a page containing a message's content. - page = self.add_page(summary=summary, show_in_overview=False) + def log_message_content(self, message_content: MessageContent, summary: str) -> None: + """ + Adds a page containing the message's content, including any images. + """ + page = self._add_page(summary=summary, show_in_call_tree=False) self.page_stack.write_stack_to_page(page) - page._add_lines(self.message_content(page, message_content=message_content)) + page.add_lines(self._format_message_content(page, message_content=message_content)) page.flush() - def add_model_call(self, summary, input_messages, response): + def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: LLMMessage) -> "Page": + """ + Adds a page containing all messages to or from a model, including any images. 
+ """ if not self.enabled: - return - # Add a model call to the log. - page = self.add_page(summary=summary, show_in_overview=False) + return None + page = self._add_page(summary=summary, show_in_call_tree=False) self.page_stack.write_stack_to_page(page) - page._add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) - page._add_lines("{} completion tokens".format(response.usage.completion_tokens)) - for i, m in enumerate(input_messages): - page._add_lines("\n" + self.message_source(m)) - page._add_lines(self.message_content(page, message=m)) - page._add_lines("\n" + self.decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) - page._add_lines(self.message_content(page, message=response)) + page.add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) + page.add_lines("{} completion tokens".format(response.usage.completion_tokens)) + for m in input_messages: + page.add_lines("\n" + self._message_source(m)) + page.add_lines(self._format_message_content(page, message=m)) + page.add_lines("\n" + self._decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) + page.add_lines(self._format_message_content(page, message=response)) page.flush() return page - def link_to_local_file(self, file_path): + def log_link_to_local_file(self, file_path: str) -> str: + """ + Returns a link to a local file in the log. + """ file_name = os.path.basename(file_path) link = f'{file_name}' return link - def flush(self, final=False): + def flush(self, finished: bool = False) -> None: + """ + Writes the current state of the log to disk. + """ if not self.enabled: return - # Create an overview of the log. - overview_path = os.path.join(self.log_dir, self.name + ".html") - with open(overview_path, "w") as f: - f.write(self.html_opening("0 Overview", final=final)) + # Create a call tree of the log. + call_tree_path = os.path.join(self.log_dir, self.name + ".html") + with open(call_tree_path, "w") as f: + f.write(html_opening("0 Call Tree", finished=finished)) f.write(f"
<h3>{self.name}</h3>")
             f.write("\n")
             for page in self.pages:
-                if page.show_in_overview:
+                if page.show_in_call_tree:
                     f.write(page.line_text + "\n")
             f.write("\n")
-            f.write(self.html_closing())
-        time.sleep(0.1)
+            f.write(html_closing())
+        time.sleep(0.1)  # Avoids race conditions when writing multiple files in quick succession.
 
-    def enter_function(self):
-        # Perform a set of logging actions that are often performed at the beginning of a caller's method.
+    def enter_function(self) -> "Page":
+        """
+        Adds a new page corresponding to the current function call.
+        """
         if not self.enabled:
-            return
+            return None
         frame = inspect.currentframe().f_back  # Get the calling frame
 
         # Check if it's a method by looking for 'self' or 'cls' in f_locals
@@ -282,45 +274,133 @@
             caller_name = class_name + "." + frame.f_code.co_name
 
         # Create a new page for this function.
-        page = self.add_page(summary=caller_name, show_in_overview=True, final=False)
+        page = self._add_page(summary=caller_name, show_in_call_tree=True, finished=False)
         self.page_stack.push(page)
         self.page_stack.write_stack_to_page(page)
-        page._add_lines("\nENTER {}".format(caller_name), flush=True)
+        page.add_lines("\nENTER {}".format(caller_name), flush=True)
         return page
 
-    def leave_function(self):
+    def leave_function(self) -> None:
+        """
+        Finishes the page corresponding to the current function call.
+        """
         if not self.enabled:
             return
-        # Perform a set of logging actions that are often performed at the end of a caller's method.
         page = self.page_stack.top()
-        page.final = True
-        page._add_lines("\nLEAVE {}".format(page.summary), flush=True)
+        page.finished = True
+        page.add_lines("\nLEAVE {}".format(page.summary), flush=True)
         self.page_stack.pop()
 
 
+class Page:
+    """
+    Represents a single HTML page in the logger output.
+    """
+
+    def __init__(
+        self,
+        page_logger: PageLogger,
+        index: int,
+        summary: str,
+        indent_level: int,
+        show_in_call_tree: bool = True,
+        finished: bool = True,
+    ):
+        """
+        Initializes and writes to a new HTML page.
+        """
+        self.page_logger = page_logger
+        self.index_str = str(index)
+        self.summary = summary
+        self.indent_level = indent_level
+        self.show_in_call_tree = show_in_call_tree
+        self.finished = finished
+        self.file_title = self.index_str + " " + self.summary
+        self.indentation_text = "| " * self.indent_level
+        self.full_link = f'<a href="{self.index_str}.html">{self.file_title}</a>'
+        self.line_text = self.indentation_text + self.full_link
+        self.lines = []
+        self.flush()
+
+    def add_lines(self, lines: str, flush: bool = False) -> None:
+        """
+        Adds one or more lines to the page.
+        """
+        lines_to_add = []
+        if "\n" in lines:
+            lines_to_add = lines.split("\n")
+        else:
+            lines_to_add.append(lines)
+        self.lines.extend(lines_to_add)
+        if flush:
+            self.flush()
+
+    def link_to_image(self, image_path: str, description: str) -> str:
+        """
+        Returns an HTML string defining a thumbnail link to an image.
+        """
+        # To avoid a bug in HTML rendering that displays underscores to the left of thumbnails,
+        # define the following string on a single line.
+        link = f"""<a href="{image_path}"><img src="{image_path}" alt="{description}" style="width: 300px; height: auto;"></a>"""
+        return link
+
+    def add_link_to_image(self, description: str, source_image_path: str) -> None:
+        """
+        Inserts a thumbnail link to an image into the page.
+        """
+        # Remove every character from the string 'description' that is not alphanumeric or a space.
+        description = "".join(e for e in description if e.isalnum() or e.isspace())
+        target_image_filename = str(self.page_logger._get_next_page_id()) + " - " + description
+        # Copy the image to the log directory.
+ local_image_path = os.path.join(self.page_logger.log_dir, target_image_filename) + shutil.copyfile(source_image_path, local_image_path) + self.add_lines("\n" + description) + self.add_lines(self.link_to_image(target_image_filename, description), flush=True) + + def flush(self) -> None: + """ + Writes the HTML page to disk. + """ + page_path = os.path.join(self.page_logger.log_dir, self.index_str + ".html") + with open(page_path, "w") as f: + f.write(html_opening(self.file_title, finished=self.finished)) + f.write(f"
<h3>{self.file_title}</h3>
\n") + for line in self.lines: + try: + f.write(f"{line}\n") + except UnicodeEncodeError: + f.write("UnicodeEncodeError in this line.\n") + f.write(html_closing()) + f.flush() + time.sleep(0.1) # Avoids race conditions when writing multiple files in quick succession. + + class PageStack: """ - A call stack containing a list of currently active tasks and policies in the order they called each other. + A call stack containing a list of currently active function pages in the order they called each other. """ def __init__(self): self.stack = [] - def push(self, page): + def push(self, page: Page) -> None: + """Adds a page to the top of the stack.""" self.stack.append(page) - def pop(self): + def pop(self) -> Page: + """Removes and returns the top page from the stack""" return self.stack.pop() - def top(self): + def top(self) -> Page: + """Returns the top page from the stack without removing it""" return self.stack[-1] - def write_stack_to_page(self, page): - # Log a properly indented string showing the current state of the call stack. - page._add_lines("\nCALL STACK") + def write_stack_to_page(self, page: Page) -> None: + # Logs a properly indented string displaying the current call stack. + page.add_lines("\nCALL STACK") for stack_page in self.stack: - page._add_lines(stack_page.line_text) - page._add_lines("") - page._add_lines("") + page.add_lines(stack_page.line_text) + page.add_lines("") + page.add_lines("") page.flush() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index b75fa9bd1aad..7a935657d443 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -44,9 +44,6 @@ async def call_model( # Double check the types of the input messages. for message in input_messages: for part in message.content: - if part is None: - print("part is None") - print("message = ", message) assert isinstance(part, str) or isinstance(part, Image), "Invalid message content type: {}".format( type(part) ) @@ -65,7 +62,7 @@ async def call_model( self.num_model_calls += 1 # Log the model call - self.logger.add_model_call(summary=summary, input_messages=input_messages, response=response) + self.logger.log_model_call(summary=summary, input_messages=input_messages, response=response) # Manage the chat history if keep_these_messages: From 9466ea82e23ee90c4738d8a943b10831a2fd5ee8 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 23 Jan 2025 16:56:27 -0800 Subject: [PATCH 52/93] docstrings etc. --- .../packages/ame/src/ame/settings/check.yaml | 2 +- .../src/autogen_ext/apprentice/_grader.py | 37 ++++++++++++------- .../autogen_ext/apprentice/_page_logger.py | 8 +--- .../src/autogen_ext/apprentice/_utils.py | 17 ++++++--- 4 files changed, 37 insertions(+), 27 deletions(-) diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 30ab05ee62d4..423947383a2e 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp11 + path: ~/pagelogs/temp12 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. 
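Taken together, the PageLogger changes above define the logging pattern the rest of these patches rely on: each function brackets its work with enter_function/leave_function, and intervening info calls land on the page of whichever function sits on top of the stack, building the linked "0 Call Tree" page as a side effect. A usage sketch, for illustration only (the settings keys mirror the PageLogger section of check.yaml above; the path value and function names are invented):

    from autogen_ext.apprentice import PageLogger

    # Settings keys as in check.yaml; the path is illustrative.
    logger = PageLogger({"enabled": 1, "path": "~/pagelogs/demo"})

    def inner_step():
        logger.enter_function()  # opens a new page, indented under the caller's page
        logger.info("Detail lines go to inner_step's page.")
        logger.leave_function()  # marks the page finished and pops the stack

    def outer_task():
        logger.enter_function()  # opens a page linked from the "0 Call Tree" page
        logger.info("Text added while outer_task is on top of the stack.")
        inner_step()
        logger.leave_function()

    outer_task()
    logger.flush(finished=True)  # final write, as eval.py now does at the end of a run
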
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index e0ca7c5827c4..cadce2dcf25b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -1,18 +1,22 @@ -from typing import List +from typing import List, Tuple from autogen_core.models import ( AssistantMessage, + ChatCompletionClient, CreateResult, LLMMessage, SystemMessage, UserMessage, ) - +from ._page_logger import PageLogger from ._utils import UserContent class Grader: - def __init__(self, client, logger): + """ + Determines task success without limitation to string matches. + """ + def __init__(self, client: ChatCompletionClient, logger: PageLogger): self.client = client self.logger = logger @@ -23,8 +27,11 @@ def __init__(self, client, logger): self._chat_history: List[LLMMessage] = [] async def call_model( - self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True + self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True ): + """ + Calls the model client with the given input and returns the response. + """ # Prepare the input message list if system_message_content is None: system_message_content = "You are a helpful assistant." @@ -34,7 +41,6 @@ async def call_model( else: # System message allowed. system_message = SystemMessage(content=system_message_content) - user_message = UserMessage(content=user_content, source="User") input_messages = [system_message] + self._chat_history + [user_message] @@ -54,22 +60,24 @@ async def call_model( self._chat_history.append(user_message) self._chat_history.append(response_message) - # Return the response as a string for now + # Return the response as a string return response_string - def remove_last_turn(self): - if len(self._chat_history) > 0: - self._chat_history.pop() - - def clear_history(self): + def clear_history(self) -> None: + """ + Empties the chat history message list. + """ self._chat_history = [] - async def is_response_correct(self, task_description, response_to_be_graded, correct_answer): - # Returns only the insights that the client verifies are relevant to the task. + async def is_response_correct(self, task_description: str, response_to_be_graded: str, correct_answer: str) -> Tuple[bool, str]: + """ + Determines whether the response is equivalent to the task's correct answer. + """ self.logger.enter_function() sys_message = """You are a helpful and thoughtful assistant.""" + # Ask the model to extract the answer from the response. user_message = [ """Your job is to extract a possible answer to the following question from the given text. - First review the following task. @@ -89,6 +97,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor ) self.logger.info("Extracted answer: " + extracted_answer) + # Ask the model to check the answer for correctness. user_message = [ """Your job is to decide whether a given answer to a task is correct or not. - You will be given the task description and the correct, gold-standard answer, along with the answer to be graded. 
@@ -113,7 +122,7 @@ async def is_response_correct(self, task_description, response_to_be_graded, cor ) self.logger.info("Decision: " + decision) - self.logger.leave_function() if self.report_results: self.client.report_result(decision) + self.logger.leave_function() return decision == "1", extracted_answer diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py index 9855c67d3043..b86ec1df7f38 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py @@ -14,13 +14,7 @@ SystemMessage, UserMessage, ) - -# Convenience types -UserContent = Union[str, List[Union[str, Image]]] -AssistantContent = Union[str, List[FunctionCall]] -FunctionExecutionContent = List[FunctionExecutionResult] -SystemContent = str -MessageContent = UserContent | AssistantContent | SystemContent | FunctionExecutionContent +from ._utils import MessageContent def html_opening(file_title: str, finished: bool = False) -> str: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py index 974cad1a5fc5..41940888ea7a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py @@ -3,17 +3,18 @@ from autogen_core import FunctionCall, Image from autogen_core.models import FunctionExecutionResult, LLMMessage -# Convenience type +# Convenience types UserContent = Union[str, List[Union[str, Image]]] AssistantContent = Union[str, List[FunctionCall]] FunctionExecutionContent = List[FunctionExecutionResult] SystemContent = str +MessageContent = UserContent | AssistantContent | SystemContent | FunctionExecutionContent -# Convert UserContent to a string -def message_content_to_str( - message_content: UserContent | AssistantContent | SystemContent | FunctionExecutionContent, -) -> str: +def message_content_to_str(message_content: MessageContent) -> str: + """ + Converts the message content to a string. + """ if message_content is None: return "" elif isinstance(message_content, str): @@ -33,6 +34,9 @@ def message_content_to_str( def text_from_user_content(user_content: UserContent) -> str: + """ + Extracts just the text from the user content. + """ if isinstance(user_content, str): return user_content elif isinstance(user_content, List): @@ -46,6 +50,9 @@ def text_from_user_content(user_content: UserContent) -> str: def single_image_from_user_content(user_content: UserContent) -> Union[Image, None]: + """ + Extracts a single image from the user content. + """ image_to_return = None if isinstance(user_content, str): return None From 4ec9bff78eaf86316872c09c6c0d1f5eb62063f7 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 23 Jan 2025 17:31:13 -0800 Subject: [PATCH 53/93] docstrings etc. 
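As orientation for the grader changes above: the two-step grading flow (extract an answer from the response, then ask for a binary "1"/"0" correctness decision) can be driven roughly like this. A sketch only: the client can be any ChatCompletionClient, and the settings dict passed to PageLogger is an assumption inferred from check.yaml.

from autogen_core.models import ChatCompletionClient

from autogen_ext.apprentice import Grader, PageLogger


async def grade_once(client: ChatCompletionClient) -> None:
    logger = PageLogger({"enabled": 0, "path": "~/pagelogs/demo"})  # assumed settings shape
    grader = Grader(client, logger)
    # is_response_correct first extracts an answer from the response,
    # then asks the model for a binary correctness decision.
    is_correct, extracted_answer = await grader.is_response_correct(
        task_description="What is 3 times 14? Put the answer on the last line.",
        response_to_be_graded="Three fourteens make 42.\n42",
        correct_answer="42",
    )
    print(is_correct, extracted_answer)  # Expected: True 42

Given a constructed model client, run it with asyncio.run(grade_once(client)). Returning the extracted answer alongside the boolean matches the Tuple[bool, str] annotation added in this patch.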
--- .../apprentice/_agentic_memory_controller.py | 2 +- .../src/autogen_ext/apprentice/_grader.py | 19 ++-- .../src/autogen_ext/apprentice/_prompter.py | 90 +++++++++++++------ 3 files changed, 76 insertions(+), 35 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py index 0b14042a156d..d6c6d919aad7 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py @@ -266,7 +266,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Try to learn from this failure. self.logger.info("\nResponse is INCORRECT. Try to learn from this failure.\n") insight = await self.prompter.learn_from_failure( - task, memory_section, response, expected_answer, work_history, new_insights + task, memory_section, response, expected_answer, work_history ) self.logger.info("\nInsight: {}\n".format(insight)) new_insights.append(insight) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py index cadce2dcf25b..b824c98715bb 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py @@ -8,6 +8,7 @@ SystemMessage, UserMessage, ) + from ._page_logger import PageLogger from ._utils import UserContent @@ -15,6 +16,14 @@ class Grader: """ Determines task success without limitation to string matches. + + Args: + client: The client to call the model. + logger: The logger to log the model calls. + + Methods: + call_model: Calls the model with the given input and returns the response. + is_response_correct: Determines whether the response is equivalent to the task's correct answer. """ def __init__(self, client: ChatCompletionClient, logger: PageLogger): self.client = client @@ -28,7 +37,7 @@ def __init__(self, client: ChatCompletionClient, logger: PageLogger): async def call_model( self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True - ): + ) -> str: """ Calls the model client with the given input and returns the response. """ @@ -63,9 +72,9 @@ async def call_model( # Return the response as a string return response_string - def clear_history(self) -> None: + def _clear_history(self) -> None: """ - Empties the chat history message list. + Empties the message list containing the chat history. 
""" self._chat_history = [] @@ -91,7 +100,7 @@ async def is_response_correct(self, task_description: str, response_to_be_graded user_message.append(task_description) user_message.append("\n# Text that may contain an answer") user_message.append(response_to_be_graded) - self.clear_history() + self._clear_history() extracted_answer = await self.call_model( summary="Ask the model to extract the answer", system_message_content=sys_message, user_content=user_message ) @@ -114,7 +123,7 @@ async def is_response_correct(self, task_description: str, response_to_be_graded user_message.append(correct_answer) user_message.append("\n# Answer to be graded") user_message.append(extracted_answer) - self.clear_history() + self._clear_history() decision = await self.call_model( summary="Ask the model to check the answer for correctness", system_message_content=sys_message, diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index 7a935657d443..f36516e14743 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -1,20 +1,38 @@ import time from typing import List -from autogen_core import FunctionCall, Image +from autogen_core import Image from autogen_core.models import ( AssistantMessage, + ChatCompletionClient, CreateResult, LLMMessage, SystemMessage, UserMessage, ) -from ._utils import UserContent, message_content_to_str, single_image_from_user_content, text_from_user_content +from ._page_logger import PageLogger +from ._utils import UserContent class Prompter: - def __init__(self, client, logger): + """ + Centralizes most of the Apprentice prompts sent to the model client. + + Args: + client: The client to call the model. + logger: The logger to log the model calls. + + Methods: + call_model: Calls the model client with the given input and returns the response. + learn_from_failure: Tries to create an insight to help avoid the given failure in the future. + find_index_topics: Returns a list of topics related to the given string. + generalize_task: Attempts to rewrite a task description in a more general form. + validate_insight: Judges whether the insight could help solve the task. + extract_task: Returns a task found in the given text, or None if not found. + extract_advice: Returns advice from the given text, or None if not found. + """ + def __init__(self, client: ChatCompletionClient, logger: PageLogger): self.client = client self.logger = logger self.default_system_message_content = "You are a helpful assistant." @@ -26,8 +44,11 @@ def __init__(self, client, logger): self._chat_history: List[LLMMessage] = [] async def call_model( - self, summary, user_content: UserContent = None, system_message_content=None, keep_these_messages=True - ): + self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True + ) -> str: + """ + Calls the model client with the given input and returns the response. 
+ """ # Prepare the input message list if system_message_content is None: system_message_content = self.default_system_message_content @@ -51,13 +72,11 @@ async def call_model( # Call the model start_time = time.time() response = await self.client.create(input_messages) - assert isinstance(response, CreateResult) response_string = response.content assert isinstance(response_string, str) response_message = AssistantMessage(content=response_string, source="Assistant") assert isinstance(response_message, AssistantMessage) - self.time_spent_in_model_calls += time.time() - start_time self.num_model_calls += 1 @@ -72,14 +91,18 @@ async def call_model( # Return the response as a string for now return response_string - def clear_history(self): + def _clear_history(self): + """ + Empties the message list containing the chat history. + """ self._chat_history = [] async def learn_from_failure( - self, task_description, memory_section, final_response, expected_answer, work_history, insights + self, task_description: str, memory_section: str, final_response: str, expected_answer: str, work_history: str ): - # Try to create an insight to help avoid this failure in the future. - + """ + Tries to create an insight to help avoid the given failure in the future. + """ sys_message = """- You are a patient and thorough teacher. - Your job is to review work done by students and help them learn how to do better.""" @@ -105,7 +128,7 @@ async def learn_from_failure( "# Now carefully review the students' work above, explaining in detail what the students did right and what they did wrong.\n" ) - self.clear_history() + self._clear_history() await self.call_model( summary="Ask the model to learn from this failure", system_message_content=sys_message, @@ -130,9 +153,10 @@ async def learn_from_failure( ) return insight - async def find_index_topics(self, input_string): - # Returns a list of topics related to the input string. - + async def find_index_topics(self, input_string: str) -> List[str]: + """ + Returns a list of topics related to the given string. + """ sys_message = """You are an expert at semantic analysis.""" user_message = [] @@ -147,12 +171,12 @@ async def find_index_topics(self, input_string): user_message.append("# Text to be indexed\n") user_message.append(input_string) - self.clear_history() + self._clear_history() topics = await self.call_model( summary="Ask the model to extract topics", system_message_content=sys_message, user_content=user_message ) - # Parse the topics into a python list. + # Parse the topics into a list. topic_list = [] for line in topics.split("\n"): if (line is not None) and (len(line) > 0): @@ -160,8 +184,10 @@ async def find_index_topics(self, input_string): return topic_list - async def generalize_task(self, task_description): - # Returns a list of topics related to the input string. + async def generalize_task(self, task_description: str) -> str: + """ + Attempts to rewrite a task description in a more general form. 
+ """ sys_message = """You are a helpful and thoughtful assistant.""" @@ -171,7 +197,7 @@ async def generalize_task(self, task_description): user_message.append("\n# Task description") user_message.append(task_description) - self.clear_history() + self._clear_history() await self.call_model( summary="Ask the model to rephrase the task in a list of important points", system_message_content=sys_message, @@ -197,8 +223,10 @@ async def generalize_task(self, task_description): ) return generalized_task - async def validate_insight(self, insight, task_description): - # Determines whether the insight could help solve the task. + async def validate_insight(self, insight: str, task_description: str) -> bool: + """ + Judges whether the insight could help solve the task. + """ sys_message = """You are a helpful and thoughtful assistant.""" @@ -213,7 +241,7 @@ async def validate_insight(self, insight, task_description): user_message.append(task_description) user_message.append("\n# Possibly useful insight") user_message.append(insight) - self.clear_history() + self._clear_history() response = await self.call_model( summary="Ask the model to validate the insight", system_message_content=sys_message, @@ -221,8 +249,10 @@ async def validate_insight(self, insight, task_description): ) return response == "1" - async def extract_task(self, text): - # Returns a task from the given text, or None if none is found. + async def extract_task(self, text: str) -> str: + """ + Returns a task found in the given text, or None if not found. + """ sys_message = """You are a helpful and thoughtful assistant.""" user_message = [ """Does the following text contain a question or a some task we are being asked to perform? @@ -232,14 +262,16 @@ async def extract_task(self, text): ] user_message.append("\n# Text to analyze") user_message.append(text) - self.clear_history() + self._clear_history() response = await self.call_model( summary="Ask the model to extract a task", system_message_content=sys_message, user_content=user_message ) return response if response != "None" else None - async def extract_advice(self, text): - # Returns a task from the given text, or None if none is found. + async def extract_advice(self, text: str) -> str: + """ + Returns advice from the given text, or None if not found. + """ sys_message = """You are a helpful and thoughtful assistant.""" user_message = [ """Does the following text contain any information or advice that might be useful later? @@ -248,7 +280,7 @@ async def extract_advice(self, text): ] user_message.append("\n# Text to analyze") user_message.append(text) - self.clear_history() + self._clear_history() response = await self.call_model( summary="Ask the model to extract advice", system_message_content=sys_message, user_content=user_message ) From 76c16f955c7acf4c717a2bd066dd82fabb47e0e7 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 23 Jan 2025 17:56:22 -0800 Subject: [PATCH 54/93] docstrings etc. 
--- .../eval_learning_from_demonstration.py | 2 +- .../src/autogen_ext/apprentice/__init__.py | 8 ++-- .../src/autogen_ext/apprentice/_prompter.py | 2 +- .../{_agent_wrapper.py => agent_wrapper.py} | 39 ++++++++++++++----- ...roller.py => agentic_memory_controller.py} | 2 +- .../src/autogen_ext/apprentice/apprentice.py | 4 +- .../apprentice/{_grader.py => grader.py} | 2 +- .../{_page_logger.py => page_logger.py} | 0 8 files changed, 40 insertions(+), 19 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/apprentice/{_agent_wrapper.py => agent_wrapper.py} (75%) rename python/packages/autogen-ext/src/autogen_ext/apprentice/{_agentic_memory_controller.py => agentic_memory_controller.py} (99%) rename python/packages/autogen-ext/src/autogen_ext/apprentice/{_grader.py => grader.py} (99%) rename python/packages/autogen-ext/src/autogen_ext/apprentice/{_page_logger.py => page_logger.py} (100%) diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 9ffce8c1598d..06b221d0d5a8 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -27,7 +27,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg logger=logger, ) success_rate = round((num_successes / num_trials) * 100) - results_str_1 = "Baseline success rate: {}%".format(success_rate) + results_str_1 = "Success rate before demonstration: {}%".format(success_rate) logger.info("\n" + results_str_1) # Provide a demonstration for a similar but different task. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py index a086019e85e5..ee15fd0e08de 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py @@ -1,5 +1,7 @@ -from ._grader import Grader -from ._page_logger import PageLogger +from .grader import Grader +from .page_logger import PageLogger from .apprentice import Apprentice +from .agent_wrapper import AgentWrapper +from .agentic_memory_controller import AgenticMemoryController -__all__ = ["Apprentice", "PageLogger", "Grader"] +__all__ = ["Apprentice", "PageLogger", "Grader", "AgentWrapper", "AgenticMemoryController"] diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py index f36516e14743..6f35add7e64b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py @@ -11,7 +11,7 @@ UserMessage, ) -from ._page_logger import PageLogger +from .page_logger import PageLogger from ._utils import UserContent diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/agent_wrapper.py similarity index 75% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/apprentice/agent_wrapper.py index 7321d9155abf..b5f8a0c6380a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/agent_wrapper.py @@ -1,9 +1,10 @@ -from typing import Tuple +from typing 
import Tuple, Dict from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat from autogen_agentchat.ui._console import Console from autogen_core.models import ( + ChatCompletionClient, SystemMessage, UserMessage, ) @@ -11,34 +12,50 @@ from autogen_ext.agents.web_surfer import MultimodalWebSurfer from autogen_ext.agents.web_surfer._utils import message_content_to_str +from .page_logger import PageLogger + class AgentWrapper: - def __init__(self, settings, client, logger): + """ + Wraps the base agent to route calls to it appropriately, after instantiating it if necessary. + Users can override this class to add methods for calling other agents. + + Args: + settings: The settings for the agent. + client: The client to call the model. + logger: The logger to log the model calls. + + Methods: + assign_task: Passes the given task to the base agent. + """ + def __init__(self, settings: Dict, client: ChatCompletionClient, logger: PageLogger): self.settings = settings self.client = client self.logger = logger self.base_agent_name = self.settings["base_agent"] - async def assign_task(self, task): + async def assign_task(self, task: str) -> Tuple[str, str]: """ - Assigns a task to the base agent. + Passes the given task to the base agent. """ self.logger.enter_function() # Pass the task through to the base agent. if self.base_agent_name == "MagenticOneGroupChat": - response, work_history = await self.assign_task_to_magentic_one(task) + response, work_history = await self._assign_task_to_magentic_one(task) elif self.base_agent_name == "thin_agent": - response, work_history = await self.assign_task_to_thin_agent(task) + response, work_history = await self._assign_task_to_thin_agent(task) else: raise AssertionError("Invalid base agent") self.logger.leave_function() return response, work_history - async def assign_task_to_thin_agent(self, task): + async def _assign_task_to_thin_agent(self, task: str) -> Tuple[str, str]: + """ + Passes the given task directly to the model client, along with a detailed "think carefully" system prompt. + """ self.logger.enter_function() - self.logger.info(task) system_message_content = """You are a helpful and thoughtful assistant. @@ -74,9 +91,11 @@ async def assign_task_to_thin_agent(self, task): self.logger.leave_function() return response_str, work_history - async def assign_task_to_magentic_one(self, task) -> Tuple[str, str]: + async def _assign_task_to_magentic_one(self, task) -> Tuple[str, str]: + """ + Instantiates a MagenticOneGroupChat team, and passes the given task to it. 
+ """ self.logger.enter_function() - self.logger.info(task) general_agent = AssistantAgent( diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py rename to python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py index d6c6d919aad7..2d0f0399807e 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py @@ -1,7 +1,7 @@ from typing import Callable, List from ._agentic_memory_bank import AgenticMemoryBank -from ._grader import Grader +from .grader import Grader from ._prompter import Prompter diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index 0531fdf55555..c21052b67bbe 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -1,5 +1,5 @@ -from ._agent_wrapper import AgentWrapper -from ._agentic_memory_controller import AgenticMemoryController +from .agent_wrapper import AgentWrapper +from .agentic_memory_controller import AgenticMemoryController class Apprentice: diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/grader.py similarity index 99% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py rename to python/packages/autogen-ext/src/autogen_ext/apprentice/grader.py index b824c98715bb..e82c7eabf82d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/grader.py @@ -9,7 +9,7 @@ UserMessage, ) -from ._page_logger import PageLogger +from .page_logger import PageLogger from ._utils import UserContent diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/page_logger.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_page_logger.py rename to python/packages/autogen-ext/src/autogen_ext/apprentice/page_logger.py From a8cd0d7e8d30702a573cb1ef7168e73ee15d2207 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 24 Jan 2025 12:58:47 -0800 Subject: [PATCH 55/93] docstrings etc. --- .../packages/ame/src/ame/settings/check.yaml | 4 +- .../apprentice/_agentic_memory_bank.py | 9 +- .../apprentice/_string_similarity_map.py | 112 ++++++++---------- .../apprentice/agentic_memory_controller.py | 4 +- .../src/autogen_ext/apprentice/apprentice.py | 2 +- 5 files changed, 58 insertions(+), 73 deletions(-) diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 423947383a2e..dc123e1b44cc 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp12 + path: ~/pagelogs/temp14 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. 
@@ -27,6 +27,8 @@ fast_learning_agent: max_test_trials: 1 # 1-3 AgenticMemoryBank: path: ~/agentic_memory_bank/temp + StringSimilarityMap: + verbose: 1 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index fb97e665b5f2..fb0fe2430ba6 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -4,6 +4,7 @@ from typing import Dict, List, Optional, Union from ._string_similarity_map import StringSimilarityMap +from .page_logger import PageLogger @dataclass @@ -22,13 +23,11 @@ class AgenticMemoryBank: def __init__( self, settings: Dict, - verbosity: Optional[int] = 0, - reset: Optional[bool] = False, - logger=None, + reset: bool, + logger: PageLogger, ): """ Args: - - verbosity (Optional, int): 1 to print memory operations, 0 to omit them. 3+ to print string-pair lists. - reset (Optional, bool): True to clear the DB before starting. Default False - logger (Optional, PageLogger): the PageLogger object to use for logging. """ @@ -41,7 +40,7 @@ def __init__( self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") self.string_map = StringSimilarityMap( - verbosity=verbosity, reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger + settings=self.settings["StringSimilarityMap"], reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger ) # Load or create the associated insight dict on disk. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py index e5e4c929c933..d1435a032830 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py @@ -1,44 +1,44 @@ import os import pickle -from typing import Optional, Union +from typing import Optional, Union, Dict, List, Tuple import chromadb from chromadb.config import Settings +from .page_logger import PageLogger + class StringSimilarityMap: """ - Provides string-pair storage and retrieval using a vector database. + Provides storage and similarity-based retrieval of string pairs using a vector database. Each DB entry is a pair of strings: an input string and an output string. The input string is embedded and used as the retrieval key. The output string can be anything, but it's typically used as a dict key. Vector embeddings are currently supplied by Chroma's default Sentence Transformers. - """ - def __init__( - self, - verbosity: Optional[int] = 3, - reset: Optional[bool] = False, - path_to_db_dir: Optional[str] = None, - logger=None, - ): - """ - Args: - - verbosity (Optional, int): 1 to log memory operations, 0 to omit them. 3+ to log string-pair lists. - - reset (Optional, bool): True to clear the DB before starting. Default False. - - path_to_db_dir (Optional, str): path to the directory where the DB is stored. - - logger (Optional, PageLogger): the PageLogger object to use for logging. - """ + Args: + - settings: The settings for the string similarity map. + - reset: True to clear the DB immediately after creation. + - path_to_db_dir: Path to the directory where the DB is stored. + - logger: The PageLogger object to use for logging. 
+ + Methods: + - add_input_output_pair: Adds one input-output string pair to the DB. + - get_related_string_pairs: Retrieves up to n string pairs related to the given query text within the specified distance threshold. + - reset_db: Forces immediate deletion of the DB's contents, in memory and on disk. + - save_string_pairs: Saves the string-pair dict to disk. + """ + def __init__(self, settings: Dict, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None: + self.settings = settings self.logger = logger - self.logger.enter_function() - self.verbosity = verbosity + self.verbose = self.settings["verbose"] self.path_to_db_dir = path_to_db_dir # Load or create the vector DB on disk. - settings = Settings( + chromadb_settings = Settings( anonymized_telemetry=False, allow_reset=True, is_persistent=True, persist_directory=path_to_db_dir ) - self.db_client = chromadb.Client(settings) + self.db_client = chromadb.Client(chromadb_settings) self.vec_db = self.db_client.create_collection("string-pairs", get_or_create=True) # The collection is the DB. # Load or create the associated string-pair dict on disk. @@ -46,87 +46,73 @@ def __init__( self.uid_text_dict = {} self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - if self.verbosity >= 1: + if self.verbose: self.logger.info("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) self.logger.info(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) self.last_string_pair_id = len(self.uid_text_dict) - if self.verbosity >= 1: + if self.verbose and len(self.uid_text_dict) > 0: self.logger.info("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) - if self.verbosity >= 3: - self.list_string_pairs() + self._log_string_pairs() # Clear the DB if requested. if reset: self.reset_db() - self.logger.leave_function() - def list_string_pairs(self): - """Prints the string-pair contents.""" + def _log_string_pairs(self) -> None: + """ + Logs all string pairs currently in the map. + """ self.logger.info("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text self.logger.info(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) - def save_string_pairs_to_text_files(self): - """Saves the contents to text files.""" - self.logger.enter_function() - # Delete all files in mem_text dir. - for file in os.listdir("mem_text"): - os.remove(os.path.join("mem_text", file)) - - if self.verbosity >= 1: - self.logger.info("LIST OF STRING PAIRS") - for uid, text in self.uid_text_dict.items(): - input_text, output_text = text - if self.verbosity >= 1: - self.logger.info( - " ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text) - ) - # Save the input string to a file with the same name as the string-pair ID in the mem_text dir, which is a subdir of the dir containing this file. - with open("mem_text/{}.txt".format(uid), "w") as file: - file.write(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) - self.logger.leave_function() - - def save_string_pairs(self): - """Saves self.uid_text_dict to disk.""" + def save_string_pairs(self) -> None: + """ + Saves the string-pair dict (self.uid_text_dict) to disk. 
+ """ with open(self.path_to_dict, "wb") as file: pickle.dump(self.uid_text_dict, file) - def reset_db(self): - """Forces immediate deletion of the DB's contents, in memory and on disk.""" - self.logger.enter_function() - if self.verbosity >= 1: + def reset_db(self) -> None: + """ + Forces immediate deletion of the DB's contents, in memory and on disk. + """ + if self.verbose: self.logger.info("\nCLEARING STRING-PAIR MAP") self.db_client.delete_collection("string-pairs") self.vec_db = self.db_client.create_collection("string-pairs") self.uid_text_dict = {} self.save_string_pairs() - self.logger.leave_function() - def add_input_output_pair(self, input_text: str, output_text: str): - """Adds an input-output pair to the vector DB.""" + def add_input_output_pair(self, input_text: str, output_text: str) -> None: + """ + Adds one input-output string pair to the DB. + """ self.last_string_pair_id += 1 self.vec_db.add(documents=[input_text], ids=[str(self.last_string_pair_id)]) self.uid_text_dict[str(self.last_string_pair_id)] = input_text, output_text - if self.verbosity >= 1: + if self.verbose: self.logger.info( "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( self.last_string_pair_id, input_text, output_text ) ) - if self.verbosity >= 3: - self.list_string_pairs() + self._log_string_pairs() - def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]): - """Retrieves STRING PAIRS that are related to the given query text within the specified distance threshold.""" + def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]) -> List[Tuple[str, str, float]]: + """ + Retrieves up to n string pairs that are related to the given query text within the specified distance threshold. 
+ """ if n_results > len(self.uid_text_dict): n_results = len(self.uid_text_dict) if n_results > 0: results = self.vec_db.query(query_texts=[query_text], n_results=n_results) num_results = len(results["ids"][0]) else: + results = [] num_results = 0 string_pairs = [] for i in range(num_results): @@ -134,7 +120,7 @@ def get_related_string_pairs(self, query_text: str, n_results: int, threshold: U if distance < threshold: input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 - if self.verbosity >= 1: + if self.verbose: self.logger.info( "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( input_text, output_text, distance diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py index 2d0f0399807e..8f1b67e08ff3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py @@ -14,9 +14,7 @@ def __init__(self, settings, agent, reset, client, logger): self.agent = agent self.client = client self.prompter = Prompter(client, logger) - self.memory_bank = AgenticMemoryBank( - self.settings["AgenticMemoryBank"], verbosity=3, reset=reset, logger=logger - ) + self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], reset=reset, logger=logger) self.grader = Grader(client, logger) self.logger.leave_function() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index c21052b67bbe..c5ead3142fa5 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -17,7 +17,7 @@ def __init__(self, settings, evaluator, client, logger): self.memory_controller = AgenticMemoryController( settings=self.settings["AgenticMemoryController"], agent=self.agent, - reset=False, + reset=True, client=self.client, logger=self.logger, ) From ed7fae1067718744c1768b83914c79e779963706 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 24 Jan 2025 16:02:44 -0800 Subject: [PATCH 56/93] docstrings etc. --- .../ame/src/ame/settings/baseline.yaml | 5 + .../packages/ame/src/ame/settings/check.yaml | 5 +- python/packages/ame/src/ame/settings/m1.yaml | 5 + .../apprentice/_agentic_memory_bank.py | 129 ++++++++++-------- .../apprentice/_string_similarity_map.py | 2 +- .../apprentice/agentic_memory_controller.py | 12 +- 6 files changed, 92 insertions(+), 66 deletions(-) diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml index 83b926b6ba12..855ea81f7883 100644 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ b/python/packages/ame/src/ame/settings/baseline.yaml @@ -27,6 +27,11 @@ fast_learning_agent: max_test_trials: 1 # 1-3 AgenticMemoryBank: path: ~/agentic_memory_bank/temp + relevance_conversion_threshold: 1.7 + n_results: 25 + distance_threshold: 100 + StringSimilarityMap: + verbose: 1 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. 
diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index dc123e1b44cc..8b3729215aa8 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp14 + path: ~/pagelogs/temp15 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. @@ -27,6 +27,9 @@ fast_learning_agent: max_test_trials: 1 # 1-3 AgenticMemoryBank: path: ~/agentic_memory_bank/temp + relevance_conversion_threshold: 1.7 + n_results: 25 + distance_threshold: 100 StringSimilarityMap: verbose: 1 AgentWrapper: diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index c71be8e95a8c..cd2930c6da51 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -23,6 +23,11 @@ fast_learning_agent: max_test_trials: 1 # 1-3 AgenticMemoryBank: path: ~/agentic_memory_bank/m1 + relevance_conversion_threshold: 1.7 + n_results: 25 + distance_threshold: 100 + StringSimilarityMap: + verbose: 1 AgentWrapper: base_agent: MagenticOneGroupChat # MagenticOneGroupChat, thin_agent, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index fb0fe2430ba6..c8215e46a339 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -9,6 +9,9 @@ @dataclass class Insight: + """ + Represents a task-completion insight, which is a string that may help solve a task. + """ id: str insight_str: str task_str: str @@ -18,24 +21,22 @@ class Insight: class AgenticMemoryBank: """ Stores task-completion insights in a vector DB for later retrieval. - """ - def __init__( - self, - settings: Dict, - reset: bool, - logger: PageLogger, - ): - """ - Args: - - reset (Optional, bool): True to clear the DB before starting. Default False - - logger (Optional, PageLogger): the PageLogger object to use for logging. - """ + Args: + - settings: Settings for the memory bank. + - reset: True to clear the DB before starting. + - logger: The PageLogger object to use for logging. + """ + def __init__(self, settings: Dict, reset: bool, logger: PageLogger) -> None: + self.settings = settings self.logger = logger self.logger.enter_function() - self.settings = settings memory_dir_path = os.path.expanduser(self.settings["path"]) + self.relevance_conversion_threshold = self.settings["relevance_conversion_threshold"] + self.n_results = self.settings["n_results"] + self.distance_threshold = self.settings["distance_threshold"] + path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") @@ -56,58 +57,82 @@ def __init__( # Clear the DB if requested. if reset: - self.reset_insights() + self._reset_insights() self.logger.leave_function() - def reset(self): + def reset(self) -> None: + """ + Forces immediate deletion of all contents, in memory and on disk. + """ self.string_map.reset_db() - self.reset_insights() + self._reset_insights() - def reset_insights(self): - """Forces immediate deletion of the insights, in memory and on disk.""" + def _reset_insights(self) -> None: + """ + Forces immediate deletion of the insights, in memory and on disk. 
+ """ self.uid_insight_dict = {} self.save_insights() - def contains_insights(self): - return len(self.uid_insight_dict) > 0 - - def save_insights(self): + def save_insights(self) -> None: + """ + Saves the current insight structures (possibly empty) to disk. + """ self.string_map.save_string_pairs() with open(self.path_to_dict, "wb") as file: pickle.dump(self.uid_insight_dict, file) - def add_insight(self, insight_str: str, task_str: Optional[str] = None, topics: Optional[List[str]] = None): - """Adds an insight to the memory bank.""" - assert topics is not None, "For now, the topics list must be provided." + def contains_insights(self) -> bool: + """ + Returns True if the memory bank contains any insights. + """ + return len(self.uid_insight_dict) > 0 + + def _map_topics_to_insight(self, topics: List[str], insight_id: str, insight: Insight) -> None: + """ + Adds a mapping in the vec DB from each topic to the insight. + """ + self.logger.enter_function() + self.logger.info("\nINSIGHT\n{}".format(insight.insight_str)) + for topic in topics: + self.logger.info("\n TOPIC = {}".format(topic)) + self.string_map.add_input_output_pair(topic, insight_id) + self.uid_insight_dict[insight_id] = insight + self.logger.leave_function() + + def add_insight(self, insight_str: str, topics: List[str], task_str: Optional[str] = None) -> None: + """ + Adds an insight to the memory bank, given topics related to the insight, and optionally the task. + """ self.last_insight_id += 1 id_str = str(self.last_insight_id) insight = Insight(id=id_str, insight_str=insight_str, task_str=task_str, topics=topics) - for topic in topics: - # Add a mapping in the vec DB from each topic to the insight. - self.string_map.add_input_output_pair(topic, id_str) - self.uid_insight_dict[str(id_str)] = insight - self.save_insights() + self._map_topics_to_insight(topics, id_str, insight) - def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional[List[str]] = None): - """Returns any insights from the memory bank that are relevant to the given task or topics.""" - assert (task_str is not None) or ( - topics is not None - ), "Either the task string or the topics list must be provided." - assert topics is not None, "For now, the topics list is always required, because it won't be generated." - - # Build a dict of insight-relevance pairs. - insight_relevance_dict = {} - relevance_conversion_threshold = ( - 1.7 # The approximate borderline between relevant and irrelevant topic matches. - ) + def add_demonstration(self, task: str, insight: str, topics: List[str]) -> None: + """ + Adds a task-insight pair to the memory bank, to be retrieved together later. + This is useful when the insight is a demonstration of how to solve a given type of task. + """ + self.last_insight_id += 1 + id_str = str(self.last_insight_id) + # Prepend the insight to the task description for context. + insight_str = "Example task:\n\n{}\n\nExample solution:\n\n{}".format(task, insight) + insight = Insight(id=id_str, insight_str=insight_str, task_str=task, topics=topics) + self._map_topics_to_insight(topics, id_str, insight) - # Process the matching topics. + def get_relevant_insights(self, task_topics: List[str]) -> Dict[str, float]: + """ + Returns any insights from the memory bank that appear sufficiently relevant to the given task topics. + """ + # Process the matching topics to build a dict of insight-relevance pairs. 
matches = [] # Each match is a tuple: (topic, insight, distance) - for topic in topics: - matches.extend(self.string_map.get_related_string_pairs(topic, 25, 100)) + insight_relevance_dict = {} + for topic in task_topics: + matches.extend(self.string_map.get_related_string_pairs(topic, self.n_results, self.distance_threshold)) for match in matches: - relevance = relevance_conversion_threshold - match[2] + relevance = self.relevance_conversion_threshold - match[2] insight_id = match[1] insight_str = self.uid_insight_dict[insight_id].insight_str if insight_str in insight_relevance_dict: @@ -121,15 +146,3 @@ def get_relevant_insights(self, task_str: Optional[str] = None, topics: Optional del insight_relevance_dict[insight] return insight_relevance_dict - - def add_demonstration(self, task: str, demonstration: str, topics: List[str]): - """Adds a task-demonstration pair (as a single insight) to the memory bank.""" - self.last_insight_id += 1 - id_str = str(self.last_insight_id) - insight_str = "Example task:\n\n{}\n\nExample solution:\n\n{}".format(task, demonstration) - insight = Insight(id=id_str, insight_str=insight_str, task_str=task, topics=topics) - for topic in topics: - # Add a mapping in the vec DB from each topic to the insight. - self.string_map.add_input_output_pair(topic, id_str) - self.uid_insight_dict[str(id_str)] = insight - self.save_insights() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py index d1435a032830..8535633bf5e9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py @@ -100,7 +100,7 @@ def add_input_output_pair(self, input_text: str, output_text: str) -> None: self.last_string_pair_id, input_text, output_text ) ) - self._log_string_pairs() + # self._log_string_pairs() # For deep debugging, uncomment to log all string pairs after each addition. def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]) -> List[Tuple[str, str, float]]: """ diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py index 8f1b67e08ff3..9bff1d3d528a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py @@ -108,7 +108,7 @@ async def add_insight_to_memory(self, task: str, insight: str): self.logger.info("") # Add the insight to the memory bank. - self.memory_bank.add_insight(insight, generalized_task, topics) + self.memory_bank.add_insight(insight, topics, generalized_task) self.logger.leave_function() @@ -126,7 +126,7 @@ async def add_insight_without_task_to_memory(self, insight: str): self.logger.info("") # Add the insight to the memory bank. - self.memory_bank.add_insight(insight, None, topics) + self.memory_bank.add_insight(insight, topics, None) self.logger.leave_function() @@ -142,13 +142,13 @@ async def retrieve_relevant_insights(self, task: str): generalized_task = await self.prompter.generalize_task(task) # Get a list of topics from the task. 
- topics = await self.prompter.find_index_topics(generalized_task) + task_topics = await self.prompter.find_index_topics(generalized_task) self.logger.info("\nTOPICS EXTRACTED FROM TASK:") - self.logger.info("\n".join(topics)) + self.logger.info("\n".join(task_topics)) self.logger.info("") # Retrieve relevant insights from the memory bank. - relevant_insights_and_relevances = self.memory_bank.get_relevant_insights(topics=topics) + relevant_insights_and_relevances = self.memory_bank.get_relevant_insights(task_topics=task_topics) relevant_insights = [] self.logger.info("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) for insight, relevance in relevant_insights_and_relevances.items(): @@ -331,6 +331,6 @@ async def learn_from_demonstration(self, task, demonstration): self.logger.info("") # Add the insight to the memory bank. - self.memory_bank.add_demonstration(task, demonstration, topics) + self.memory_bank.add_demonstration(task=task, insight=demonstration, topics=topics) self.logger.leave_function() From 93de858ac70c31c777e5e9bb13fd1c1fb0d7e3ec Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 24 Jan 2025 17:33:39 -0800 Subject: [PATCH 57/93] docstrings etc. --- .../eval_learning_from_demonstration.py | 4 +- .../apprentice/_agentic_memory_bank.py | 14 +- .../apprentice/agentic_memory_controller.py | 172 ++++++++++-------- .../src/autogen_ext/apprentice/apprentice.py | 8 +- 4 files changed, 110 insertions(+), 88 deletions(-) diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 06b221d0d5a8..167b86d75c1e 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -12,7 +12,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg # Get the actual task and advice strings from their files. task_description_1, expected_answer_1 = evaluator.get_task_description_and_answer_from_file(task_1_file) demo_task, _ = evaluator.get_task_description_and_answer_from_file(task_2_file) - demo = evaluator.get_demo_from_file(demo_2_file) + demo_solution = evaluator.get_demo_from_file(demo_2_file) # Start by clearing memory then running a baseline test. logger.info("To get a baseline, clear memory, then assign the task.") @@ -32,7 +32,7 @@ async def eval_learning_from_demonstration(fast_learner, evaluator, client, logg # Provide a demonstration for a similar but different task. logger.info("Demonstrate a solution to a similar task.") - await fast_learner.learn_from_demonstration(demo_task, demo) + await fast_learner.add_task_solution_pair_to_memory(demo_task, demo_solution) # Now test again to see if the demonstration (retrieved from memory) helps. logger.info("Assign the task again to see if the demonstration helps.") diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py index c8215e46a339..bba22a828912 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py @@ -26,6 +26,14 @@ class AgenticMemoryBank: - settings: Settings for the memory bank. - reset: True to clear the DB before starting. - logger: The PageLogger object to use for logging. 
+
+    Methods:
+        - reset: Forces immediate deletion of all contents, in memory and on disk.
+        - save_insights: Saves the current insight structures (possibly empty) to disk.
+        - contains_insights: Returns True if the memory bank contains any insights.
+        - add_insight: Adds an insight to the memory bank, given topics related to the insight, and optionally the task.
+        - add_task_with_solution: Adds a task-insight pair to the memory bank, to be retrieved together later.
+        - get_relevant_insights: Returns any insights from the memory bank that appear sufficiently relevant to the given task topics.
     """
     def __init__(self, settings: Dict, reset: bool, logger: PageLogger) -> None:
         self.settings = settings
diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py
index 8f1b67e08ff3..6f4eac6870d0 100644
--- a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py
+++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py
@@ -1,38 +1,58 @@
-from typing import Callable, List
+from typing import Callable, Dict, List, Optional, Union, Tuple
+from autogen_core.models import (
+    ChatCompletionClient,
+)
 
 from ._agentic_memory_bank import AgenticMemoryBank
 from .grader import Grader
 from ._prompter import Prompter
+from .agent_wrapper import AgentWrapper
+from .page_logger import PageLogger
 
 
 class AgenticMemoryController:
+    """
+    Manages memory-based learning, testing, and the flow of information to and from the memory bank.
+
+    Args:
+        settings: Settings for the memory controller.
+        agent: The agent to use for task completion.
+        reset: True to clear the memory bank before starting.
+        client: The client to call the model.
+        logger: The logger to log the model calls.
+
+    Methods:
+        reset_memory: Resets the memory bank.
+        train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories.
+        test_on_task: Assigns a task to the completion agent, along with any relevant insights retrieved from memory.
+        add_insight_to_memory: Adds one insight to the memory bank, using the task (if provided) as context.
+ add_task_solution_pair_to_memory: Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. + retrieve_relevant_insights: Retrieve any insights from the DB that seem relevant to the task. + assign_task: Assigns a task to the agent, along with any relevant insights/memories. + handle_user_message: Handles a user message, extracting any advice and assigning a task to the agent. + """ + def __init__(self, settings: Dict, agent: AgentWrapper, reset: bool, client: ChatCompletionClient, logger: PageLogger) -> None: self.logger = logger self.logger.enter_function() - self.settings = settings self.agent = agent self.client = client self.prompter = Prompter(client, logger) self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], reset=reset, logger=logger) self.grader = Grader(client, logger) - self.logger.leave_function() - def reset_memory(self): + def reset_memory(self) -> None: + """ + Resets the memory bank. + """ self.memory_bank.reset() - async def train_on_task( - self, - task: str, # The task to be completed. - expected_answer: str, # The expected answer to the task. - ): + async def train_on_task(self, task: str, expected_answer: str) -> None: """ Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ self.logger.enter_function() - - # Attempt to create useful new memories. self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") _, insight = await self._iterate_on_task( task, expected_answer, self.settings["max_train_trials"], self.settings["max_test_trials"] @@ -41,17 +61,14 @@ async def train_on_task( self.logger.info("No useful insight was discovered.\n") else: self.logger.info("A new insight was created:\n{}".format(insight)) - # Add this insight to memory. - await self.add_insight_to_memory(task, insight) - + await self.add_insight_to_memory(insight, task) self.logger.leave_function() - async def test_on_task(self, task: str, expected_answer: str, num_trials=1): + async def test_on_task(self, task: str, expected_answer: str, num_trials=1) -> Tuple[str, int, int]: """ - Assigns a task to the completion agent, along with any relevant insights/memories. + Assigns a task to the completion agent, along with any relevant insights retrieved from memory. """ self.logger.enter_function() - response = None num_successes = 0 @@ -63,7 +80,7 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1): filtered_insights = await self.retrieve_relevant_insights(task) if len(filtered_insights) > 0: self.logger.info("Relevant insights were retrieved from memory.\n") - memory_section = self.format_memory_section(filtered_insights) + memory_section = self._format_memory_section(filtered_insights) if len(memory_section) > 0: task_plus_insights = task + "\n\n" + memory_section @@ -71,6 +88,7 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1): self.logger.info("Try to solve the task.\n") response, _ = await self.agent.assign_task(task_plus_insights) + # Check if the response is correct. response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer ) @@ -83,65 +101,75 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1): # Calculate the success rate as a percentage, rounded to the nearest whole number. 
self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - self.logger.leave_function() return response, num_successes, num_trials - async def add_insight_to_memory(self, task: str, insight: str): - # Adds an insight to the DB. + async def add_insight_to_memory(self, insight: str, task: None | str = None) -> None: + """ + Adds one insight to the memory bank, using the task (if provided) as context. + """ self.logger.enter_function() - self.logger.info("\nGIVEN TASK:") - self.logger.info(task) + generalized_task = None + if task is not None: + self.logger.info("\nGIVEN TASK:") + self.logger.info(task) + # Generalize the task. + generalized_task = await self.prompter.generalize_task(task) self.logger.info("\nGIVEN INSIGHT:") self.logger.info(insight) - # Generalize the task. - generalized_task = await self.prompter.generalize_task(task) - - # Get a combined list of topics from the task and insight. - task_plus_insight = generalized_task.strip() + "\n(Hint: " + insight + ")" + # Get a list of topics from the insight and the task (if provided). + if task is None: + task_plus_insight = insight + self.logger.info("\nTOPICS EXTRACTED FROM INSIGHT:") + else: + task_plus_insight = generalized_task.strip() + "\n(Hint: " + insight + ")" + self.logger.info("\nTOPICS EXTRACTED FROM TASK AND INSIGHT COMBINED:") topics = await self.prompter.find_index_topics(task_plus_insight) - self.logger.info("\nTOPICS EXTRACTED FROM TASK AND INSIGHT COMBINED:") self.logger.info("\n".join(topics)) self.logger.info("") # Add the insight to the memory bank. self.memory_bank.add_insight(insight, topics, generalized_task) - self.logger.leave_function() - async def add_insight_without_task_to_memory(self, insight: str): - # Adds an insight to the DB. + async def add_task_solution_pair_to_memory(self, task, solution) -> None: + """ + Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. + This is useful when the insight is a demonstration of how to solve a given type of task. + """ self.logger.enter_function() - self.logger.info("\nGIVEN INSIGHT:") - self.logger.info(insight) + self.logger.info("\nEXAMPLE TASK:") + self.logger.info(task) - # Get a list of topics from the insight. - topics = await self.prompter.find_index_topics(insight) - self.logger.info("\nTOPICS EXTRACTED FROM INSIGHT:") + self.logger.info("\nEXAMPLE SOLUTION:") + self.logger.info(solution) + + # Get a list of topics from the task. + topics = await self.prompter.find_index_topics(task.strip()) + self.logger.info("\nTOPICS EXTRACTED FROM TASK:") self.logger.info("\n".join(topics)) self.logger.info("") - # Add the insight to the memory bank. - self.memory_bank.add_insight(insight, topics, None) - + # Add the task and solution (as a combined insight) to the memory bank. + self.memory_bank.add_task_with_solution(task=task, solution=solution, topics=topics) self.logger.leave_function() - async def retrieve_relevant_insights(self, task: str): - # Retrieve insights from the DB that are relevant to the task. + async def retrieve_relevant_insights(self, task: str) -> List[str]: + """ + Retrieve any insights from the DB that seem relevant to the task. + """ self.logger.enter_function() if self.memory_bank.contains_insights(): self.logger.info("\nCURRENT TASK:") self.logger.info(task) - # Generalize the task. + # Get a list of topics from the generalized task. generalized_task = await self.prompter.generalize_task(task) - - # Get a list of topics from the task. 
task_topics = await self.prompter.find_index_topics(generalized_task) self.logger.info("\nTOPICS EXTRACTED FROM TASK:") self.logger.info("\n".join(task_topics)) @@ -171,7 +199,10 @@ async def retrieve_relevant_insights(self, task: str): self.logger.leave_function() return validated_insights - def format_memory_section(self, memories): + def _format_memory_section(self, memories) -> str: + """ + Formats a list of memories as a section for appending to a task description. + """ memory_section = "" if len(memories) > 0: memory_section = "## Important insights that may help solve tasks like this\n" @@ -179,12 +210,11 @@ def format_memory_section(self, memories): memory_section += "- " + mem + "\n" return memory_section - async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int): + async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int) -> Tuple[bool, str, str]: """ Attempts to solve the given task multiple times to find a failure case to learn from. """ self.logger.enter_function() - self.logger.info("\nTask description, including any insights: {}".format(task_plus_insights)) self.logger.info("\nExpected answer: {}\n".format(expected_answer)) @@ -212,9 +242,11 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a self.logger.leave_function() return failure_found, response, work_history - async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int): + async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int) -> Tuple[str, None | str]: + """ + Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. + """ self.logger.enter_function() - self.logger.info("\nTask description: {}".format(task)) self.logger.info("\nExpected answer: {}\n".format(expected_answer)) @@ -228,14 +260,13 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria # Loop until success (or timeout) while learning from failures. for trial in range(1, max_train_trials + 1): self.logger.info("\n----- TRAIN TRIAL {} -----\n".format(trial)) - task_plus_insights = task # Add any new insights we've accumulated so far. if last_insight is not None: - memory_section = self.format_memory_section(old_insights + [last_insight]) + memory_section = self._format_memory_section(old_insights + [last_insight]) else: - memory_section = self.format_memory_section(old_insights) + memory_section = self._format_memory_section(old_insights) if len(memory_section) > 0: task_plus_insights += "\n\n" + memory_section @@ -275,7 +306,7 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria self.logger.leave_function() return final_response, successful_insight - async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): + async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True) -> str: """ Assigns a task to the agent, along with any relevant insights/memories. 
""" @@ -286,9 +317,9 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo filtered_insights = await self.retrieve_relevant_insights(task) if len(filtered_insights) > 0: self.logger.info("Relevant insights were retrieved from memory.\n") - memory_section = self.format_memory_section(filtered_insights) + memory_section = self._format_memory_section(filtered_insights) task = task + "\n\n" + memory_section - # if len(memory_section) > 0: # Best to include this condition, but it will require new recordings. + # if len(memory_section) > 0: # Best to include this condition at some point, with new recordings. # task = task + '\n\n' + memory_section # Attempt to solve the task. @@ -301,36 +332,19 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo self.logger.leave_function() return response - async def handle_user_message(self, text, should_await=True): + async def handle_user_message(self, text: str, should_await: bool = True) -> str: + """ + Handles a user message, extracting any advice and assigning a task to the agent. + """ self.logger.enter_function() advice = await self.prompter.extract_advice(text) self.logger.info("Advice: {}".format(advice)) if advice is not None: - await self.add_insight_without_task_to_memory(advice) + await self.add_insight_to_memory(insight=advice) response = await self.assign_task(text, use_memory=(advice is None), should_await=should_await) self.logger.leave_function() return response - - async def learn_from_demonstration(self, task, demonstration): - self.logger.enter_function() - - self.logger.info("\nEXAMPLE TASK:") - self.logger.info(task) - - self.logger.info("\nEXAMPLE DEMONSTRATION:") - self.logger.info(demonstration) - - # Get a list of topics from the task. - topics = await self.prompter.find_index_topics(task.strip()) - self.logger.info("\nTOPICS EXTRACTED FROM TASK:") - self.logger.info("\n".join(topics)) - self.logger.info("") - - # Add the insight to the memory bank. - self.memory_bank.add_demonstration(task=task, insight=demonstration, topics=topics) - - self.logger.leave_function() diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index c5ead3142fa5..e017b6cec53b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -36,12 +36,12 @@ async def handle_user_message(self, text, should_await=True): self.logger.leave_function() return response - async def learn_from_demonstration(self, task, demonstration): - """A foreground operation, assuming that the task and demonstration are already known.""" + async def add_task_solution_pair_to_memory(self, task, solution): + """A foreground operation, assuming that the task and a solution are already known.""" self.logger.enter_function() - # Pass the task and demonstration through to the memory controller. - await self.memory_controller.learn_from_demonstration(task, demonstration) + # Pass the task and solution through to the memory controller. + await self.memory_controller.add_task_solution_pair_to_memory(task, solution) self.logger.leave_function() From 1a309f9954f77edd358682b7914bd15dc39d7fdb Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 24 Jan 2025 17:49:10 -0800 Subject: [PATCH 58/93] docstrings etc. 
--- .../src/autogen_ext/apprentice/apprentice.py | 44 +++++++++++++++---- 1 file changed, 35 insertions(+), 9 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py index e017b6cec53b..a8c663f887d3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py @@ -3,7 +3,23 @@ class Apprentice: - def __init__(self, settings, evaluator, client, logger): + """ + Wraps the combination of agentic memory and a base agent. + + Args: + settings: The settings for the apprentice. + evaluator: The evaluator to use for training. + client: The client to call the model. + logger: The logger to log the model calls. + + Methods: + reset_memory: Resets the memory bank. + assign_task: Assigns a task to the agent, along with any relevant insights/memories. + handle_user_message: Handles a user message, extracting any advice and assigning a task to the agent. + add_task_solution_pair_to_memory: Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. + train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. + """ + def __init__(self, settings, evaluator, client, logger) -> None: self.settings = settings self.evaluator = evaluator self.client = client @@ -22,12 +38,17 @@ def __init__(self, settings, evaluator, client, logger): logger=self.logger, ) - def reset_memory(self): + def reset_memory(self) -> None: + """ + Resets the memory bank. + """ if self.memory_controller is not None: self.memory_controller.reset_memory() - async def handle_user_message(self, text, should_await=True): - """A foreground operation, intended for immediate response to the user.""" + async def handle_user_message(self, text: str, should_await: bool = True) -> str: + """ + Handles a user message, extracting any advice and assigning a task to the agent. + """ self.logger.enter_function() # Pass the user message through to the memory controller. @@ -36,8 +57,11 @@ async def handle_user_message(self, text, should_await=True): self.logger.leave_function() return response - async def add_task_solution_pair_to_memory(self, task, solution): - """A foreground operation, assuming that the task and a solution are already known.""" + async def add_task_solution_pair_to_memory(self, task, solution) -> None: + """ + Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. + This is useful when the insight is a demonstration of how to solve a given type of task. + """ self.logger.enter_function() # Pass the task and solution through to the memory controller. @@ -45,7 +69,7 @@ async def add_task_solution_pair_to_memory(self, task, solution): self.logger.leave_function() - async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True): + async def assign_task(self, task: str, use_memory: bool = True, should_await: bool = True) -> str: """ Assigns a task to the agent, along with any relevant insights/memories. 
""" @@ -57,8 +81,10 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo self.logger.leave_function() return response - async def train_on_task(self, task, expected_answer): - """A background operation, not intended for immediate response.""" + async def train_on_task(self, task: str, expected_answer: str) -> None: + """ + Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. + """ self.logger.enter_function() # Pass the task through to the memory controller. From 8993aa1fd4c54b828d9c8cf2c4c07eaacb75d24b Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 24 Jan 2025 18:09:52 -0800 Subject: [PATCH 59/93] docstrings etc. --- .../eval_learning_from_demonstration.py | 16 +++++++++++++-- .../ame/eval_functions/eval_self_teaching.py | 16 +++++++++++++-- .../ame/eval_functions/eval_teachability.py | 15 +++++++++++--- .../eval_functions/eval_without_learning.py | 20 ++++++++++++++++--- 4 files changed, 57 insertions(+), 10 deletions(-) diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 167b86d75c1e..f2fb1cf07a5e 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -1,5 +1,17 @@ -async def eval_learning_from_demonstration(fast_learner, evaluator, client, logger, settings, run_dict): - """An evaluation""" +from typing import Dict + +from autogen_core.models import ( + ChatCompletionClient, +) +from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from ..eval import Evaluator + + +async def eval_learning_from_demonstration(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, + logger: PageLogger, settings: Dict, run_dict: Dict) -> str: + """ + Evaluates the ability to learn quickly from demonstrations. + """ logger.enter_function() num_trials = settings["num_trials"] diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index 5bd0213e5389..f01af3b34197 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -1,5 +1,17 @@ -async def eval_self_teaching(fast_learner, evaluator, client, logger, settings, run_dict): - """An evaluation""" +from typing import Dict + +from autogen_core.models import ( + ChatCompletionClient, +) +from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from ..eval import Evaluator + + +async def eval_self_teaching(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, + logger: PageLogger, settings: Dict, run_dict: Dict) -> str: + """ + Evaluates the ability of an agent to learn quickly from its own trial and error. 
+    """
     logger.enter_function()

     num_loops = settings["num_loops"]

diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
index 8fda2cd3d49c..ff3046e27355 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
@@ -1,8 +1,17 @@
-from autogen_ext.apprentice import Grader
+from typing import Dict
+from autogen_core.models import (
+    ChatCompletionClient,
+)
+from autogen_ext.apprentice import Apprentice, Grader, PageLogger
+from ..eval import Evaluator

-async def eval_teachability(fast_learner, evaluator, client, logger, settings, run_dict):
-    """An evaluation"""
+
+async def eval_teachability(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient,
+                            logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+    """
+    Evaluates the ability to learn quickly from user teachings, hints, and advice.
+    """
     logger.enter_function()

     # This eval function needs 2 data strings for each run.
diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
index f1cf9095039d..c36de0e2f455 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py
@@ -1,5 +1,17 @@
-async def eval_without_learning(fast_learner, evaluator, client, logger, settings, run_dict):
-    """An evaluation"""
+from typing import Dict
+
+from autogen_core.models import (
+    ChatCompletionClient,
+)
+from autogen_ext.apprentice import Apprentice, Grader, PageLogger
+from ..eval import Evaluator
+
+
+async def eval_without_learning(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient,
+                                logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
+    """
+    Performs an evaluation without the benefit of memory.
+ """ logger.enter_function() num_trials = settings["num_trials"] @@ -21,6 +33,8 @@ async def eval_without_learning(fast_learner, evaluator, client, logger, setting logger=logger, ) success_rate = round((num_successes / num_trials) * 100) - logger.info("\nSuccess rate: {}%\n".format(success_rate)) + results_str = "Success rate: {}%".format(success_rate) + logger.info("\n" + results_str) logger.leave_function() + return "\neval_without_learning\n" + results_str From fa60d5a42073c47d6d6f721df74a34f231d54424 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sat, 25 Jan 2025 12:46:14 -0800 Subject: [PATCH 60/93] Simplify naming --- .../ame/src/ame/clients/_client_wrapper.py | 2 +- python/packages/ame/src/ame/eval.py | 36 ++++++------------- .../eval_learning_from_demonstration.py | 16 ++++----- .../ame/eval_functions/eval_self_teaching.py | 16 ++++----- .../ame/eval_functions/eval_teachability.py | 12 +++---- .../eval_functions/eval_without_learning.py | 10 +++--- .../ame/src/ame/settings/baseline.yaml | 4 +-- .../packages/ame/src/ame/settings/check.yaml | 6 ++-- python/packages/ame/src/ame/settings/m1.yaml | 4 +-- .../__init__.py | 0 .../_agentic_memory_bank.py | 0 .../_prompter.py | 0 .../_string_similarity_map.py | 0 .../{apprentice => agentic_memory}/_utils.py | 0 .../agent_wrapper.py | 0 .../agentic_memory_controller.py | 0 .../apprentice.py | 0 .../{apprentice => agentic_memory}/grader.py | 0 .../page_logger.py | 0 19 files changed, 42 insertions(+), 64 deletions(-) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/__init__.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/_agentic_memory_bank.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/_prompter.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/_string_similarity_map.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/_utils.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/agent_wrapper.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/agentic_memory_controller.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/apprentice.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/grader.py (100%) rename python/packages/autogen-ext/src/autogen_ext/{apprentice => agentic_memory}/page_logger.py (100%) diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py index 3e582b6a81c2..049059c2013d 100644 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ b/python/packages/ame/src/ame/clients/_client_wrapper.py @@ -9,7 +9,7 @@ RequestUsage, ) from autogen_core.tools import Tool, ToolSchema -from autogen_ext.apprentice import PageLogger +from autogen_ext.agentic_memory import PageLogger from autogen_ext.models.openai import AzureOpenAIChatCompletionClient diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 02e4ee57a159..1a460b62a730 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -5,7 +5,7 @@ from typing import Tuple import yaml -from autogen_ext.apprentice import Grader, PageLogger +from autogen_ext.agentic_memory import Grader, PageLogger, Apprentice from ame.clients._client_creator import ClientCreator @@ -38,12 +38,12 @@ def 
get_demo_from_file(self, demo_filename): demo_dict = yaml.load(file, Loader=yaml.FullLoader) return demo_dict["demo"] - async def test_fast_learner( - self, fast_learner, task_description, expected_answer, num_trials, use_memory, client, logger + async def test_apprentice( + self, apprentice, task_description, expected_answer, num_trials, use_memory, client, logger ) -> Tuple[int, int]: logger.enter_function() - self.logger.info("Testing the fast learner on the given task.\n") + self.logger.info("Testing the apprentice on the given task.\n") grader = Grader(client, logger) num_successes = 0 @@ -51,7 +51,7 @@ async def test_fast_learner( for trial in range(num_trials): self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) self.logger.info("Try to solve the task.\n") - response = await fast_learner.assign_task(task_description, use_memory=use_memory) + response = await apprentice.assign_task(task_description, use_memory=use_memory) response_is_correct, extracted_answer = await grader.is_response_correct( task_description, response, expected_answer ) @@ -69,29 +69,13 @@ async def test_fast_learner( async def perform_evaluations(self, settings): self.logger.enter_function() - # Create the client, passed to both the fast_learner and the evaluator. + # Create the client, which is passed to both the apprentice and the evaluator. client_creator = ClientCreator(settings=settings["client"], logger=self.logger) client = client_creator.create_client() - # Create the specified fast_learner implementation. - fast_learner_settings = settings["fast_learning_agent"] - module_path = fast_learner_settings["module_path"] - try: - module = importlib.import_module(module_path) - except ModuleNotFoundError: - print("Failed to import {}".format(module_path)) - raise - class_name = fast_learner_settings["class_name"] - try: - fast_learner_class = getattr(module, class_name) - except AttributeError: - print("Failed to import {}.{}".format(module_path, class_name)) - raise - try: - fast_learner = fast_learner_class(fast_learner_settings, self, client, self.logger) - except Exception as err: - print('Error creating "{}": {}'.format(fast_learner_class, err)) - raise + # Create the apprentice. + apprentice_settings = settings["Apprentice"] + apprentice = Apprentice(apprentice_settings, self, client, self.logger) # Execute each evaluation. for evaluation_settings in settings["evaluations"]: @@ -112,7 +96,7 @@ async def perform_evaluations(self, settings): # Call the eval function for each listed run. 
for run_dict in evaluation_settings["runs"]: - results = await eval_function(fast_learner, self, client, self.logger, function_settings, run_dict) + results = await eval_function(apprentice, self, client, self.logger, function_settings, run_dict) print(results) if hasattr(client, "finalize"): diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index f2fb1cf07a5e..06ada974edb9 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -3,11 +3,11 @@ from autogen_core.models import ( ChatCompletionClient, ) -from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger from ..eval import Evaluator -async def eval_learning_from_demonstration(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_learning_from_demonstration(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Evaluates the ability to learn quickly from demonstrations. @@ -28,9 +28,9 @@ async def eval_learning_from_demonstration(fast_learner: Apprentice, evaluator: # Start by clearing memory then running a baseline test. logger.info("To get a baseline, clear memory, then assign the task.") - fast_learner.reset_memory() - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, + apprentice.reset_memory() + num_successes, num_trials = await evaluator.test_apprentice( + apprentice=apprentice, task_description=task_description_1, expected_answer=expected_answer_1, num_trials=num_trials, @@ -44,12 +44,12 @@ async def eval_learning_from_demonstration(fast_learner: Apprentice, evaluator: # Provide a demonstration for a similar but different task. logger.info("Demonstrate a solution to a similar task.") - await fast_learner.add_task_solution_pair_to_memory(demo_task, demo_solution) + await apprentice.add_task_solution_pair_to_memory(demo_task, demo_solution) # Now test again to see if the demonstration (retrieved from memory) helps. 
logger.info("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, + num_successes, num_trials = await evaluator.test_apprentice( + apprentice=apprentice, task_description=task_description_1, expected_answer=expected_answer_1, num_trials=num_trials, diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index f01af3b34197..c0435f01f872 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -3,11 +3,11 @@ from autogen_core.models import ( ChatCompletionClient, ) -from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger from ..eval import Evaluator -async def eval_self_teaching(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_self_teaching(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Evaluates the ability of an agent to learn quickly from its own trial and error. @@ -26,18 +26,18 @@ async def eval_self_teaching(fast_learner: Apprentice, evaluator: Evaluator, cli task_description_2, expected_answer_2 = evaluator.get_task_description_and_answer_from_file(task_file_2) # Start the test with empty memory. - fast_learner.reset_memory() + apprentice.reset_memory() total_num_successes_1 = 0 total_num_successes_2 = 0 total_num_trials = 0 for i in range(num_loops): # Train on the first task. - await fast_learner.train_on_task(task=task_description_1, expected_answer=expected_answer_1) + await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1) # Test on the first task. - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, + num_successes, num_trials = await evaluator.test_apprentice( + apprentice=apprentice, task_description=task_description_1, expected_answer=expected_answer_1, num_trials=num_final_test_trials, @@ -49,8 +49,8 @@ async def eval_self_teaching(fast_learner: Apprentice, evaluator: Evaluator, cli total_num_successes_1 += num_successes # Test on the second task. 
- num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, + num_successes, num_trials = await evaluator.test_apprentice( + apprentice=apprentice, task_description=task_description_2, expected_answer=expected_answer_2, num_trials=num_final_test_trials, diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py index ff3046e27355..013aa51b4dac 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py +++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py @@ -3,11 +3,11 @@ from autogen_core.models import ( ChatCompletionClient, ) -from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger from ..eval import Evaluator -async def eval_teachability(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_teachability(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Evalutes the ability to learn quickly from user teachings, hints, and advice. @@ -23,9 +23,9 @@ async def eval_teachability(fast_learner: Apprentice, evaluator: Evaluator, clie advice = evaluator.get_advice_from_file(advice_file) # First test without memory. - fast_learner.reset_memory() + apprentice.reset_memory() logger.info("\nClear memory, then ask the question.") - response = await fast_learner.handle_user_message(task_description) + response = await apprentice.handle_user_message(task_description) # Check the response. grader = Grader(client, logger) @@ -41,11 +41,11 @@ async def eval_teachability(fast_learner: Apprentice, evaluator: Evaluator, clie # Give advice that should help solve this task. logger.info("Give the advice.") - await fast_learner.handle_user_message(advice) + await apprentice.handle_user_message(advice) # Now ask the question again to see if the advice helps. logger.info("\nAsk the question again to see if the advice helps.") - response = await fast_learner.handle_user_message(task_description) + response = await apprentice.handle_user_message(task_description) # Check the response. response_is_correct, extracted_answer = await grader.is_response_correct( diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py index c36de0e2f455..759a0aede703 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py +++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py @@ -3,11 +3,11 @@ from autogen_core.models import ( ChatCompletionClient, ) -from autogen_ext.apprentice import Apprentice, Grader, PageLogger +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger from ..eval import Evaluator -async def eval_without_learning(fast_learner: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_without_learning(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Performs an evaluation without the benefit of memory. @@ -22,9 +22,9 @@ async def eval_without_learning(fast_learner: Apprentice, evaluator: Evaluator, # Clear memory then run a baseline test. 
logger.info("To get a baseline, clear memory, then assign the task.") - fast_learner.reset_memory() - num_successes, num_trials = await evaluator.test_fast_learner( - fast_learner=fast_learner, + apprentice.reset_memory() + num_successes, num_trials = await evaluator.test_apprentice( + apprentice=apprentice, task_description=task_description, expected_answer=expected_answer, num_trials=num_trials, diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml index 855ea81f7883..fec69a6b588b 100644 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ b/python/packages/ame/src/ame/settings/baseline.yaml @@ -19,9 +19,7 @@ client: mode: check-replay # pass-through, record, or check-replay session_name: short-3 -fast_learning_agent: - class_name: Apprentice - module_path: autogen_ext.apprentice +Apprentice: AgenticMemoryController: max_train_trials: 2 # 2-10 max_test_trials: 1 # 1-3 diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 8b3729215aa8..1c66f9fedd8c 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -2,7 +2,7 @@ Evaluator: PageLogger: enabled: 1 - path: ~/pagelogs/temp15 + path: ~/pagelogs/temp16 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. @@ -19,9 +19,7 @@ client: mode: check-replay # pass-through, record, or check-replay session_name: short-3 -fast_learning_agent: - class_name: Apprentice - module_path: autogen_ext.apprentice +Apprentice: AgenticMemoryController: max_train_trials: 2 # 2-10 max_test_trials: 1 # 1-3 diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index cd2930c6da51..487decc2de06 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -15,9 +15,7 @@ client: top_p: 1.0 max_retries: 65535 -fast_learning_agent: - class_name: Apprentice - module_path: autogen_ext.apprentice +Apprentice: AgenticMemoryController: max_train_trials: 2 # 2-10 max_test_trials: 1 # 1-3 diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/__init__.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_agentic_memory_bank.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_prompter.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_string_similarity_map.py rename to 
python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/_utils.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/agent_wrapper.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/agentic_memory_controller.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/apprentice.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/grader.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py diff --git a/python/packages/autogen-ext/src/autogen_ext/apprentice/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py similarity index 100% rename from python/packages/autogen-ext/src/autogen_ext/apprentice/page_logger.py rename to python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py From 882d578ed5e1da989f2aefc9f5b5f9b1b3f9cf30 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 26 Jan 2025 15:33:27 -0800 Subject: [PATCH 61/93] Simplify tests --- .../advice/add_topic.yaml | 0 .../demos/cell_towers_2_demo.yaml | 0 .../tasks/100_vampires.yaml | 0 .../tasks/10_liars.yaml | 0 .../tasks/3_to_third.yaml | 0 .../tasks/autogen_package.yaml | 0 .../tasks/cell_towers_1.yaml | 0 .../tasks/cell_towers_2.yaml | 0 python/packages/ame/src/ame/eval.py | 56 +------------------ .../eval_learning_from_demonstration.py | 38 +++++++------ .../ame/eval_functions/eval_self_teaching.py | 27 +++++---- .../ame/eval_functions/eval_teachability.py | 20 ++++--- .../eval_functions/eval_without_learning.py | 16 ++++-- .../ame/src/ame/settings/baseline.yaml | 2 +- .../packages/ame/src/ame/settings/check.yaml | 14 ++--- python/packages/ame/src/ame/settings/m1.yaml | 2 +- .../src/autogen_ext/agentic_memory/_utils.py | 2 +- .../src/autogen_ext/agentic_memory/grader.py | 31 +++++++++- 18 files changed, 100 insertions(+), 108 deletions(-) rename python/packages/ame/src/ame/{task_data => data_files}/advice/add_topic.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/demos/cell_towers_2_demo.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/tasks/100_vampires.yaml (100%) rename python/packages/ame/src/ame/{task_data 
=> data_files}/tasks/10_liars.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/tasks/3_to_third.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/tasks/autogen_package.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/tasks/cell_towers_1.yaml (100%) rename python/packages/ame/src/ame/{task_data => data_files}/tasks/cell_towers_2.yaml (100%) diff --git a/python/packages/ame/src/ame/task_data/advice/add_topic.yaml b/python/packages/ame/src/ame/data_files/advice/add_topic.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/advice/add_topic.yaml rename to python/packages/ame/src/ame/data_files/advice/add_topic.yaml diff --git a/python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml b/python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/demos/cell_towers_2_demo.yaml rename to python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/100_vampires.yaml b/python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/100_vampires.yaml rename to python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/10_liars.yaml b/python/packages/ame/src/ame/data_files/tasks/10_liars.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/10_liars.yaml rename to python/packages/ame/src/ame/data_files/tasks/10_liars.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/3_to_third.yaml b/python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/3_to_third.yaml rename to python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/autogen_package.yaml b/python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/autogen_package.yaml rename to python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/cell_towers_1.yaml b/python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/cell_towers_1.yaml rename to python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml diff --git a/python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml b/python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml similarity index 100% rename from python/packages/ame/src/ame/task_data/tasks/cell_towers_2.yaml rename to python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 1a460b62a730..30ea614a8fe7 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -5,7 +5,7 @@ from typing import Tuple import yaml -from autogen_ext.agentic_memory import Grader, PageLogger, Apprentice +from autogen_ext.agentic_memory import PageLogger, Apprentice from ame.clients._client_creator import ClientCreator @@ -14,58 +14,6 @@ class Evaluator: def __init__(self): self.logger = None - def get_task_description_and_answer_from_file(self, task_filename): - path_to_this_file = os.path.abspath(__file__) - 
dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, "task_data", "tasks", task_filename + ".yaml") - with open(task_filepath, "r") as file: - task_details = yaml.load(file, Loader=yaml.FullLoader) - return task_details["task_description"], task_details["expected_answer"] - - def get_advice_from_file(self, advice_filename): - path_to_this_file = os.path.abspath(__file__) - dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, "task_data", "advice", advice_filename + ".yaml") - with open(task_filepath, "r") as file: - advice_dict = yaml.load(file, Loader=yaml.FullLoader) - return advice_dict["advice"] - - def get_demo_from_file(self, demo_filename): - path_to_this_file = os.path.abspath(__file__) - dir_of_this_file = os.path.dirname(path_to_this_file) - task_filepath = os.path.join(dir_of_this_file, "task_data", "demos", demo_filename + ".yaml") - with open(task_filepath, "r") as file: - demo_dict = yaml.load(file, Loader=yaml.FullLoader) - return demo_dict["demo"] - - async def test_apprentice( - self, apprentice, task_description, expected_answer, num_trials, use_memory, client, logger - ) -> Tuple[int, int]: - logger.enter_function() - - self.logger.info("Testing the apprentice on the given task.\n") - - grader = Grader(client, logger) - num_successes = 0 - - for trial in range(num_trials): - self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) - self.logger.info("Try to solve the task.\n") - response = await apprentice.assign_task(task_description, use_memory=use_memory) - response_is_correct, extracted_answer = await grader.is_response_correct( - task_description, response, expected_answer - ) - self.logger.info("Extracted answer: {}".format(extracted_answer)) - if response_is_correct: - self.logger.info("Answer is CORRECT.\n") - num_successes += 1 - else: - self.logger.info("Answer is INCORRECT.\n") - - self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - logger.leave_function() - return num_successes, num_trials - async def perform_evaluations(self, settings): self.logger.enter_function() @@ -96,7 +44,7 @@ async def perform_evaluations(self, settings): # Call the eval function for each listed run. for run_dict in evaluation_settings["runs"]: - results = await eval_function(apprentice, self, client, self.logger, function_settings, run_dict) + results = await eval_function(apprentice, client, self.logger, function_settings, run_dict) print(results) if hasattr(client, "finalize"): diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py index 06ada974edb9..588528e2c529 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py @@ -1,13 +1,13 @@ from typing import Dict +import yaml from autogen_core.models import ( ChatCompletionClient, ) from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger -from ..eval import Evaluator -async def eval_learning_from_demonstration(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Evaluates the ability to learn quickly from demonstrations. 
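
(The rewritten eval functions below load their inputs directly from YAML
files. A minimal sketch of the schema they assume, with the key names taken
from the code and the values invented: task files carry task_description and
expected_answer, demo files carry demo, and advice files carry advice.)

    import yaml

    EXAMPLE_TASK_YAML = """
    task_description: |
      How many cell towers are needed to cover the houses?  Answer with only a number on the last line.
    expected_answer: "2"
    """

    task = yaml.load(EXAMPLE_TASK_YAML, Loader=yaml.FullLoader)
    print(task["task_description"])
    print(task["expected_answer"])  # -> 2
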
@@ -15,24 +15,28 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, evaluator: Ev logger.enter_function() num_trials = settings["num_trials"] + grader = Grader(client, logger) - # This eval function needs 3 data strings for each run. - task_1_file = run_dict["task_1_file"] # The task being tested. - task_2_file = run_dict["task_2_file"] # A similar but different task. - demo_2_file = run_dict["demo_2_file"] # A demonstration of solving task 2. - - # Get the actual task and advice strings from their files. - task_description_1, expected_answer_1 = evaluator.get_task_description_and_answer_from_file(task_1_file) - demo_task, _ = evaluator.get_task_description_and_answer_from_file(task_2_file) - demo_solution = evaluator.get_demo_from_file(demo_2_file) + # Load the specified data. + with open(run_dict["main_task_file"], "r") as file: + # The task being tested. + main_task = yaml.load(file, Loader=yaml.FullLoader) + task_description = main_task["task_description"] + expected_answer = main_task["expected_answer"] + with open(run_dict["demo_task_file"], "r") as file: + # A similar but different task. + demo_task = yaml.load(file, Loader=yaml.FullLoader)["task_description"] + with open(run_dict["demo_solution_file"], "r") as file: + # A demonstration of solving the second task. + demo_solution = yaml.load(file, Loader=yaml.FullLoader)["demo"] # Start by clearing memory then running a baseline test. logger.info("To get a baseline, clear memory, then assign the task.") apprentice.reset_memory() - num_successes, num_trials = await evaluator.test_apprentice( + num_successes, num_trials = await grader.test_apprentice( apprentice=apprentice, - task_description=task_description_1, - expected_answer=expected_answer_1, + task_description=task_description, + expected_answer=expected_answer, num_trials=num_trials, use_memory=True, client=client, @@ -48,10 +52,10 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, evaluator: Ev # Now test again to see if the demonstration (retrieved from memory) helps. logger.info("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await evaluator.test_apprentice( + num_successes, num_trials = await grader.test_apprentice( apprentice=apprentice, - task_description=task_description_1, - expected_answer=expected_answer_1, + task_description=task_description, + expected_answer=expected_answer, num_trials=num_trials, use_memory=True, client=client, diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py index c0435f01f872..ba4f75c38159 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py @@ -1,13 +1,13 @@ from typing import Dict +import yaml from autogen_core.models import ( ChatCompletionClient, ) from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger -from ..eval import Evaluator -async def eval_self_teaching(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Evaluates the ability of an agent to learn quickly from its own trial and error. 
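
(eval_self_teaching below accumulates success counts over num_loops
train/test loops and reports one aggregate rate per task. A sketch of that
bookkeeping with invented counts, using the same round((successes / trials) * 100)
convention as the code:)

    num_loops, num_final_test_trials = 10, 3
    total_num_trials = num_loops * num_final_test_trials  # 30 test trials per task
    total_num_successes_1, total_num_successes_2 = 24, 18  # invented counts
    for name, successes in (("task 1", total_num_successes_1), ("task 2", total_num_successes_2)):
        print("{} success rate: {}%".format(name, round((successes / total_num_trials) * 100)))
    # task 1 success rate: 80%
    # task 2 success rate: 60%
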
@@ -16,14 +16,19 @@ async def eval_self_teaching(apprentice: Apprentice, evaluator: Evaluator, clien
     logger.enter_function()

     num_loops = settings["num_loops"]
     num_final_test_trials = settings["num_final_test_trials"]
+    grader = Grader(client, logger)

-    # This eval function needs 2 data strings for each run.
-    task_file_1 = run_dict["task_file_1"]  # Train and test on this task.
-    task_file_2 = run_dict["task_file_2"]  # Test generalization on a different, similar task.
-
-    # Get the actual task and advice strings from their files.
-    task_description_1, expected_answer_1 = evaluator.get_task_description_and_answer_from_file(task_file_1)
-    task_description_2, expected_answer_2 = evaluator.get_task_description_and_answer_from_file(task_file_2)
+    # Load the specified data.
+    with open(run_dict["task_file_1"], "r") as file:
+        # Train and test on this task.
+        task_1 = yaml.load(file, Loader=yaml.FullLoader)
+        task_description_1 = task_1["task_description"]
+        expected_answer_1 = task_1["expected_answer"]
+    with open(run_dict["task_file_2"], "r") as file:
+        # Test generalization on this different, similar task.
+        task_2 = yaml.load(file, Loader=yaml.FullLoader)
+        task_description_2 = task_2["task_description"]
+        expected_answer_2 = task_2["expected_answer"]

     # Start the test with empty memory.
     apprentice.reset_memory()
@@ -36,7 +41,7 @@ async def eval_self_teaching(apprentice: Apprentice, evaluator: Evaluator, clien
         await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1)

         # Test on the first task.
-        num_successes, num_trials = await evaluator.test_apprentice(
+        num_successes, num_trials = await grader.test_apprentice(
             apprentice=apprentice,
             task_description=task_description_1,
             expected_answer=expected_answer_1,
@@ -49,7 +54,7 @@ async def eval_self_teaching(apprentice: Apprentice, evaluator: Evaluator, clien
         total_num_successes_1 += num_successes

         # Test on the second task.
-        num_successes, num_trials = await evaluator.test_apprentice(
+        num_successes, num_trials = await grader.test_apprentice(
             apprentice=apprentice,
             task_description=task_description_2,
             expected_answer=expected_answer_2,
             num_trials=num_final_test_trials,
diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
index 013aa51b4dac..dd63fe74f3c6 100644
--- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py
+++ b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
@@ -1,26 +1,28 @@
 from typing import Dict

+import yaml
 from autogen_core.models import (
     ChatCompletionClient,
 )
 from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
-from ..eval import Evaluator


-async def eval_teachability(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient,
+async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient,
                             logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
     """
     Evaluates the ability to learn quickly from user teachings, hints, and advice.
     """
     logger.enter_function()

-    # This eval function needs 2 data strings for each run.
-    task_file = run_dict["task_file"]  # The task being tested.
-    advice_file = run_dict["advice_file"]  # Advice for solving such tasks.
-
-    # Get the actual task and advice strings from their files.
-    task_description, expected_answer = evaluator.get_task_description_and_answer_from_file(task_file)
-    advice = evaluator.get_advice_from_file(advice_file)
+    # Load the specified data.
+    with open(run_dict["task_file"], "r") as file:
+        # The task being tested.
+ task = yaml.load(file, Loader=yaml.FullLoader) + task_description = task["task_description"] + expected_answer = task["expected_answer"] + with open(run_dict["advice_file"], "r") as file: + # Advice for solving such tasks. + advice = yaml.load(file, Loader=yaml.FullLoader)["advice"] # First test without memory. apprentice.reset_memory() diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py index 759a0aede703..3e96fbc94c2a 100644 --- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py +++ b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py @@ -1,13 +1,13 @@ from typing import Dict +import yaml from autogen_core.models import ( ChatCompletionClient, ) from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger -from ..eval import Evaluator -async def eval_without_learning(apprentice: Apprentice, evaluator: Evaluator, client: ChatCompletionClient, +async def eval_without_learning(apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict, run_dict: Dict) -> str: """ Performs an evaluation without the benefit of memory. @@ -15,15 +15,19 @@ async def eval_without_learning(apprentice: Apprentice, evaluator: Evaluator, cl logger.enter_function() num_trials = settings["num_trials"] + grader = Grader(client, logger) - # Get the task and advice strings. - task_file = run_dict["task_file"] - task_description, expected_answer = evaluator.get_task_description_and_answer_from_file(task_file) + # Load the specified data. + with open(run_dict["task_file"], "r") as file: + # The task being tested. + task = yaml.load(file, Loader=yaml.FullLoader) + task_description = task["task_description"] + expected_answer = task["expected_answer"] # Clear memory then run a baseline test. logger.info("To get a baseline, clear memory, then assign the task.") apprentice.reset_memory() - num_successes, num_trials = await evaluator.test_apprentice( + num_successes, num_trials = await grader.test_apprentice( apprentice=apprentice, task_description=task_description, expected_answer=expected_answer, diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml index fec69a6b588b..2eba26612d0c 100644 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ b/python/packages/ame/src/ame/settings/baseline.yaml @@ -39,4 +39,4 @@ evaluations: module_path: ame.eval_functions.eval_without_learning num_trials: 1 # 1-10 runs: - - task_file: 10_liars + - task_file: data_files/tasks/10_liars.yaml diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 1c66f9fedd8c..2fcfe18ddfe8 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -38,17 +38,17 @@ evaluations: function_name: eval_teachability module_path: ame.eval_functions.eval_teachability runs: - - task_file: autogen_package - advice_file: add_topic + - task_file: data_files/tasks/autogen_package.yaml # The task being tested. + advice_file: data_files/advice/add_topic.yaml # Advice provided to help solve the task. - eval_function: function_name: eval_learning_from_demonstration module_path: ame.eval_functions.eval_learning_from_demonstration num_trials: 1 # 1-10 runs: - - task_1_file: cell_towers_1 # The task being tested. - task_2_file: cell_towers_2 # A similar but different task. 
-      demo_2_file: cell_towers_2_demo  # A demonstration of solving task 2.
+    - main_task_file: data_files/tasks/cell_towers_1.yaml  # The task being tested.
+      demo_task_file: data_files/tasks/cell_towers_2.yaml  # A similar but different task.
+      demo_solution_file: data_files/demos/cell_towers_2_demo.yaml  # A demonstration of solving the second.
 
   - eval_function:
       function_name: eval_self_teaching
@@ -56,5 +56,5 @@ evaluations:
       num_loops: 1  # 1-10
       num_final_test_trials: 1  # 1-3
       runs:
-        - task_file_1: 10_liars  # Train and test on this task.
-          task_file_2: 100_vampires  # Test generalization on a different, similar task.
+        - task_file_1: data_files/tasks/10_liars.yaml  # Train and test on this task.
+          task_file_2: data_files/tasks/100_vampires.yaml  # Test generalization on this different, similar task.
diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml
index 487decc2de06..cbe780d36638 100644
--- a/python/packages/ame/src/ame/settings/m1.yaml
+++ b/python/packages/ame/src/ame/settings/m1.yaml
@@ -35,4 +35,4 @@ evaluations:
       module_path: ame.eval_functions.eval_without_learning
       num_trials: 1
       runs:
-        - task_file: 10_liars
+        - task_file: data_files/tasks/10_liars.yaml
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
index 41940888ea7a..6deb2bd4c501 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py
@@ -1,4 +1,4 @@
-from typing import Any, Dict, List, Union
+from typing import Any, Dict, List, Union, Tuple
 
 from autogen_core import FunctionCall, Image
 from autogen_core.models import FunctionExecutionResult, LLMMessage
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
index e82c7eabf82d..9810687bbe95 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
@@ -15,13 +15,14 @@
 
 class Grader:
     """
-    Determines task success without limitation to string matches.
+    Runs basic tests, and determines task success without being limited to exact string matching.
 
    Args:
        client: The client to call the model.
        logger: The logger to log the model calls.
 
    Methods:
+        test_apprentice: Tests the apprentice on the given task.
        call_model: Calls the model with the given input and returns the response.
        is_response_correct: Determines whether the response is equivalent to the task's correct answer.
""" @@ -35,6 +36,34 @@ def __init__(self, client: ChatCompletionClient, logger: PageLogger): # Create the chat history self._chat_history: List[LLMMessage] = [] + async def test_apprentice( + self, apprentice, task_description, expected_answer, num_trials, use_memory, client, logger + ) -> Tuple[int, int]: + logger.enter_function() + + self.logger.info("Testing the apprentice on the given task.\n") + + grader = Grader(client, logger) + num_successes = 0 + + for trial in range(num_trials): + self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) + self.logger.info("Try to solve the task.\n") + response = await apprentice.assign_task(task_description, use_memory=use_memory) + response_is_correct, extracted_answer = await grader.is_response_correct( + task_description, response, expected_answer + ) + self.logger.info("Extracted answer: {}".format(extracted_answer)) + if response_is_correct: + self.logger.info("Answer is CORRECT.\n") + num_successes += 1 + else: + self.logger.info("Answer is INCORRECT.\n") + + self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) + logger.leave_function() + return num_successes, num_trials + async def call_model( self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True ) -> str: From 00cbb8c333b1ad124768ab52695b47f018bee8ee Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 09:55:00 -0800 Subject: [PATCH 62/93] standardize logging levels --- .../ame/src/ame/settings/baseline.yaml | 4 +- .../packages/ame/src/ame/settings/check.yaml | 6 +- python/packages/ame/src/ame/settings/m1.yaml | 4 +- .../agentic_memory/_agentic_memory_bank.py | 4 +- .../agentic_memory/_string_similarity_map.py | 41 ++++---- .../autogen_ext/agentic_memory/page_logger.py | 97 +++++++++++++------ 6 files changed, 88 insertions(+), 68 deletions(-) diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml index 2eba26612d0c..10e8a28b35b8 100644 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ b/python/packages/ame/src/ame/settings/baseline.yaml @@ -1,7 +1,7 @@ Evaluator: PageLogger: - enabled: 1 + level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. path: ~/pagelogs/base client: @@ -28,8 +28,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - StringSimilarityMap: - verbose: 1 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index 2fcfe18ddfe8..e505aefa7374 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -1,8 +1,8 @@ Evaluator: PageLogger: - enabled: 1 - path: ~/pagelogs/temp16 + level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. + path: ~/pagelogs/temp17 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. @@ -28,8 +28,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - StringSimilarityMap: - verbose: 1 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. 
diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index cbe780d36638..10d5210f7439 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -1,7 +1,7 @@ Evaluator: PageLogger: - enabled: 1 + level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. path: ~/pagelogs/m1 client: @@ -24,8 +24,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - StringSimilarityMap: - verbose: 1 AgentWrapper: base_agent: MagenticOneGroupChat # MagenticOneGroupChat, thin_agent, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index bba22a828912..4be509754b1a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -48,9 +48,7 @@ def __init__(self, settings: Dict, reset: bool, logger: PageLogger) -> None: path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") - self.string_map = StringSimilarityMap( - settings=self.settings["StringSimilarityMap"], reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger - ) + self.string_map = StringSimilarityMap(reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger) # Load or create the associated insight dict on disk. self.uid_insight_dict = {} diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py index 8535633bf5e9..c3252980977a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py @@ -17,7 +17,6 @@ class StringSimilarityMap: Vector embeddings are currently supplied by Chroma's default Sentence Transformers. Args: - - settings: The settings for the string similarity map. - reset: True to clear the DB immediately after creation. - path_to_db_dir: Path to the directory where the DB is stored. - logger: The PageLogger object to use for logging. @@ -28,10 +27,8 @@ class StringSimilarityMap: - reset_db: Forces immediate deletion of the DB's contents, in memory and on disk. - save_string_pairs: Saves the string-pair dict to disk. """ - def __init__(self, settings: Dict, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None: - self.settings = settings + def __init__(self, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None: self.logger = logger - self.verbose = self.settings["verbose"] self.path_to_db_dir = path_to_db_dir # Load or create the vector DB on disk. 
@@ -46,14 +43,13 @@ def __init__(self, settings: Dict, reset: bool, path_to_db_dir: str, logger: Pag self.uid_text_dict = {} self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): - if self.verbose: - self.logger.info("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) - self.logger.info(" Location = {}".format(self.path_to_dict)) + self.logger.debug("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) + self.logger.debug(" Location = {}".format(self.path_to_dict)) with open(self.path_to_dict, "rb") as f: self.uid_text_dict = pickle.load(f) self.last_string_pair_id = len(self.uid_text_dict) - if self.verbose and len(self.uid_text_dict) > 0: - self.logger.info("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) + if len(self.uid_text_dict) > 0: + self.logger.debug("\n{} STRING PAIRS LOADED".format(len(self.uid_text_dict))) self._log_string_pairs() # Clear the DB if requested. @@ -64,10 +60,10 @@ def _log_string_pairs(self) -> None: """ Logs all string pairs currently in the map. """ - self.logger.info("LIST OF STRING PAIRS") + self.logger.debug("LIST OF STRING PAIRS") for uid, text in self.uid_text_dict.items(): input_text, output_text = text - self.logger.info(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) + self.logger.debug(" ID: {}\n INPUT TEXT: {}\n OUTPUT TEXT: {}".format(uid, input_text, output_text)) def save_string_pairs(self) -> None: """ @@ -80,8 +76,7 @@ def reset_db(self) -> None: """ Forces immediate deletion of the DB's contents, in memory and on disk. """ - if self.verbose: - self.logger.info("\nCLEARING STRING-PAIR MAP") + self.logger.debug("\nCLEARING STRING-PAIR MAP") self.db_client.delete_collection("string-pairs") self.vec_db = self.db_client.create_collection("string-pairs") self.uid_text_dict = {} @@ -94,13 +89,12 @@ def add_input_output_pair(self, input_text: str, output_text: str) -> None: self.last_string_pair_id += 1 self.vec_db.add(documents=[input_text], ids=[str(self.last_string_pair_id)]) self.uid_text_dict[str(self.last_string_pair_id)] = input_text, output_text - if self.verbose: - self.logger.info( - "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( - self.last_string_pair_id, input_text, output_text - ) + self.logger.debug( + "\nINPUT-OUTPUT PAIR ADDED TO VECTOR DATABASE:\n ID\n {}\n INPUT\n {}\n OUTPUT\n {}\n".format( + self.last_string_pair_id, input_text, output_text ) - # self._log_string_pairs() # For deep debugging, uncomment to log all string pairs after each addition. + ) + # self._log_string_pairs() # For deeper debugging, uncomment to log all string pairs after each addition. 
def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]) -> List[Tuple[str, str, float]]: """ @@ -120,11 +114,10 @@ def get_related_string_pairs(self, query_text: str, n_results: int, threshold: U if distance < threshold: input_text_2, output_text = self.uid_text_dict[uid] assert input_text == input_text_2 - if self.verbose: - self.logger.info( - "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance - ) + self.logger.debug( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance ) + ) string_pairs.append((input_text, output_text, distance)) return string_pairs diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py index b86ec1df7f38..d355d5f0ed75 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py @@ -2,8 +2,7 @@ import json import os import shutil -import time -from typing import Dict, List, Union +from typing import Dict, List, Union, Optional from autogen_core import FunctionCall, Image from autogen_core.models import ( @@ -50,22 +49,34 @@ class PageLogger: Args: settings: A dictionary containing the following keys: - - enabled: A boolean indicating whether logging is enabled. + - level: The logging level, one of DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. + - path: The path to the directory where the log files will be saved. Methods: - info: Adds text to the current page. - error: Adds text to the current page. - log_message_content: Adds a page containing the message's content, including any images. - log_model_call: Adds a page containing all messages to or from a model, including any images. + debug: Adds text to the current page if debugging level <= DEBUG. + info: Adds text to the current page if debugging level <= INFO. + warning: Adds text to the current page if debugging level <= WARNING. + error: Adds text to the current page if debugging level <= ERROR. + critical: Adds text to the current page if debugging level <= CRITICAL. + log_message_content: Adds a page containing the message's content, including any images, if debugging level <= INFO. + log_model_call: Adds a page containing all messages to or from a model, including any images, if debugging level <= INFO. log_link_to_local_file: Returns a link to a local file in the log. flush: Writes the current state of the log to disk. - enter_function: Adds a new page corresponding to the current function call. - leave_function: Finishes the page corresponding to the current function + enter_function: Adds a new page corresponding to the current function call, if debugging level <= INFO. 
+ leave_function: Finishes the page corresponding to the current function, if debugging level <= INFO """ - def __init__(self, settings: Dict): - self.enabled = settings["enabled"] - if not self.enabled: + def __init__(self, settings: Dict) -> None: + self.levels = { + "DEBUG": 10, + "INFO": 20, + "WARNING": 30, + "ERROR": 40, + "CRITICAL": 50, + "NONE": 100, + } + self.level = self.levels[settings["level"]] + if self.level >= self.levels["NONE"]: return self.log_dir = os.path.expanduser(settings["path"]) self.page_stack = PageStack() @@ -105,23 +116,47 @@ def _add_page(self, summary: str, show_in_call_tree: bool = True, finished: bool self.info("\n" + page.full_link) return page - def info(self, line: str) -> None: + def _log_text(self, text: str) -> None: """ - Adds text to the current page. + Adds text to the current page, depending on the current logging level. """ - if not self.enabled: - return page = self.page_stack.top() - page.add_lines(line, flush=True) + page.add_lines(text, flush=True) + + def debug(self, line: str) -> None: + """ + Adds text to the current page if debugging level <= DEBUG. + """ + if self.level <= self.levels["DEBUG"]: + self._log_text(line) + + def info(self, line: str) -> None: + """ + Adds INFO text to the current page if debugging level <= INFO. + """ + if self.level <= self.levels["INFO"]: + self._log_text(line) + + def warning(self, line: str) -> None: + """ + Adds WARNING text to the current page if debugging level <= WARNING. + """ + if self.level <= self.levels["WARNING"]: + self._log_text(line) def error(self, line: str) -> None: """ - Adds text to the current page. + Adds ERROR text to the current page if debugging level <= ERROR. """ - if not self.enabled: - return - page = self.page_stack.top() - page.add_lines(line, flush=True) + if self.level <= self.levels["ERROR"]: + self._log_text(line) + + def critical(self, line: str) -> None: + """ + Adds CRITICAL text to the current page if debugging level <= CRITICAL. + """ + if self.level <= self.levels["CRITICAL"]: + self._log_text(line) def _message_source(self, message: LLMMessage) -> str: """ @@ -196,16 +231,18 @@ def log_message_content(self, message_content: MessageContent, summary: str) -> """ Adds a page containing the message's content, including any images. """ + if self.level > self.levels["INFO"]: + return None page = self._add_page(summary=summary, show_in_call_tree=False) self.page_stack.write_stack_to_page(page) page.add_lines(self._format_message_content(page, message_content=message_content)) page.flush() - def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: LLMMessage) -> "Page": + def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: LLMMessage) -> Optional["Page"]: """ Adds a page containing all messages to or from a model, including any images. """ - if not self.enabled: + if self.level > self.levels["INFO"]: return None page = self._add_page(summary=summary, show_in_call_tree=False) self.page_stack.write_stack_to_page(page) @@ -231,7 +268,7 @@ def flush(self, finished: bool = False) -> None: """ Writes the current state of the log to disk. """ - if not self.enabled: + if self.level > self.levels["INFO"]: return # Create a call tree of the log. 
call_tree_path = os.path.join(self.log_dir, self.name + ".html") @@ -244,13 +281,12 @@ def flush(self, finished: bool = False) -> None: f.write(page.line_text + "\n") f.write("\n") f.write(html_closing()) - time.sleep(0.1) # Avoids race conditions when writing multiple files in quick succession. - def enter_function(self) -> "Page": + def enter_function(self) -> Optional["Page"]: """ Adds a new page corresponding to the current function call. """ - if not self.enabled: + if self.level > self.levels["INFO"]: return None frame = inspect.currentframe().f_back # Get the calling frame @@ -279,8 +315,8 @@ def leave_function(self) -> None: """ Finishes the page corresponding to the current function call. """ - if not self.enabled: - return + if self.level > self.levels["INFO"]: + return None page = self.page_stack.top() page.finished = True page.add_lines("\nLEAVE {}".format(page.summary), flush=True) @@ -367,7 +403,6 @@ def flush(self) -> None: f.write("UnicodeEncodeError in this line.\n") f.write(html_closing()) f.flush() - time.sleep(0.1) # Avoids race conditions when writing multiple files in quick succession. class PageStack: From 88294d2a1b6f18ae8993c15884bb673af0f13379 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 10:15:52 -0800 Subject: [PATCH 63/93] Remove Evaluator class --- python/packages/ame/src/ame/eval.py | 95 +++++++++---------- .../ame/src/ame/settings/baseline.yaml | 7 +- .../packages/ame/src/ame/settings/check.yaml | 7 +- python/packages/ame/src/ame/settings/m1.yaml | 7 +- .../autogen_ext/agentic_memory/apprentice.py | 4 +- 5 files changed, 55 insertions(+), 65 deletions(-) diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index 30ea614a8fe7..d79915996b6e 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -1,68 +1,64 @@ import asyncio import importlib -import os import sys -from typing import Tuple - import yaml -from autogen_ext.agentic_memory import PageLogger, Apprentice +from autogen_ext.agentic_memory import PageLogger, Apprentice from ame.clients._client_creator import ClientCreator -class Evaluator: - def __init__(self): - self.logger = None +async def perform_evaluations(settings, logger) -> None: + """ + Perform the evaluations as specified in the settings file. + """ + logger.enter_function() - async def perform_evaluations(self, settings): - self.logger.enter_function() + # Create the client, which is passed to both the apprentice and the evaluator. + client_creator = ClientCreator(settings=settings["client"], logger=logger) + client = client_creator.create_client() - # Create the client, which is passed to both the apprentice and the evaluator. - client_creator = ClientCreator(settings=settings["client"], logger=self.logger) - client = client_creator.create_client() + # Create the apprentice. + apprentice_settings = settings["Apprentice"] + apprentice = Apprentice(apprentice_settings, client, logger) - # Create the apprentice. - apprentice_settings = settings["Apprentice"] - apprentice = Apprentice(apprentice_settings, self, client, self.logger) + # Execute each evaluation. + for evaluation_settings in settings["evaluations"]: + # Import the function. 
+        function_settings = evaluation_settings["eval_function"]
+        module_path = function_settings["module_path"]
+        try:
+            module = importlib.import_module(module_path)
+        except ModuleNotFoundError:
+            print("Failed to import {}".format(module_path))
+            raise
+        function_name = function_settings["function_name"]
+        try:
+            eval_function = getattr(module, function_name)
+        except AttributeError:
+            print("Failed to import {}.{}".format(module_path, function_name))
+            raise
 
-        # Call the eval function for each listed run.
-        for run_dict in evaluation_settings["runs"]:
-            results = await eval_function(apprentice, client, self.logger, function_settings, run_dict)
-            print(results)
+        # Call the eval function for each listed run.
+        for run_dict in evaluation_settings["runs"]:
+            results = await eval_function(apprentice, client, logger, function_settings, run_dict)
+            print(results)
 
-        if hasattr(client, "finalize"):
-            # If this is a client wrapper, it needs to be finalized.
-            client.finalize()
+    if hasattr(client, "finalize"):
+        # If this is a client wrapper, it needs to be finalized.
+        client.finalize()
 
-        self.logger.flush(finished=True)
-        self.logger.leave_function()
+    logger.flush(finished=True)
+    logger.leave_function()
 
-    async def run(self, settings_filepath):
-        # Load the settings from yaml.
-        with open(settings_filepath, "r") as file:
-            settings = yaml.load(file, Loader=yaml.FullLoader)
-            evaluator_settings = settings["Evaluator"]
-            self.logger = PageLogger(evaluator_settings["PageLogger"])
+
+async def run(settings_filepath):
+    # Load the settings from yaml.
+    with open(settings_filepath, "r") as file:
+        settings = yaml.load(file, Loader=yaml.FullLoader)
+        logger = PageLogger(settings["PageLogger"])
 
-        # Perform the evaluations.
-        await self.perform_evaluations(settings)
+    # Perform the evaluations.
+    await perform_evaluations(settings, logger)
 
 
 if __name__ == "__main__":
@@ -70,5 +66,4 @@
     if len(args) != 1:
         print("Usage: amt.py <path to settings file>")
     else:
-        evaluator = Evaluator()
-        asyncio.run(evaluator.run(settings_filepath=args[0]))
+        asyncio.run(run(settings_filepath=args[0]))
diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml
index 10e8a28b35b8..815e77c2b297 100644
--- a/python/packages/ame/src/ame/settings/baseline.yaml
+++ b/python/packages/ame/src/ame/settings/baseline.yaml
@@ -1,8 +1,7 @@
-Evaluator:
 
-  PageLogger:
-    level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
-    path: ~/pagelogs/base
+PageLogger:
+  level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
+  path: ~/pagelogs/base
 
 client:
   model: gpt-4o-2024-08-06  # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc.
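The refactored eval.py above resolves each eval function at run time from the dotted `module_path` and `function_name` declared in the YAML settings. A condensed sketch of that settings-driven lookup on its own (the example path and name in the comment are taken from this repo's ame package, but any importable module would work):

import importlib

def resolve_eval_function(module_path: str, function_name: str):
    # Mirrors the two try/except pairs above: import the module, then fetch the attribute.
    module = importlib.import_module(module_path)  # Raises ModuleNotFoundError on a bad path.
    return getattr(module, function_name)          # Raises AttributeError on a bad name.

# Illustrative usage:
#   fn = resolve_eval_function("ame.eval_functions.eval_teachability", "eval_teachability")
#   results = await fn(apprentice, client, logger, function_settings, run_dict)

This keeps the evaluation harness generic: adding a new eval only requires a new module plus a settings entry, with no change to eval.py itself.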
diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml index e505aefa7374..f0a4104a32d6 100644 --- a/python/packages/ame/src/ame/settings/check.yaml +++ b/python/packages/ame/src/ame/settings/check.yaml @@ -1,8 +1,7 @@ -Evaluator: - PageLogger: - level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/temp17 +PageLogger: + level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. + path: ~/pagelogs/temp17 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml index 10d5210f7439..8c89a53d29ba 100644 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ b/python/packages/ame/src/ame/settings/m1.yaml @@ -1,8 +1,7 @@ -Evaluator: - PageLogger: - level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/m1 +PageLogger: + level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. + path: ~/pagelogs/m1 client: model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index a8c663f887d3..9856365800bc 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -8,7 +8,6 @@ class Apprentice: Args: settings: The settings for the apprentice. - evaluator: The evaluator to use for training. client: The client to call the model. logger: The logger to log the model calls. @@ -19,9 +18,8 @@ class Apprentice: add_task_solution_pair_to_memory: Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. 
""" - def __init__(self, settings, evaluator, client, logger) -> None: + def __init__(self, settings, client, logger) -> None: self.settings = settings - self.evaluator = evaluator self.client = client self.logger = logger From 7d0ed63c2a25f07db358b9fb83ffaa28aa9c636b Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 12:30:26 -0800 Subject: [PATCH 64/93] sample code --- python/packages/ame/src/ame/eval.py | 2 +- .../data_files/advice/add_topic.yaml | 6 + .../data_files/demos/cell_towers_2_demo.yaml | 11 ++ .../data_files/tasks/100_vampires.yaml | 22 ++++ .../data_files/tasks/10_liars.yaml | 8 ++ .../data_files/tasks/autogen_package.yaml | 5 + .../data_files/tasks/cell_towers_1.yaml | 9 ++ .../data_files/tasks/cell_towers_2.yaml | 9 ++ .../eval_learning_from_demonstration.py | 106 ++++++++++++++++ .../agentic_memory/eval_self_teaching.py | 118 ++++++++++++++++++ .../agentic_memory/eval_teachability.py | 101 +++++++++++++++ .../settings/demonstration.yaml | 32 +++++ .../settings/self_teaching.yaml | 32 +++++ .../agentic_memory/settings/teachability.yaml | 30 +++++ python/samples/agentic_memory/utils/client.py | 31 +++++ 15 files changed, 521 insertions(+), 1 deletion(-) create mode 100644 python/samples/agentic_memory/data_files/advice/add_topic.yaml create mode 100644 python/samples/agentic_memory/data_files/demos/cell_towers_2_demo.yaml create mode 100644 python/samples/agentic_memory/data_files/tasks/100_vampires.yaml create mode 100644 python/samples/agentic_memory/data_files/tasks/10_liars.yaml create mode 100644 python/samples/agentic_memory/data_files/tasks/autogen_package.yaml create mode 100644 python/samples/agentic_memory/data_files/tasks/cell_towers_1.yaml create mode 100644 python/samples/agentic_memory/data_files/tasks/cell_towers_2.yaml create mode 100644 python/samples/agentic_memory/eval_learning_from_demonstration.py create mode 100644 python/samples/agentic_memory/eval_self_teaching.py create mode 100644 python/samples/agentic_memory/eval_teachability.py create mode 100644 python/samples/agentic_memory/settings/demonstration.yaml create mode 100644 python/samples/agentic_memory/settings/self_teaching.yaml create mode 100644 python/samples/agentic_memory/settings/teachability.yaml create mode 100644 python/samples/agentic_memory/utils/client.py diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py index d79915996b6e..b867343e3212 100644 --- a/python/packages/ame/src/ame/eval.py +++ b/python/packages/ame/src/ame/eval.py @@ -13,7 +13,7 @@ async def perform_evaluations(settings, logger) -> None: """ logger.enter_function() - # Create the client, which is passed to both the apprentice and the evaluator. + # Create the client. client_creator = ClientCreator(settings=settings["client"], logger=logger) client = client_creator.create_client() diff --git a/python/samples/agentic_memory/data_files/advice/add_topic.yaml b/python/samples/agentic_memory/data_files/advice/add_topic.yaml new file mode 100644 index 000000000000..e07dd8be08a5 --- /dev/null +++ b/python/samples/agentic_memory/data_files/advice/add_topic.yaml @@ -0,0 +1,6 @@ +# Advice created manually for solving the autogen_package task. + +advice: 'When somebody builds something on autogen and wants to contribute it to autogen, + instead of creating a new autogen package, it''s better for them to implement it in a separate github repo, + then just add the autogen-extension topic to that repo. 
+ That way their contribution will be automatically discoverable through autogen''s page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html' diff --git a/python/samples/agentic_memory/data_files/demos/cell_towers_2_demo.yaml b/python/samples/agentic_memory/data_files/demos/cell_towers_2_demo.yaml new file mode 100644 index 000000000000..da626f9783b7 --- /dev/null +++ b/python/samples/agentic_memory/data_files/demos/cell_towers_2_demo.yaml @@ -0,0 +1,11 @@ +# Demo created manually for solving the cell_towers_2 task. + +demo: 'Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. + Then start at one end and place the towers only where absolutely needed. + The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. + This obviously covers houses up to mile 7. + But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. + So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. + The next uncovered house would be at mile 12 (not 10), requiring a second tower. + It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), + covering the remaining houses. So 2 towers would be enough.' diff --git a/python/samples/agentic_memory/data_files/tasks/100_vampires.yaml b/python/samples/agentic_memory/data_files/tasks/100_vampires.yaml new file mode 100644 index 000000000000..2e2341d91fd1 --- /dev/null +++ b/python/samples/agentic_memory/data_files/tasks/100_vampires.yaml @@ -0,0 +1,22 @@ +# From GAIA L1 + +task_description: "You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021B\ + cu IV, son of Costea, has tasked you with investigating the village of \u0218\ + irnea in neighboring Wallachia. The Count's advisors have reported that a vampire\ + \ was spotted crossing the border near the village, and would like you to investigate\ + \ it.\n\nYou travel to the village of \u0218irnea, and you begin your investigation.\ + \ One night, just before dawn, you catch a glimpse of a man in a long black\ + \ cape with red lining leaping from roof-top to roof-top with superhuman agility.\ + \ It's a vampire! You try to chase the creature back to its home, but the creature\ + \ is too fast. However, because of the remoteness of the village, you know with\ + \ absolute certainty that the vampire must be a resident of the village. You\ + \ decide that your best course of action will be to visit all 100 residents\ + \ of the town during the day. You know something about vampires and humans that\ + \ will make your investigation possible; humans always tell the truth, but vampires\ + \ always lie.\n\nIn the afternoon, you go from house to house, speaking with\ + \ all 100 residents of \u0218irnea. You ask everyone the same question: \"How\ + \ many vampires are living in \u0218irnea\". Everyone in the village gives the\ + \ same response, \"At least one of us is a human.\"\n\nHow many residents of\ + \ \u0218irnea have been turned into vampires?" + +expected_answer: '100' diff --git a/python/samples/agentic_memory/data_files/tasks/10_liars.yaml b/python/samples/agentic_memory/data_files/tasks/10_liars.yaml new file mode 100644 index 000000000000..096e12775935 --- /dev/null +++ b/python/samples/agentic_memory/data_files/tasks/10_liars.yaml @@ -0,0 +1,8 @@ +# Similar to the 100 vampires task, for testing generalization from one to the other. 
+ +task_description: 'You ask ten people ''How many of you are liars?'' + They all answer ''At least one of us is not a liar.'' + You happen to know that at least one of them IS a liar. + How many of them are liars in total?' + +expected_answer: All of them are liars. diff --git a/python/samples/agentic_memory/data_files/tasks/autogen_package.yaml b/python/samples/agentic_memory/data_files/tasks/autogen_package.yaml new file mode 100644 index 000000000000..f80840b30073 --- /dev/null +++ b/python/samples/agentic_memory/data_files/tasks/autogen_package.yaml @@ -0,0 +1,5 @@ +# Test where human advice is needed. + +task_description: As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen? + +expected_answer: It's best to have your agent in its own repo, then add the autogen-extension topic to that repo. diff --git a/python/samples/agentic_memory/data_files/tasks/cell_towers_1.yaml b/python/samples/agentic_memory/data_files/tasks/cell_towers_1.yaml new file mode 100644 index 000000000000..f86e370db3ee --- /dev/null +++ b/python/samples/agentic_memory/data_files/tasks/cell_towers_1.yaml @@ -0,0 +1,9 @@ +# File-free version of a GAIA L1 task. + +task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. + Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. + Each cell phone tower can cover houses located next to the road within a 4-mile radius. + Find the minimum number of cell phone towers needed to cover all houses next to the road. + Your answer should be a positive numerical integer value. + +expected_answer: '2' diff --git a/python/samples/agentic_memory/data_files/tasks/cell_towers_2.yaml b/python/samples/agentic_memory/data_files/tasks/cell_towers_2.yaml new file mode 100644 index 000000000000..5ddc046920c9 --- /dev/null +++ b/python/samples/agentic_memory/data_files/tasks/cell_towers_2.yaml @@ -0,0 +1,9 @@ +# Similar to the cell_towers_1 task. + +task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. + Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. + Each cell phone tower can cover houses located next to the road within a 4-mile radius. + Find the minimum number of cell phone towers needed to cover all houses next to the road. + Your answer should be a positive numerical integer value. + +expected_answer: '2' diff --git a/python/samples/agentic_memory/eval_learning_from_demonstration.py b/python/samples/agentic_memory/eval_learning_from_demonstration.py new file mode 100644 index 000000000000..bd7d1d9eb21c --- /dev/null +++ b/python/samples/agentic_memory/eval_learning_from_demonstration.py @@ -0,0 +1,106 @@ +import asyncio +import sys +from typing import Dict +import yaml + +from autogen_core.models import ( + ChatCompletionClient, +) +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger + +from utils.client import create_oai_client + + +async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient, + logger: PageLogger, settings: Dict) -> str: + """ + Evaluates the ability to learn quickly from demonstrations. + """ + logger.enter_function() + + num_trials = settings["num_trials"] + grader = Grader(client, logger) + + # Load the specified data. + with open(settings["main_task_file"], "r") as file: + # The task being tested. 
+        main_task = yaml.load(file, Loader=yaml.FullLoader)
+        task_description = main_task["task_description"]
+        expected_answer = main_task["expected_answer"]
+    with open(settings["demo_task_file"], "r") as file:
+        # A similar but different task.
+        demo_task = yaml.load(file, Loader=yaml.FullLoader)["task_description"]
+    with open(settings["demo_solution_file"], "r") as file:
+        # A demonstration of solving the second task.
+        demo_solution = yaml.load(file, Loader=yaml.FullLoader)["demo"]
+
+    # Start by clearing memory then running a baseline test.
+    logger.info("To get a baseline, clear memory, then assign the task.")
+    apprentice.reset_memory()
+    num_successes, num_trials = await grader.test_apprentice(
+        apprentice=apprentice,
+        task_description=task_description,
+        expected_answer=expected_answer,
+        num_trials=num_trials,
+        use_memory=True,
+        client=client,
+        logger=logger,
+    )
+    success_rate = round((num_successes / num_trials) * 100)
+    results_str_1 = "Success rate before demonstration: {}%".format(success_rate)
+    logger.info("\n" + results_str_1)
+
+    # Provide a demonstration for a similar but different task.
+    logger.info("Demonstrate a solution to a similar task.")
+    await apprentice.add_task_solution_pair_to_memory(demo_task, demo_solution)
+
+    # Now test again to see if the demonstration (retrieved from memory) helps.
+    logger.info("Assign the task again to see if the demonstration helps.")
+    num_successes, num_trials = await grader.test_apprentice(
+        apprentice=apprentice,
+        task_description=task_description,
+        expected_answer=expected_answer,
+        num_trials=num_trials,
+        use_memory=True,
+        client=client,
+        logger=logger,
+    )
+    success_rate = round((num_successes / num_trials) * 100)
+    results_str_2 = "Success rate after demonstration: {}%".format(success_rate)
+    logger.info("\n" + results_str_2)
+
+    logger.leave_function()
+    return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2
+
+
+async def run_example(settings_filepath) -> None:
+    """
+    Runs the code example with the necessary components.
+    """
+    with open(settings_filepath, "r") as file:
+        # Create the necessary components.
+        settings = yaml.load(file, Loader=yaml.FullLoader)
+        logger = PageLogger(settings["PageLogger"])
+        client = create_oai_client(settings["client"], logger)
+        apprentice = Apprentice(settings["Apprentice"], client, logger)
+
+    # Call the example function.
+    results = await eval_learning_from_demonstration(apprentice, client, logger, settings["test"])
+
+    if hasattr(client, "finalize"):
+        # If this is a client wrapper, it needs to be finalized.
+        client.finalize()
+
+    # Finish up.
+    logger.flush(finished=True)
+    print(results)
+
+
+if __name__ == "__main__":
+    args = sys.argv[1:]
+    if len(args) != 1:
+        # Print usage information.
+        print("Usage: amt.py <path to settings file>")
+    else:
+        # Run the code example.
+ asyncio.run(run_example(settings_filepath=args[0])) diff --git a/python/samples/agentic_memory/eval_self_teaching.py b/python/samples/agentic_memory/eval_self_teaching.py new file mode 100644 index 000000000000..26fceeab6e0d --- /dev/null +++ b/python/samples/agentic_memory/eval_self_teaching.py @@ -0,0 +1,118 @@ +import asyncio +import sys +from typing import Dict +import yaml + +from autogen_core.models import ( + ChatCompletionClient, +) +from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger + +from utils.client import create_oai_client + + +async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient, + logger: PageLogger, settings: Dict) -> str: + """ + Evaluates the ability of an agent to learn quickly from its own trial and error. + """ + logger.enter_function() + + num_loops = settings["num_loops"] + num_final_test_trials = settings["num_final_test_trials"] + grader = Grader(client, logger) + + # Load the specified data. + with open(settings["task_file_1"], "r") as file: + # Train and test on this task. + task_1 = yaml.load(file, Loader=yaml.FullLoader) + task_description_1 = task_1["task_description"] + expected_answer_1 = task_1["expected_answer"] + with open(settings["task_file_2"], "r") as file: + # Test generalization on this different, similar task. + task_2 = yaml.load(file, Loader=yaml.FullLoader) + task_description_2 = task_2["task_description"] + expected_answer_2 = task_2["expected_answer"] + + # Start the test with empty memory. + apprentice.reset_memory() + + total_num_successes_1 = 0 + total_num_successes_2 = 0 + total_num_trials = 0 + for i in range(num_loops): + # Train on the first task. + await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1) + + # Test on the first task. + num_successes, num_trials = await grader.test_apprentice( + apprentice=apprentice, + task_description=task_description_1, + expected_answer=expected_answer_1, + num_trials=num_final_test_trials, + use_memory=True, + client=client, + logger=logger, + ) + logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100))) + total_num_successes_1 += num_successes + + # Test on the second task. + num_successes, num_trials = await grader.test_apprentice( + apprentice=apprentice, + task_description=task_description_2, + expected_answer=expected_answer_2, + num_trials=num_final_test_trials, + use_memory=True, + client=client, + logger=logger, + ) + logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100))) + total_num_successes_2 += num_successes + + total_num_trials += num_final_test_trials + logger.info("") + + overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100) + overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100) + + results_str_1 = "Overall task 1 success rate: {}%".format(overall_success_rate_1) + results_str_2 = "Overall task 2 success rate: {}%".format(overall_success_rate_2) + logger.info("\n" + results_str_1) + logger.info(results_str_2) + + logger.leave_function() + return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2 + + +async def run_example(settings_filepath) -> None: + """ + Runs the code example with the necessary components. + """ + with open(settings_filepath, "r") as file: + # Create the necessary components. 
+        settings = yaml.load(file, Loader=yaml.FullLoader)
+        logger = PageLogger(settings["PageLogger"])
+        client = create_oai_client(settings["client"], logger)
+        apprentice = Apprentice(settings["Apprentice"], client, logger)
+
+    # Call the example function.
+    results = await eval_self_teaching(apprentice, client, logger, settings["test"])
+
+    if hasattr(client, "finalize"):
+        # If this is a client wrapper, it needs to be finalized.
+        client.finalize()
+
+    # Finish up.
+    logger.flush(finished=True)
+    print(results)
+
+
+if __name__ == "__main__":
+    args = sys.argv[1:]
+    if len(args) != 1:
+        # Print usage information.
+        print("Usage: amt.py <path to settings file>")
+    else:
+        # Run the code example.
+        asyncio.run(run_example(settings_filepath=args[0]))
diff --git a/python/samples/agentic_memory/eval_teachability.py b/python/samples/agentic_memory/eval_teachability.py
new file mode 100644
index 000000000000..0c49e4d0550b
--- /dev/null
+++ b/python/samples/agentic_memory/eval_teachability.py
@@ -0,0 +1,101 @@
+import asyncio
+import sys
+from typing import Dict
+import yaml
+
+from autogen_core.models import (
+    ChatCompletionClient,
+)
+
+from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
+
+from utils.client import create_oai_client
+
+
+async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict) -> str:
+    """
+    Evaluates the ability to learn quickly from user teachings, hints, and advice.
+    """
+    logger.enter_function()
+
+    # Load the specified data.
+    with open(settings["task_file"], "r") as file:
+        # The task being tested.
+        task = yaml.load(file, Loader=yaml.FullLoader)
+        task_description = task["task_description"]
+        expected_answer = task["expected_answer"]
+    with open(settings["advice_file"], "r") as file:
+        # Advice for solving such tasks.
+        advice = yaml.load(file, Loader=yaml.FullLoader)["advice"]
+
+    # First test without memory.
+    apprentice.reset_memory()
+    logger.info("\nClear memory, then ask the question.")
+    response = await apprentice.handle_user_message(task_description)
+
+    # Check the response.
+    grader = Grader(client, logger)
+    response_is_correct, extracted_answer = await grader.is_response_correct(
+        task_description, response, expected_answer
+    )
+    logger.info("Extracted answer: {}".format(extracted_answer))
+    if response_is_correct:
+        results_str_1 = "Answer before teaching is CORRECT."
+    else:
+        results_str_1 = "Answer before teaching is INCORRECT."
+    logger.info(results_str_1 + "\n")
+
+    # Give advice that should help solve this task.
+    logger.info("Give the advice.")
+    await apprentice.handle_user_message(advice)
+
+    # Now ask the question again to see if the advice helps.
+    logger.info("\nAsk the question again to see if the advice helps.")
+    response = await apprentice.handle_user_message(task_description)
+
+    # Check the response.
+    response_is_correct, extracted_answer = await grader.is_response_correct(
+        task_description, response, expected_answer
+    )
+    logger.info("Extracted answer: {}".format(extracted_answer))
+    if response_is_correct:
+        results_str_2 = "Answer after teaching is CORRECT."
+    else:
+        results_str_2 = "Answer after teaching is INCORRECT."
+    logger.info(results_str_2 + "\n")
+
+    logger.leave_function()
+    return "\neval_teachability\n" + results_str_1 + "\n" + results_str_2
+
+
+async def run_example(settings_filepath) -> None:
+    """
+    Runs the code example with the necessary components.
+    """
+    with open(settings_filepath, "r") as file:
+        # Create the necessary components.
+        settings = yaml.load(file, Loader=yaml.FullLoader)
+        logger = PageLogger(settings["PageLogger"])
+        client = create_oai_client(settings["client"], logger)
+        apprentice = Apprentice(settings["Apprentice"], client, logger)
+
+    # Call the example function.
+    results = await eval_teachability(apprentice, client, logger, settings["test"])
+
+    if hasattr(client, "finalize"):
+        # If this is a client wrapper, it needs to be finalized.
+        client.finalize()
+
+    # Finish up.
+    logger.flush(finished=True)
+    print(results)
+
+
+if __name__ == "__main__":
+    args = sys.argv[1:]
+    if len(args) != 1:
+        # Print usage information.
+        print("Usage: amt.py <path to settings file>")
+    else:
+        # Run the code example.
+        asyncio.run(run_example(settings_filepath=args[0]))
diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml
new file mode 100644
index 000000000000..88dfe748d122
--- /dev/null
+++ b/python/samples/agentic_memory/settings/demonstration.yaml
@@ -0,0 +1,32 @@
+
+PageLogger:
+  level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
+  path: ~/pagelogs/temp18
+
+client:
+  model: gpt-4o-2024-08-06
+  api_key: sk-  # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY.
+  temperature: 0.8
+  max_completion_tokens: 4096
+  presence_penalty: 0.0
+  frequency_penalty: 0.0
+  top_p: 1.0
+  max_retries: 65535
+
+Apprentice:
+  AgenticMemoryController:
+    max_train_trials: 10
+    max_test_trials: 3
+    AgenticMemoryBank:
+      path: ~/agentic_memory_bank/temp
+      relevance_conversion_threshold: 1.7
+      n_results: 25
+      distance_threshold: 100
+  AgentWrapper:
+    base_agent: thin_agent  # MagenticOneGroupChat, thin_agent, etc.
+
+test:
+  main_task_file: data_files/tasks/cell_towers_1.yaml  # The task being tested.
+  demo_task_file: data_files/tasks/cell_towers_2.yaml  # A similar but different task.
+  demo_solution_file: data_files/demos/cell_towers_2_demo.yaml  # A demonstration of solving the second.
+  num_trials: 10
diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml
new file mode 100644
index 000000000000..046c1b263a15
--- /dev/null
+++ b/python/samples/agentic_memory/settings/self_teaching.yaml
@@ -0,0 +1,32 @@
+
+PageLogger:
+  level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
+  path: ~/pagelogs/temp18
+
+client:
+  model: gpt-4o-2024-08-06
+  api_key: sk-  # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY.
+  temperature: 0.8
+  max_completion_tokens: 4096
+  presence_penalty: 0.0
+  frequency_penalty: 0.0
+  top_p: 1.0
+  max_retries: 65535
+
+Apprentice:
+  AgenticMemoryController:
+    max_train_trials: 10
+    max_test_trials: 3
+    AgenticMemoryBank:
+      path: ~/agentic_memory_bank/temp
+      relevance_conversion_threshold: 1.7
+      n_results: 25
+      distance_threshold: 100
+  AgentWrapper:
+    base_agent: thin_agent  # MagenticOneGroupChat, thin_agent, etc.
+
+test:
+  task_file_1: data_files/tasks/10_liars.yaml  # Train and test on this task.
+  task_file_2: data_files/tasks/100_vampires.yaml  # Test generalization on this different, similar task.
+  num_loops: 10
+  num_final_test_trials: 3
diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml
new file mode 100644
index 000000000000..2ba70b3fa63d
--- /dev/null
+++ b/python/samples/agentic_memory/settings/teachability.yaml
@@ -0,0 +1,30 @@
+
+PageLogger:
+  level: DEBUG  # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE.
+ path: ~/pagelogs/temp18 + +client: + model: gpt-4o-2024-08-06 + api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. + temperature: 0.8 + max_completion_tokens: 4096 + presence_penalty: 0.0 + frequency_penalty: 0.0 + top_p: 1.0 + max_retries: 65535 + +Apprentice: + AgenticMemoryController: + max_train_trials: 10 + max_test_trials: 3 + AgenticMemoryBank: + path: ~/agentic_memory_bank/temp + relevance_conversion_threshold: 1.7 + n_results: 25 + distance_threshold: 100 + AgentWrapper: + base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. + +test: + task_file: data_files/tasks/autogen_package.yaml # The task being tested. + advice_file: data_files/advice/add_topic.yaml # Advice provided to help solve the task. diff --git a/python/samples/agentic_memory/utils/client.py b/python/samples/agentic_memory/utils/client.py new file mode 100644 index 000000000000..1c13e6616488 --- /dev/null +++ b/python/samples/agentic_memory/utils/client.py @@ -0,0 +1,31 @@ +from autogen_core.models import ( + ChatCompletionClient, +) +from autogen_ext.models.openai import OpenAIChatCompletionClient + + +def create_oai_client(settings, logger) -> ChatCompletionClient: + """ + Creates a chat completion client from OpenAI. + """ + logger.enter_function() + args = {} + args["model"] = settings["model"] + args["max_completion_tokens"] = settings["max_completion_tokens"] + args["max_retries"] = settings["max_retries"] + if not args["model"].startswith("o1"): + args["temperature"] = settings["temperature"] + args["presence_penalty"] = settings["presence_penalty"] + args["frequency_penalty"] = settings["frequency_penalty"] + args["top_p"] = settings["top_p"] + if "api_key" in settings: + args["api_key"] = settings["api_key"] + + # Instantiate the client. + client = OpenAIChatCompletionClient(**args) + + # Log some details. 
+ logger.info("Client: {}".format(client._resolved_model)) + logger.info(" created through OpenAI") + logger.leave_function() + return client From 5b3876f13579f3632a19d60ec8904ebed513a6a8 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 16:00:49 -0800 Subject: [PATCH 65/93] readme --- .../autogen-ext/imgs/agentic_memory.png | Bin 0 -> 52136 bytes .../src/autogen_ext/agentic_memory/README.md | 85 ++++++++++++++++++ python/samples/agentic_memory/README.md | 0 3 files changed, 85 insertions(+) create mode 100644 python/packages/autogen-ext/imgs/agentic_memory.png create mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md create mode 100644 python/samples/agentic_memory/README.md diff --git a/python/packages/autogen-ext/imgs/agentic_memory.png b/python/packages/autogen-ext/imgs/agentic_memory.png new file mode 100644 index 0000000000000000000000000000000000000000..8fc7f3a5933401b89dfa09b710dfc308676c6f36 GIT binary patch literal 52136 zcmeFZcT`i`*Dj0&X$O&J6e-d>QNRKS2+|Rd-h)!4S1CaVsK9~HdsLeAUWFhefMNgz z1!)p$B!F}M50%*i%7Q)R^PgnC)(odJp&L&prjDP?Nz@b7pFHWMPQU!QD)@^+<*1< zS*O4?v4(0}_e9qYXH?|Q%5F~8trn1G&rNKf)CeJBmV@-oDJfP@{Prm`_TW)xkB)t< z7|_M7;YOyMh>rzq1g4OXmYFLI3i(xT8X$_2)4z}F!hQp;*|^77inhsqU?Cb(mb+VO zf~KIbrw26Z8_>&w>$MC@7`p4;3krc1jT!v{#jB5ZMv?RD^QR5cD55|VLgxY9*kB+l zV28qSMleOkoo!C_ruC;DYRM919t4^va6QE=Gpc${^22PuKYV>MHW=LJI4M1(6STED zgfrf-m0!tqhIljYqmiK-C9NLgo7)L*xr{$%O4lW1?0l7iZ|!}dA8K~JaOTU#61y?c zZDjSq?Q2P1XK9MUPGm2s$?rv3?&$B@jkS(HoT~L|e**QrHa`(;T$AP1Wbpi}Q{XCM zcBDBA5g))*=Q02O(PrLe!`Ex6xHdox0{bF5^=SH3eYrvhyeF~5lk8s6sEt=7gX(oH zs*0y(0sPm9uW+`@nkkpv&(5oMl?i_cyXQXk z<7KJ`D}Xp8hyFA$QP3koC!8l^+so4HT^?&eE)QaQ;3L~7AuA25#tb$G`1Kpf1x)=j zDfG!#?!&p{L$fW{cf^_68W&Wp>)uw$X$cb~1svp@@pH5K``w(OsanM9Olbd#@mvu- zL*pFtdDT;3+0LAt#A|)d72|<}myM@2Ubuz6dN_q#wp-j=FNN+?+>!%2Gt2eUnDIa; zqF)siuV+G&Vx!Z1Y>QY5F+GI=v4jH&diVnaFj8UkJRwgjU5o1EX<4EH_`n(O&~B(# zOiM2cG4if^GLJTG2V~+?QERfob0cAb5AlU)OC4|AbB|^oZtRjcWI(}`282@(@xGQ` z?_4E1jUQJ}p9r8Rk^fB+w&szJr-N*4-A$jZ2c`=PDyuw&!u_|L=}Z+KLmCo^brad{ z5v(%)=Xi9`ZubsCrgG>+A&*?5QyoNeQ-d{`!F#clRw9wTlS8Icv6XGv__>LU$iQ)1 zOCf%n^lVfPpTgkjL@>n`u-*Kkq6bcy3^`|Nhs9dg)YusKRw*y@>7Z5md-fk5)1&FU z$LL=HLcOm?Sme0XIK4x7$+3+T4z=oNlr8|DER2c<`GGgwudEob#?LXu@7FCZ$_4gy z8OWqw13BY6co_5ViRaCr&D@4#!Zo2MVRkvxam>sT%(m5*mK`z8hpEXtD9D)bRl%|J zv2xG0@ZL?BngC7K`mk(ej+ug0fUg+BYXyWmpjr;yjl=96InVXm+ASq`USq=Th&xZl zsmaS0&pxQ0!^EgWdA>`oUxgQfU%quuPo|d@@RO+{mDSn|G;^zp%0Rml|2LhANE|OAT!aH_>ZGQik4)K9wY(CvsMgfAu-A z{t4M#(7spIx|6TsGOq~Mwv2-mmqS9@)2HkXb8jwJqLTQ+tSwO%_cqKpH#>0vwIdkbIVd_ zTZW>9{&&fAa5jY(T(6moRYorR-^OMGNC$W;GkX4!XouziD-6R*ey(J@%k+ ztg*|0C~jYPEX8gB+gAwBk{AOe1jh{UVjE{2tHIpi6wmwtPvWg2&@r-4qJpLvLTI7R zMidzTS>U(N=!{o=PG9IQ7`^UbxdZugysko3vO|l1`rNxKI_N*P#9TGc8JY5&XZPf% z$6Xauc#zScJP$QjHbtQcko`>6f2zo!MV&YRJ2Ml4A5X zKjxO=KzP)Nrk0Id(u6UCj!xp`{d*Q8IR@j-#sqv&Q}t;}LVqmYfkbz51DT&IwJwax z(z@#4tLSOk>{3!-ae|_bydVE_j|?GiT#&#IkfI{rb7dyzo#rbvIJ!!`%LU@DGJ!@m z>mfXlQ{Yuf!qHp*lKw6|!@C>KV8l(fRoD?VG!}L)xrRl14GrDE&)#es@~eX=dtH(0sjvct%$1J^Vc$S3>gT7amWsP7LpbAOq?0n?<5b zJr@37G|tIQ-W&VWY*a}h4Qz$KY?-^fPo{$&zJK)GgY7AFRF@g7l&1a!^trw}6R9gG z&+Tx3C9$F?rAh_OF7=IP{o+>nZIPw5l&=SgkBZp{bzdj=x1Tc! 

From: Ricky Loynd
Date: Mon, 27 Jan 2025 16:11:17 -0800
Subject: [PATCH 66/93] readme fixes

---
 python/packages/autogen-ext/pyproject.toml               | 2 +-
 .../autogen-ext/src/autogen_ext/agentic_memory/README.md | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml
index b01dccee0e88..eec5815271f0 100644
--- a/python/packages/autogen-ext/pyproject.toml
+++ b/python/packages/autogen-ext/pyproject.toml
@@ -60,7 +60,7 @@ jupyter-executor = [
     "ipykernel>=6.29.5",
     "nbclient>=0.10.2",
 ]
-apprentice = 
["chromadb"] +agentic-memory = ["chromadb"] semantic-kernel-core = [ "semantic-kernel>=1.17.1", diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index fd16deb3680d..08bdcc75ac2d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -28,7 +28,7 @@ can benefit other users in similar situations. ![agentic_memory.png](../../../imgs/agentic_memory.png) The block diagram above outlines the key components of our baseline agentic memory architecture, -which augments a base agent with the agentic memory mechanisms described above. +which augments a base agent with the agentic memory mechanisms. The **Agentic Memory Controller** implements the fast-learning methods described below, and manages communication with an **Agentic Memory Bank** containing a vector DB and associated structures. @@ -37,7 +37,7 @@ The **Apprentice** is a thin wrapper around the combination of agentic memory wi Some applications will use the Apprentice class, and others will instantiate and use the Agentic Memory Controller directly. The **Base Agent** is any agent or team orchestrator designed to perform tasks passed to it, -perhaps by interacting with an Environment such as a web browser. +perhaps by interacting with an **Environment** such as a web browser. We’ve successfully connected and tested several different base agents: a simple LLM client, the Magentic-One orchestrator, and the GitHub Copilot Chat agent. @@ -75,9 +75,9 @@ Retrieved insights that pass the filtering steps are listed under a heading like ## Setup and Usage -After installing AutoGen core, install its extension package as follows: +After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directlry as follows: -`pip install -e .[agentic_memory]` +`pip install -e .[agentic-memory]` We provide [sample code](../../../../../samples/agentic_memory/README.md) to illustrate the following forms of memory-based fast learning: * Agent learning from user advice and corrections From 232ed0fc7e6d6d7278cea49925c35b6100826c8d Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 16:43:23 -0800 Subject: [PATCH 67/93] samples readme --- .../src/autogen_ext/agentic_memory/README.md | 2 +- python/samples/agentic_memory/README.md | 53 +++++++++++++++++++ .../settings/demonstration.yaml | 2 +- .../settings/self_teaching.yaml | 2 +- .../agentic_memory/settings/teachability.yaml | 2 +- 5 files changed, 57 insertions(+), 4 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index 08bdcc75ac2d..dee35d1d5770 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -75,7 +75,7 @@ Retrieved insights that pass the filtering steps are listed under a heading like ## Setup and Usage -After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directlry as follows: +After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: `pip install -e .[agentic-memory]` diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index e69de29bb2d1..d82df29f5d6c 
100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -0,0 +1,53 @@ +# Agentic Memory Code Samples + +This directory contains code samples that illustrate the following forms of memory-based fast learning: +* Agent learning from user advice and corrections +* Agent learning from user demonstrations +* Agent learning from its own experience + +Each sample is contained in a separate python script, using data and settings stored in yaml files. +Note that since agent behavior is non-deterministic, the results may vary between runs. + +To watch all operations in a browser, open the HTML page at the location specified at the top of the settings file, +such as: `~/pagelogs/teachability/0 Call Tree.html` + + +## Setup + +After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: + +`pip install -e .[agentic-memory]` + + +## Running the Samples + +Execute the following commands from this directory. + + +### Agent Learning from User Advice and Corrections + +This sample first tests the agent for knowledge it currently lacks. +Then the agent is given advice to help it solve the task, and the context window is cleared. +Finally the agent is tested again to see if it can retrieve and use the advice successfully. + +`python eval_teachability.py settings/teachability.yaml` + + +### Agent Learning from User Demonstrations + +This sample asks the agent to perform a reasoning task on which it usually fails. +The agent is then given a demonstration of how to solve a similar but different task, and the context window is cleared. +Finally the agent is tested again to see if it can retrieve and apply the demonstration to the original task. + +`python eval_learning_from_demonstration.py settings/demonstration.yaml` + + +### Agent Learning from Its Own Experience + +This sample asks the agent to perform a reasoning task on which it usually fails. +Then the agent (running in the background) iterates through a learning loop in an effort to find a solution, +which it then stores as an insight in memory. +Finally the agent is tested again to see if it can retrieve and apply the insight to the original task, +as well as to a similar but different task. + +`python eval_self_teaching.py settings/self_teaching.yaml` diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml index 88dfe748d122..6bbdec8b3743 100644 --- a/python/samples/agentic_memory/settings/demonstration.yaml +++ b/python/samples/agentic_memory/settings/demonstration.yaml @@ -1,7 +1,7 @@ PageLogger: level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/temp18 + path: ~/pagelogs/demonstration client: model: gpt-4o-2024-08-06 diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml index 046c1b263a15..60b6f2ab51d4 100644 --- a/python/samples/agentic_memory/settings/self_teaching.yaml +++ b/python/samples/agentic_memory/settings/self_teaching.yaml @@ -1,7 +1,7 @@ PageLogger: level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. 
- path: ~/pagelogs/temp18 + path: ~/pagelogs/self-teaching client: model: gpt-4o-2024-08-06 diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml index 2ba70b3fa63d..382ba6ad19d8 100644 --- a/python/samples/agentic_memory/settings/teachability.yaml +++ b/python/samples/agentic_memory/settings/teachability.yaml @@ -1,7 +1,7 @@ PageLogger: level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/temp18 + path: ~/pagelogs/teachability client: model: gpt-4o-2024-08-06 From 87ee27b8020d3d3ec6b3f5bac3014046ea0e712c Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 16:55:16 -0800 Subject: [PATCH 68/93] readme files --- .../autogen-ext/src/autogen_ext/agentic_memory/README.md | 2 +- python/samples/agentic_memory/README.md | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index dee35d1d5770..fdef02142409 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -79,7 +79,7 @@ After installing AutoGen core, install its extension package from the `autogen/p `pip install -e .[agentic-memory]` -We provide [sample code](../../../../../samples/agentic_memory/README.md) to illustrate the following forms of memory-based fast learning: +We provide [sample code](../../../../../samples/agentic_memory) to illustrate the following forms of memory-based fast learning: * Agent learning from user advice and corrections * Agent learning from user demonstrations * Agent learning from its own experience diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index d82df29f5d6c..98239108452f 100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -6,9 +6,10 @@ This directory contains code samples that illustrate the following forms of memo * Agent learning from its own experience Each sample is contained in a separate python script, using data and settings stored in yaml files. -Note that since agent behavior is non-deterministic, the results may vary between runs. +Note that since agent behavior is non-deterministic, results will vary between runs. -To watch all operations in a browser, open the HTML page at the location specified at the top of the settings file, +To watch operations live in a browser and see how agentic memory works, +open the HTML page at the location specified at the top of the settings file, such as: `~/pagelogs/teachability/0 Call Tree.html` @@ -21,7 +22,7 @@ After installing AutoGen core, install its extension package from the `autogen/p ## Running the Samples -Execute the following commands from this directory. +Execute the following commands from this (autogen_ext/agentic_memory) directory. 
### Agent Learning from User Advice and Corrections From b21d1402957a2e01e9381f52d63f4fbd1fd995f4 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 17:14:40 -0800 Subject: [PATCH 69/93] readme files --- .../autogen-ext/src/autogen_ext/agentic_memory/README.md | 2 +- python/samples/agentic_memory/README.md | 5 ++++- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index fdef02142409..81dac83803db 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -75,7 +75,7 @@ Retrieved insights that pass the filtering steps are listed under a heading like ## Setup and Usage -After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: +After installing AutoGen-Core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: `pip install -e .[agentic-memory]` diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index 98239108452f..9ab6b69bdefe 100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -12,10 +12,13 @@ To watch operations live in a browser and see how agentic memory works, open the HTML page at the location specified at the top of the settings file, such as: `~/pagelogs/teachability/0 Call Tree.html` +The settings files specify a _thin agent_ by default, which is just the model client plus a canned system prompt. +To use _MagenticOneGroupChat_ instead, specify that in the yaml file where indicated. 
+ ## Setup -After installing AutoGen core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: +After installing AutoGen-Core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: `pip install -e .[agentic-memory]` From 1e88eb6a5f62e0d085f5d8344116f5290791a1dc Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 27 Jan 2025 17:23:18 -0800 Subject: [PATCH 70/93] remove ame --- python/packages/ame/LICENSE-CODE | 21 --- python/packages/ame/README.md | 1 - python/packages/ame/pyproject.toml | 40 ---- python/packages/ame/src/ame/__init__.py | 3 - .../ame/src/ame/clients/_client_creator.py | 126 ------------- .../ame/src/ame/clients/_client_wrapper.py | 173 ------------------ .../src/ame/data_files/advice/add_topic.yaml | 6 - .../data_files/demos/cell_towers_2_demo.yaml | 11 -- .../ame/data_files/tasks/100_vampires.yaml | 22 --- .../src/ame/data_files/tasks/10_liars.yaml | 8 - .../src/ame/data_files/tasks/3_to_third.yaml | 5 - .../ame/data_files/tasks/autogen_package.yaml | 5 - .../ame/data_files/tasks/cell_towers_1.yaml | 9 - .../ame/data_files/tasks/cell_towers_2.yaml | 9 - python/packages/ame/src/ame/eval.py | 69 ------- .../eval_learning_from_demonstration.py | 69 ------- .../ame/eval_functions/eval_self_teaching.py | 81 -------- .../ame/eval_functions/eval_teachability.py | 64 ------- .../eval_functions/eval_without_learning.py | 44 ----- .../ame/src/ame/settings/baseline.yaml | 39 ---- .../packages/ame/src/ame/settings/check.yaml | 57 ------ python/packages/ame/src/ame/settings/m1.yaml | 35 ---- 22 files changed, 897 deletions(-) delete mode 100644 python/packages/ame/LICENSE-CODE delete mode 100644 python/packages/ame/README.md delete mode 100644 python/packages/ame/pyproject.toml delete mode 100644 python/packages/ame/src/ame/__init__.py delete mode 100644 python/packages/ame/src/ame/clients/_client_creator.py delete mode 100644 python/packages/ame/src/ame/clients/_client_wrapper.py delete mode 100644 python/packages/ame/src/ame/data_files/advice/add_topic.yaml delete mode 100644 python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/10_liars.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml delete mode 100644 python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml delete mode 100644 python/packages/ame/src/ame/eval.py delete mode 100644 python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py delete mode 100644 python/packages/ame/src/ame/eval_functions/eval_self_teaching.py delete mode 100644 python/packages/ame/src/ame/eval_functions/eval_teachability.py delete mode 100644 python/packages/ame/src/ame/eval_functions/eval_without_learning.py delete mode 100644 python/packages/ame/src/ame/settings/baseline.yaml delete mode 100644 python/packages/ame/src/ame/settings/check.yaml delete mode 100644 python/packages/ame/src/ame/settings/m1.yaml diff --git a/python/packages/ame/LICENSE-CODE b/python/packages/ame/LICENSE-CODE deleted file mode 100644 index 9e841e7a26e4..000000000000 --- a/python/packages/ame/LICENSE-CODE +++ /dev/null @@ -1,21 +0,0 @@ - MIT License - - Copyright (c) Microsoft Corporation. 
- - Permission is hereby granted, free of charge, to any person obtaining a copy - of this software and associated documentation files (the "Software"), to deal - in the Software without restriction, including without limitation the rights - to use, copy, modify, merge, publish, distribute, sublicense, and/or sell - copies of the Software, and to permit persons to whom the Software is - furnished to do so, subject to the following conditions: - - The above copyright notice and this permission notice shall be included in all - copies or substantial portions of the Software. - - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - SOFTWARE diff --git a/python/packages/ame/README.md b/python/packages/ame/README.md deleted file mode 100644 index f2b0f7a18b9a..000000000000 --- a/python/packages/ame/README.md +++ /dev/null @@ -1 +0,0 @@ -# Agentic Memory Evaluation Framework diff --git a/python/packages/ame/pyproject.toml b/python/packages/ame/pyproject.toml deleted file mode 100644 index 49b5eea467c2..000000000000 --- a/python/packages/ame/pyproject.toml +++ /dev/null @@ -1,40 +0,0 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[project] -name = "ame" -version = "0.1.1" -license = {file = "LICENSE-CODE"} -description = "Agentic Memory Evaluation Framework" -readme = "README.md" -requires-python = ">=3.10" -classifiers = [ - "Programming Language :: Python :: 3", - "License :: OSI Approved :: MIT License", - "Operating System :: OS Independent", -] -dependencies = [ - "autogen-core==0.4.3", -] - -[tool.hatch.build.targets.wheel] -packages = ["src/ame"] - -[tool.ruff] -extend = "../../pyproject.toml" -include = ["src/**", "tests/*.py"] - -[tool.ruff.lint] -# Allow prints in this package -ignore = ["T20"] - -[tool.pyright] -extends = "../../pyproject.toml" -include = ["src"] - -[tool.poe] -include = "../../shared_tasks.toml" - -[tool.poe.tasks] -mypy = "mypy --config-file ../../pyproject.toml src" diff --git a/python/packages/ame/src/ame/__init__.py b/python/packages/ame/src/ame/__init__.py deleted file mode 100644 index 98c56045ead7..000000000000 --- a/python/packages/ame/src/ame/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -import importlib.metadata - -__version__ = importlib.metadata.version("ame") diff --git a/python/packages/ame/src/ame/clients/_client_creator.py b/python/packages/ame/src/ame/clients/_client_creator.py deleted file mode 100644 index 2d5ee5efbaf5..000000000000 --- a/python/packages/ame/src/ame/clients/_client_creator.py +++ /dev/null @@ -1,126 +0,0 @@ -from autogen_ext.models.openai import AzureOpenAIChatCompletionClient, OpenAIChatCompletionClient -from azure.identity import AzureCliCredential, ChainedTokenCredential, DefaultAzureCredential, get_bearer_token_provider - -from ._client_wrapper import ClientWrapper - - -class ClientCreator: - def __init__(self, settings, logger): - self.settings = settings - self.logger = logger - - def create_client(self): - self.logger.enter_function() - - # A few args are shared by all clients. 
- args = {} - args["model"] = self.settings["model"] - args["max_completion_tokens"] = self.settings["max_completion_tokens"] - args["max_retries"] = self.settings["max_retries"] - - # The following args don't apply to the 'o1' family of models. - if not args["model"].startswith("o1"): - args["temperature"] = self.settings["temperature"] - args["presence_penalty"] = self.settings["presence_penalty"] - args["frequency_penalty"] = self.settings["frequency_penalty"] - args["top_p"] = self.settings["top_p"] - - client = None - provider = self.settings["provider"] - if provider == "openai": - client, source = self.create_oai_client(args) - elif provider == "azure_openai": - client, source = self.create_aoai_client(args) - elif provider == "trapi": - client, source = self.create_trapi_client(args) - else: - assert False, "Invalid client provider" - - # Log some details. - self.logger.info("Client: {}".format(client._resolved_model)) - self.logger.info(source) - - # Check if the client should be wrapped. - if "ClientWrapper" in self.settings: - wrapper_settings = self.settings["ClientWrapper"] - if wrapper_settings["enabled"]: - # Wrap the client. - client = ClientWrapper(client, wrapper_settings["mode"], wrapper_settings["session_name"], self.logger) - - self.logger.leave_function() - return client - - def create_oai_client(self, args): - # Create an OpenAI client - args["api_key"] = self.settings["api_key"] - client = OpenAIChatCompletionClient(**args) - return client, " created through OpenAI" - - def create_aoai_client(self, args): - # Create an Azure OpenAI client - token_provider = get_bearer_token_provider( - DefaultAzureCredential(), "https://cognitiveservices.azure.com/.default" - ) - model = self.settings["model"] - if model == "gpt-4o-2024-08-06": - azure_deployment = ( - "gpt-4o-2024-08-06-eval" # This is DeploymentName in the table at https://aka.ms/trapi/models - ) - azure_endpoint = "https://agentic2.openai.azure.com/" - elif model == "gpt-4o-2024-05-13": - azure_deployment = "gpt-4o-2024-05-13-eval" - azure_endpoint = "https://agentic1.openai.azure.com/" - elif model == "o1-preview": - azure_deployment = "o1-preview-2024-09-12-eval" - azure_endpoint = "https://agentic1.openai.azure.com/" - else: - assert False, "Unsupported model" - api_version = "2024-12-01-preview" # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - args["azure_ad_token_provider"] = token_provider - args["azure_deployment"] = azure_deployment - args["azure_endpoint"] = azure_endpoint - args["api_version"] = api_version - client = AzureOpenAIChatCompletionClient(**args) - return client, " created through Azure OpenAI" - - def create_trapi_client(self, args): - # Create an Azure OpenAI client through TRAPI - token_provider = get_bearer_token_provider( - ChainedTokenCredential( - AzureCliCredential(), - DefaultAzureCredential( - exclude_cli_credential=True, - # Exclude other credentials we are not interested in. 
- exclude_environment_credential=True, - exclude_shared_token_cache_credential=True, - exclude_developer_cli_credential=True, - exclude_powershell_credential=True, - exclude_interactive_browser_credential=True, - exclude_visual_studio_code_credentials=True, - # managed_identity_client_id=os.environ.get("DEFAULT_IDENTITY_CLIENT_ID"), # See the TRAPI docs - ), - ), - "api://trapi/.default", - ) - model = self.settings["model"] - if model == "gpt-4o-2024-08-06": - azure_deployment = "gpt-4o_2024-08-06" # This is DeploymentName in the table at https://aka.ms/trapi/models - elif model == "gpt-4o-2024-05-13": - azure_deployment = "gpt-4o_2024-05-13" - elif model == "o1-preview": - azure_deployment = "o1-preview_2024-09-12" - elif model == "o1": - azure_deployment = "o1_2024-12-17" - else: - assert False, "Unsupported model" - trapi_suffix = ( - "msraif/shared" # This is TRAPISuffix (without /openai) in the table at https://aka.ms/trapi/models - ) - endpoint = f"https://trapi.research.microsoft.com/{trapi_suffix}" - api_version = "2024-12-01-preview" # From https://learn.microsoft.com/en-us/azure/ai-services/openai/api-version-deprecation#latest-ga-api-release - args["azure_ad_token_provider"] = token_provider - args["azure_deployment"] = azure_deployment - args["azure_endpoint"] = endpoint - args["api_version"] = api_version - client = AzureOpenAIChatCompletionClient(**args) - return client, " created through TRAPI" diff --git a/python/packages/ame/src/ame/clients/_client_wrapper.py b/python/packages/ame/src/ame/clients/_client_wrapper.py deleted file mode 100644 index 049059c2013d..000000000000 --- a/python/packages/ame/src/ame/clients/_client_wrapper.py +++ /dev/null @@ -1,173 +0,0 @@ -import os -from typing import Any, Dict, List, Mapping, Optional, Sequence - -import yaml -from autogen_core import CancellationToken -from autogen_core.models import ( - CreateResult, - LLMMessage, - RequestUsage, -) -from autogen_core.tools import Tool, ToolSchema -from autogen_ext.agentic_memory import PageLogger -from autogen_ext.models.openai import AzureOpenAIChatCompletionClient - - -class ClientWrapper: - """ - Wraps a client object to record messages and responses (in record mode) - or check the messages and replay the responses (in check-replay mode). - """ - - def __init__( - self, base_client: AzureOpenAIChatCompletionClient, mode: str, session_name: str, logger: PageLogger - ) -> None: - self.logger = logger - self.logger.enter_function() - - self.base_client = base_client - self.mode = mode - self.next_item_index = 0 - self.model_info = {"family": self.base_client.model_info["family"]} - self.path_to_output_file = os.path.join(os.path.expanduser("~/sessions/"), session_name + ".yaml") - self.logger.info("Wrapping the base client in a ClientWrapper.") - if self.mode == "record": - # Prepare to record the messages and responses. - self.logger.info("Recording mode enabled.\nRecording session to: " + self.path_to_output_file) - self.recorded_items = [] - elif self.mode == "check-replay": - # Load the recorded messages and responses from disk. 
- self.logger.info("Check-Replay mode enabled.\nRetrieving session from: " + self.path_to_output_file) - self.recorded_items = self.load() - - self.logger.leave_function() - - async def create( - self, - messages: Sequence[LLMMessage], - tools: Sequence[Tool | ToolSchema] = [], - json_output: Optional[bool] = None, - extra_create_args: Mapping[str, Any] = {}, - cancellation_token: Optional[CancellationToken] = None, - ) -> CreateResult: - response = None - - if self.mode == "pass-through": - response = await self.base_client.create( - messages, tools, json_output, extra_create_args, cancellation_token - ) - elif self.mode == "record": - response = await self.base_client.create( - messages, tools, json_output, extra_create_args, cancellation_token - ) - self.record_one_turn(messages, response) - elif self.mode == "check-replay": - response = self.check_and_replay_one_turn(messages) - else: - raise ValueError(f"Invalid mode: {self.mode}") - - return response - - def convert_messages(self, messages: Sequence[LLMMessage]) -> List[Dict[str, str]]: - converted_messages = [] - for message in messages: - turn = { - "content": message.content, - "source": "System" if message.type == "SystemMessage" else message.source, - } - converted_messages.append(turn) - return converted_messages - - def record_one_turn(self, messages: Sequence[LLMMessage], response: CreateResult) -> None: - # Record the messages and response. - converted_messages = self.convert_messages(messages) - turn = {"messages": converted_messages, "response": response.content} - self.recorded_items.append(turn) - self.next_item_index += 1 - - def check_and_replay_one_turn(self, messages): - # Compare the messages to the recorded messages, and return the recorded response. - # Get the next recorded turn. - if self.next_item_index >= len(self.recorded_items): - error_str = "\nNo more recorded items to check." - self.logger.error(error_str) - raise ValueError(error_str) - recorded_turn = self.recorded_items[self.next_item_index] - self.next_item_index += 1 - - # Check the current message list against the recorded message list. - if "messages" not in recorded_turn: - error_str = "\nRecorded turn doesn't contain a messages field. Perhaps a result was recorded instead." - self.logger.error(error_str) - raise ValueError(error_str) - recorded_messages = recorded_turn["messages"] - current_messages = self.convert_messages(messages) - if current_messages != recorded_messages: - error_str = "\nCurrent message list doesn't match the recorded message list." - self.logger.log_message_content(recorded_messages, "recorded message list") - self.logger.log_message_content(current_messages, "current message list") - self.logger.error(error_str) - raise ValueError(error_str) - assert current_messages == recorded_messages - - # Return the recorded response. - cur_usage = RequestUsage(prompt_tokens=0, completion_tokens=0) - result = CreateResult(finish_reason="stop", content=recorded_turn["response"], usage=cur_usage, cached=True) - return result - - def report_result(self, result: Any) -> None: - if self.mode == "pass-through": - return - elif self.mode == "record": - self.record_result(result) - elif self.mode == "check-replay": - self.check_result(result) - - def record_result(self, result: Any) -> None: - # Record a result. - self.recorded_items.append({"result": result}) - self.next_item_index += 1 - - def check_result(self, result: Any) -> None: - # Check a result. 
- if self.next_item_index >= len(self.recorded_items): - error_str = "\nNo more recorded items to check." - self.logger.error(error_str) - raise ValueError(error_str) - recorded_result = self.recorded_items[self.next_item_index] - self.next_item_index += 1 - - if "result" not in recorded_result: - error_str = "\nRecorded turn doesn't contain a result field. Perhaps a turn was recorded instead." - self.logger.error(error_str) - raise ValueError(error_str) - if result != recorded_result["result"]: - error_str = "\nRecorded result ({}) doesn't match the current result ({}).".format( - recorded_result["result"], result - ) - self.logger.error(error_str) - raise ValueError(error_str) - - def finalize(self) -> None: - self.logger.enter_function() - self.report_result("Total items = " + str(self.next_item_index)) - if self.mode == "record": - self.save() - self.logger.error("\nRecorded session was saved to: " + self.path_to_output_file) - elif self.mode == "check-replay": - self.logger.error("\nRecorded session was fully replayed and checked.") - self.logger.leave_function() - - def save(self) -> None: - # Save the recorded messages and responses to disk. - session = {"turns_and_results": self.recorded_items} - with open(self.path_to_output_file, "w", encoding="utf-8") as file: - yaml.dump(session, file, sort_keys=False) - - def load(self): - # Load the recorded messages and responses from disk. - recorded_turns = [] - with open(self.path_to_output_file, "r", encoding="utf-8") as file: - session = yaml.load(file, Loader=yaml.FullLoader) - recorded_turns = session["turns_and_results"] - return recorded_turns diff --git a/python/packages/ame/src/ame/data_files/advice/add_topic.yaml b/python/packages/ame/src/ame/data_files/advice/add_topic.yaml deleted file mode 100644 index e07dd8be08a5..000000000000 --- a/python/packages/ame/src/ame/data_files/advice/add_topic.yaml +++ /dev/null @@ -1,6 +0,0 @@ -# Advice created manually for solving the autogen_package task. - -advice: 'When somebody builds something on autogen and wants to contribute it to autogen, - instead of creating a new autogen package, it''s better for them to implement it in a separate github repo, - then just add the autogen-extension topic to that repo. - That way their contribution will be automatically discoverable through autogen''s page on community extensions: https://microsoft.github.io/autogen/dev/user-guide/extensions-user-guide/index.html' diff --git a/python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml b/python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml deleted file mode 100644 index da626f9783b7..000000000000 --- a/python/packages/ame/src/ame/data_files/demos/cell_towers_2_demo.yaml +++ /dev/null @@ -1,11 +0,0 @@ -# Demo created manually for solving the cell_towers_2 task. - -demo: 'Sort the houses by location: 3, 6, 10, 11, 12, 17, 19, 20. - Then start at one end and place the towers only where absolutely needed. - The house at 3 could be served by a tower as far away as mile marker 7, because 3 + 4 = 7, so place a tower at 7. - This obviously covers houses up to mile 7. - But a coverage radius of 4 miles (in each direction) means a total coverage of 8 miles. - So the tower at mile 7 would reach all the way to mile 11, covering the houses at 10 and 11. - The next uncovered house would be at mile 12 (not 10), requiring a second tower. - It could go at mile 16 (which is 12 + 4) and this tower would reach up to mile 20 (16 + 4), - covering the remaining houses. So 2 towers would be enough.' 
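The demo above walks through a standard greedy interval-covering strategy. As a quick illustration (not part of this patch; the function name `min_towers` is invented for this sketch), the same strategy in runnable Python:

```python
# Greedy tower placement as described in the demo: sort the houses, then put
# each tower 4 miles past the first house that is still uncovered, so its
# 4-mile radius reaches as far down the road as possible.
def min_towers(houses, radius=4):
    towers = []
    for house in sorted(houses):
        if not towers or house > towers[-1] + radius:
            towers.append(house + radius)  # farthest placement that still covers this house
    return towers

print(min_towers([3, 6, 10, 11, 12, 17, 19, 20]))  # [7, 16] -> 2 towers, as in the demo
```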
diff --git a/python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml b/python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml deleted file mode 100644 index 2e2341d91fd1..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/100_vampires.yaml +++ /dev/null @@ -1,22 +0,0 @@ -# From GAIA L1 - -task_description: "You are Van Helsing, a renowned vampire hunter. A Count of Moldova, La\u021B\ - cu IV, son of Costea, has tasked you with investigating the village of \u0218\ - irnea in neighboring Wallachia. The Count's advisors have reported that a vampire\ - \ was spotted crossing the border near the village, and would like you to investigate\ - \ it.\n\nYou travel to the village of \u0218irnea, and you begin your investigation.\ - \ One night, just before dawn, you catch a glimpse of a man in a long black\ - \ cape with red lining leaping from roof-top to roof-top with superhuman agility.\ - \ It's a vampire! You try to chase the creature back to its home, but the creature\ - \ is too fast. However, because of the remoteness of the village, you know with\ - \ absolute certainty that the vampire must be a resident of the village. You\ - \ decide that your best course of action will be to visit all 100 residents\ - \ of the town during the day. You know something about vampires and humans that\ - \ will make your investigation possible; humans always tell the truth, but vampires\ - \ always lie.\n\nIn the afternoon, you go from house to house, speaking with\ - \ all 100 residents of \u0218irnea. You ask everyone the same question: \"How\ - \ many vampires are living in \u0218irnea\". Everyone in the village gives the\ - \ same response, \"At least one of us is a human.\"\n\nHow many residents of\ - \ \u0218irnea have been turned into vampires?" - -expected_answer: '100' diff --git a/python/packages/ame/src/ame/data_files/tasks/10_liars.yaml b/python/packages/ame/src/ame/data_files/tasks/10_liars.yaml deleted file mode 100644 index 096e12775935..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/10_liars.yaml +++ /dev/null @@ -1,8 +0,0 @@ -# Similar to the 100 vampires task, for testing generalization from one to the other. - -task_description: 'You ask ten people ''How many of you are liars?'' - They all answer ''At least one of us is not a liar.'' - You happen to know that at least one of them IS a liar. - How many of them are liars in total?' - -expected_answer: All of them are liars. diff --git a/python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml b/python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml deleted file mode 100644 index ab9a7ffe0133..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/3_to_third.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# A simple test that doesn't require memory. - -task_description: What is 3^3? - -expected_answer: '27' diff --git a/python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml b/python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml deleted file mode 100644 index f80840b30073..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/autogen_package.yaml +++ /dev/null @@ -1,5 +0,0 @@ -# Test where human advice is needed. - -task_description: As a contribution to autogen, can I create a new autogen package for a copilot extension agent that I built on autogen? - -expected_answer: It's best to have your agent in its own repo, then add the autogen-extension topic to that repo. 
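The two logic tasks above (100 vampires, and the 10 liars task below) rest on the same argument: if any speaker were honest, the shared statement would be true, forcing every liar's identical statement to also be true, a contradiction unless everyone is lying. A tiny brute-force check (illustrative only, not part of this patch) confirms the expected answer for the 10-liars variant:

```python
# Enumerate all liar/truth-teller assignments for the 10_liars task and keep
# those consistent with everyone saying "At least one of us is not a liar,"
# given that at least one person IS a liar.
from itertools import product

consistent = set()
for assignment in product([False, True], repeat=10):  # True = liar
    num_liars = sum(assignment)
    statement = num_liars < 10  # "At least one of us is not a liar."
    # Truth-tellers must make true statements; liars must make false ones.
    if all(statement != is_liar for is_liar in assignment) and num_liars >= 1:
        consistent.add(num_liars)

print(consistent)  # {10}: all ten are liars, matching the expected answer
```

The 100-vampires task is the same computation with 100 residents, giving 100 vampires.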
diff --git a/python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml b/python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml deleted file mode 100644 index f86e370db3ee..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/cell_towers_1.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# File-free version of a GAIA L1 task. - -task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. - Houses are located at mile markers 16, 17, 19, 11, 9, 10, 2, 5, 4. - Each cell phone tower can cover houses located next to the road within a 4-mile radius. - Find the minimum number of cell phone towers needed to cover all houses next to the road. - Your answer should be a positive numerical integer value. - -expected_answer: '2' diff --git a/python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml b/python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml deleted file mode 100644 index 5ddc046920c9..000000000000 --- a/python/packages/ame/src/ame/data_files/tasks/cell_towers_2.yaml +++ /dev/null @@ -1,9 +0,0 @@ -# Similar to the cell_towers_1 task. - -task_description: You are a telecommunications engineer who wants to build cell phone towers on a stretch of road. - Houses are located at mile markers 17, 20, 19, 10, 11, 12, 3, 6. - Each cell phone tower can cover houses located next to the road within a 4-mile radius. - Find the minimum number of cell phone towers needed to cover all houses next to the road. - Your answer should be a positive numerical integer value. - -expected_answer: '2' diff --git a/python/packages/ame/src/ame/eval.py b/python/packages/ame/src/ame/eval.py deleted file mode 100644 index b867343e3212..000000000000 --- a/python/packages/ame/src/ame/eval.py +++ /dev/null @@ -1,69 +0,0 @@ -import asyncio -import importlib -import sys -import yaml - -from autogen_ext.agentic_memory import PageLogger, Apprentice -from ame.clients._client_creator import ClientCreator - - -async def perform_evaluations(settings, logger) -> None: - """ - Perform the evaluations as specified in the settings file. - """ - logger.enter_function() - - # Create the client. - client_creator = ClientCreator(settings=settings["client"], logger=logger) - client = client_creator.create_client() - - # Create the apprentice. - apprentice_settings = settings["Apprentice"] - apprentice = Apprentice(apprentice_settings, client, logger) - - # Execute each evaluation. - for evaluation_settings in settings["evaluations"]: - # Import the function. - function_settings = evaluation_settings["eval_function"] - module_path = function_settings["module_path"] - try: - module = importlib.import_module(module_path) - except ModuleNotFoundError: - print("Failed to import {}".format(module_path)) - raise - function_name = function_settings["function_name"] - try: - eval_function = getattr(module, function_name) - except AttributeError: - print("Failed to import {}.{}".format(module_path, function_name)) - raise - - # Call the eval function for each listed run. - for run_dict in evaluation_settings["runs"]: - results = await eval_function(apprentice, client, logger, function_settings, run_dict) - print(results) - - if hasattr(client, "finalize"): - # If this is a client wrapper, it needs to be finalized. - client.finalize() - - logger.flush(finished=True) - logger.leave_function() - - -async def run(settings_filepath): - # Load the settings from yaml. 
- with open(settings_filepath, "r") as file: - settings = yaml.load(file, Loader=yaml.FullLoader) - logger = PageLogger(settings["PageLogger"]) - - # Perform the evaluations. - await perform_evaluations(settings, logger) - - -if __name__ == "__main__": - args = sys.argv[1:] - if len(args) != 1: - print("Usage: amt.py ") - else: - asyncio.run(run(settings_filepath=args[0])) diff --git a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py b/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py deleted file mode 100644 index 588528e2c529..000000000000 --- a/python/packages/ame/src/ame/eval_functions/eval_learning_from_demonstration.py +++ /dev/null @@ -1,69 +0,0 @@ -from typing import Dict -import yaml - -from autogen_core.models import ( - ChatCompletionClient, -) -from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger - - -async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient, - logger: PageLogger, settings: Dict, run_dict: Dict) -> str: - """ - Evaluates the ability to learn quickly from demonstrations. - """ - logger.enter_function() - - num_trials = settings["num_trials"] - grader = Grader(client, logger) - - # Load the specified data. - with open(run_dict["main_task_file"], "r") as file: - # The task being tested. - main_task = yaml.load(file, Loader=yaml.FullLoader) - task_description = main_task["task_description"] - expected_answer = main_task["expected_answer"] - with open(run_dict["demo_task_file"], "r") as file: - # A similar but different task. - demo_task = yaml.load(file, Loader=yaml.FullLoader)["task_description"] - with open(run_dict["demo_solution_file"], "r") as file: - # A demonstration of solving the second task. - demo_solution = yaml.load(file, Loader=yaml.FullLoader)["demo"] - - # Start by clearing memory then running a baseline test. - logger.info("To get a baseline, clear memory, then assign the task.") - apprentice.reset_memory() - num_successes, num_trials = await grader.test_apprentice( - apprentice=apprentice, - task_description=task_description, - expected_answer=expected_answer, - num_trials=num_trials, - use_memory=True, - client=client, - logger=logger, - ) - success_rate = round((num_successes / num_trials) * 100) - results_str_1 = "Success rate before demonstration: {}%".format(success_rate) - logger.info("\n" + results_str_1) - - # Provide a demonstration for a similar but different task. - logger.info("Demonstrate a solution to a similar task.") - await apprentice.add_task_solution_pair_to_memory(demo_task, demo_solution) - - # Now test again to see if the demonstration (retrieved from memory) helps. 
- logger.info("Assign the task again to see if the demonstration helps.") - num_successes, num_trials = await grader.test_apprentice( - apprentice=apprentice, - task_description=task_description, - expected_answer=expected_answer, - num_trials=num_trials, - use_memory=True, - client=client, - logger=logger, - ) - success_rate = round((num_successes / num_trials) * 100) - results_str_2 = "Success rate after demonstration: {}%".format(success_rate) - logger.info("\n" + results_str_2) - - logger.leave_function() - return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py b/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py deleted file mode 100644 index ba4f75c38159..000000000000 --- a/python/packages/ame/src/ame/eval_functions/eval_self_teaching.py +++ /dev/null @@ -1,81 +0,0 @@ -from typing import Dict -import yaml - -from autogen_core.models import ( - ChatCompletionClient, -) -from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger - - -async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient, - logger: PageLogger, settings: Dict, run_dict: Dict) -> str: - """ - Evaluates the ability of an agent to learn quickly from its own trial and error. - """ - logger.enter_function() - - num_loops = settings["num_loops"] - num_final_test_trials = settings["num_final_test_trials"] - grader = Grader(client, logger) - - # Load the specified data. - with open(run_dict["task_file_1"], "r") as file: - # Train and test on this task. - task_1 = yaml.load(file, Loader=yaml.FullLoader) - task_description_1 = task_1["task_description"] - expected_answer_1 = task_1["expected_answer"] - with open(run_dict["task_file_2"], "r") as file: - # Test generalization on this different, similar task. - task_2 = yaml.load(file, Loader=yaml.FullLoader) - task_description_2 = task_2["task_description"] - expected_answer_2 = task_2["expected_answer"] - - # Start the test with empty memory. - apprentice.reset_memory() - - total_num_successes_1 = 0 - total_num_successes_2 = 0 - total_num_trials = 0 - for i in range(num_loops): - # Train on the first task. - await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1) - - # Test on the first task. - num_successes, num_trials = await grader.test_apprentice( - apprentice=apprentice, - task_description=task_description_1, - expected_answer=expected_answer_1, - num_trials=num_final_test_trials, - use_memory=True, - client=client, - logger=logger, - ) - logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100))) - total_num_successes_1 += num_successes - - # Test on the second task. 
-            num_successes, num_trials = await grader.test_apprentice(
-                apprentice=apprentice,
-                task_description=task_description_2,
-                expected_answer=expected_answer_2,
-                num_trials=num_final_test_trials,
-                use_memory=True,
-                client=client,
-                logger=logger,
-            )
-            logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100)))
-            total_num_successes_2 += num_successes
-
-            total_num_trials += num_final_test_trials
-            logger.info("")
-
-    overall_success_rate_1 = round((total_num_successes_1 / total_num_trials) * 100)
-    overall_success_rate_2 = round((total_num_successes_2 / total_num_trials) * 100)
-
-    results_str_1 = "Overall task 1 success rate: {}%".format(overall_success_rate_1)
-    results_str_2 = "Overall task 2 success rate: {}%".format(overall_success_rate_2)
-    logger.info("\n" + results_str_1)
-    logger.info(results_str_2)
-
-    logger.leave_function()
-    return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2
diff --git a/python/packages/ame/src/ame/eval_functions/eval_teachability.py b/python/packages/ame/src/ame/eval_functions/eval_teachability.py
deleted file mode 100644
index dd63fe74f3c6..000000000000
--- a/python/packages/ame/src/ame/eval_functions/eval_teachability.py
+++ /dev/null
@@ -1,64 +0,0 @@
-from typing import Dict
-import yaml
-
-from autogen_core.models import (
-    ChatCompletionClient,
-)
-from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
-
-
-async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient,
-                            logger: PageLogger, settings: Dict, run_dict: Dict) -> str:
-    """
-    Evaluates the ability to learn quickly from user teachings, hints, and advice.
-    """
-    logger.enter_function()
-
-    # Load the specified data.
-    with open(run_dict["task_file"], "r") as file:
-        # The task being tested.
-        task = yaml.load(file, Loader=yaml.FullLoader)
-        task_description = task["task_description"]
-        expected_answer = task["expected_answer"]
-    with open(run_dict["advice_file"], "r") as file:
-        # Advice for solving such tasks.
-        advice = yaml.load(file, Loader=yaml.FullLoader)["advice"]
-
-    # First test without memory.
-    apprentice.reset_memory()
-    logger.info("\nClear memory, then ask the question.")
-    response = await apprentice.handle_user_message(task_description)
-
-    # Check the response.
-    grader = Grader(client, logger)
-    response_is_correct, extracted_answer = await grader.is_response_correct(
-        task_description, response, expected_answer
-    )
-    logger.info("Extracted answer: {}".format(extracted_answer))
-    if response_is_correct:
-        results_str_1 = "Answer before teaching is CORRECT."
-    else:
-        results_str_1 = "Answer before teaching is INCORRECT."
-    logger.info(results_str_1 + "\n")
-
-    # Give advice that should help solve this task.
-    logger.info("Give the advice.")
-    await apprentice.handle_user_message(advice)
-
-    # Now ask the question again to see if the advice helps.
-    logger.info("\nAsk the question again to see if the advice helps.")
-    response = await apprentice.handle_user_message(task_description)
-
-    # Check the response.
-    response_is_correct, extracted_answer = await grader.is_response_correct(
-        task_description, response, expected_answer
-    )
-    logger.info("Extracted answer: {}".format(extracted_answer))
-    if response_is_correct:
-        results_str_2 = "Answer after teaching is CORRECT."
-    else:
-        results_str_2 = "Answer after teaching is INCORRECT."
- logger.info(results_str_2 + "\n") - - logger.leave_function() - return "\neval_teachability\n" + results_str_1 + "\n" + results_str_2 diff --git a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py b/python/packages/ame/src/ame/eval_functions/eval_without_learning.py deleted file mode 100644 index 3e96fbc94c2a..000000000000 --- a/python/packages/ame/src/ame/eval_functions/eval_without_learning.py +++ /dev/null @@ -1,44 +0,0 @@ -from typing import Dict -import yaml - -from autogen_core.models import ( - ChatCompletionClient, -) -from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger - - -async def eval_without_learning(apprentice: Apprentice, client: ChatCompletionClient, - logger: PageLogger, settings: Dict, run_dict: Dict) -> str: - """ - Performs an evaluation without the benefit of memory. - """ - logger.enter_function() - - num_trials = settings["num_trials"] - grader = Grader(client, logger) - - # Load the specified data. - with open(run_dict["task_file"], "r") as file: - # The task being tested. - task = yaml.load(file, Loader=yaml.FullLoader) - task_description = task["task_description"] - expected_answer = task["expected_answer"] - - # Clear memory then run a baseline test. - logger.info("To get a baseline, clear memory, then assign the task.") - apprentice.reset_memory() - num_successes, num_trials = await grader.test_apprentice( - apprentice=apprentice, - task_description=task_description, - expected_answer=expected_answer, - num_trials=num_trials, - use_memory=True, - client=client, - logger=logger, - ) - success_rate = round((num_successes / num_trials) * 100) - results_str = "Success rate: {}%".format(success_rate) - logger.info("\n" + results_str) - - logger.leave_function() - return "\neval_without_learning\n" + results_str diff --git a/python/packages/ame/src/ame/settings/baseline.yaml b/python/packages/ame/src/ame/settings/baseline.yaml deleted file mode 100644 index 815e77c2b297..000000000000 --- a/python/packages/ame/src/ame/settings/baseline.yaml +++ /dev/null @@ -1,39 +0,0 @@ - -PageLogger: - level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/base - -client: - model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. - provider: trapi # openai, azure_openai, or trapi - api_key: sk- # only for openai - temperature: 0.8 - max_completion_tokens: 4096 - presence_penalty: 0.0 - frequency_penalty: 0.0 - top_p: 1.0 - max_retries: 65535 - ClientWrapper: # Provides record & replay functionality - enabled: 0 # Only works for thin_agent currently - mode: check-replay # pass-through, record, or check-replay - session_name: short-3 - -Apprentice: - AgenticMemoryController: - max_train_trials: 2 # 2-10 - max_test_trials: 1 # 1-3 - AgenticMemoryBank: - path: ~/agentic_memory_bank/temp - relevance_conversion_threshold: 1.7 - n_results: 25 - distance_threshold: 100 - AgentWrapper: - base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. - -evaluations: - - eval_function: - function_name: eval_without_learning - module_path: ame.eval_functions.eval_without_learning - num_trials: 1 # 1-10 - runs: - - task_file: data_files/tasks/10_liars.yaml diff --git a/python/packages/ame/src/ame/settings/check.yaml b/python/packages/ame/src/ame/settings/check.yaml deleted file mode 100644 index f0a4104a32d6..000000000000 --- a/python/packages/ame/src/ame/settings/check.yaml +++ /dev/null @@ -1,57 +0,0 @@ - -PageLogger: - level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. 
- path: ~/pagelogs/temp17 - -client: - model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. - provider: trapi # openai, azure_openai, or trapi - api_key: sk- # only for openai - temperature: 0.8 - max_completion_tokens: 4096 - presence_penalty: 0.0 - frequency_penalty: 0.0 - top_p: 1.0 - max_retries: 65535 - ClientWrapper: # Provides record & replay functionality - enabled: 1 # Only works for thin_agent currently - mode: check-replay # pass-through, record, or check-replay - session_name: short-3 - -Apprentice: - AgenticMemoryController: - max_train_trials: 2 # 2-10 - max_test_trials: 1 # 1-3 - AgenticMemoryBank: - path: ~/agentic_memory_bank/temp - relevance_conversion_threshold: 1.7 - n_results: 25 - distance_threshold: 100 - AgentWrapper: - base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. - -evaluations: - - eval_function: - function_name: eval_teachability - module_path: ame.eval_functions.eval_teachability - runs: - - task_file: data_files/tasks/autogen_package.yaml # The task being tested. - advice_file: data_files/advice/add_topic.yaml # Advice provided to help solve the task. - - - eval_function: - function_name: eval_learning_from_demonstration - module_path: ame.eval_functions.eval_learning_from_demonstration - num_trials: 1 # 1-10 - runs: - - main_task_file: data_files/tasks/cell_towers_1.yaml # The task being tested. - demo_task_file: data_files/tasks/cell_towers_2.yaml # A similar but different task. - demo_solution_file: data_files/demos/cell_towers_2_demo.yaml # A demonstration of solving the second. - - - eval_function: - function_name: eval_self_teaching - module_path: ame.eval_functions.eval_self_teaching - num_loops: 1 # 1-10 - num_final_test_trials: 1 # 1-3 - runs: - - task_file_1: data_files/tasks/10_liars.yaml # Train and test on this task. - task_file_2: data_files/tasks/100_vampires.yaml # Test generalization on this different, similar task. diff --git a/python/packages/ame/src/ame/settings/m1.yaml b/python/packages/ame/src/ame/settings/m1.yaml deleted file mode 100644 index 8c89a53d29ba..000000000000 --- a/python/packages/ame/src/ame/settings/m1.yaml +++ /dev/null @@ -1,35 +0,0 @@ - -PageLogger: - level: DEBUG # DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - path: ~/pagelogs/m1 - -client: - model: gpt-4o-2024-08-06 # gpt-4o-2024-05-13, gpt-4o-2024-08-06, o1-preview, o1, etc. - provider: trapi # openai, azure_openai, or trapi - api_key: sk- # only for openai - temperature: 0.8 - max_completion_tokens: 4096 - presence_penalty: 0.0 - frequency_penalty: 0.0 - top_p: 1.0 - max_retries: 65535 - -Apprentice: - AgenticMemoryController: - max_train_trials: 2 # 2-10 - max_test_trials: 1 # 1-3 - AgenticMemoryBank: - path: ~/agentic_memory_bank/m1 - relevance_conversion_threshold: 1.7 - n_results: 25 - distance_threshold: 100 - AgentWrapper: - base_agent: MagenticOneGroupChat # MagenticOneGroupChat, thin_agent, etc. 
-
-evaluations:
-  - eval_function:
-      function_name: eval_without_learning
-      module_path: ame.eval_functions.eval_without_learning
-      num_trials: 1
-      runs:
-        - task_file: data_files/tasks/10_liars.yaml

From a3addc12f43243dfa43bdf00ce44292da5367283 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 28 Jan 2025 09:57:16 -0800
Subject: [PATCH 71/93] readme

---
 python/samples/agentic_memory/README.md | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md
index 9ab6b69bdefe..61a7631da138 100644
--- a/python/samples/agentic_memory/README.md
+++ b/python/samples/agentic_memory/README.md
@@ -30,21 +30,25 @@ Execute the following commands from this (autogen_ext/agentic_memory) directory.

 ### Agent Learning from User Advice and Corrections

-This sample first tests the agent for knowledge it currently lacks.
+This sample first tests the agent (once) for knowledge it currently lacks.
 Then the agent is given advice to help it solve the task, and the context window is cleared.
-Finally the agent is tested again to see if it can retrieve and use the advice successfully.
+Finally the agent is tested once again to see if it can retrieve and use the advice successfully.

 `python eval_teachability.py settings/teachability.yaml`

+By using memory, the agent nearly always succeeds on the second test.
+

 ### Agent Learning from User Demonstrations

-This sample asks the agent to perform a reasoning task on which it usually fails.
+This sample asks the agent to perform a reasoning task (ten times) on which it usually fails.
 The agent is then given a demonstration of how to solve a similar but different task, and the context window is cleared.
-Finally the agent is tested again to see if it can retrieve and apply the demonstration to the original task.
+Finally the agent is tested ten more times to see if it can retrieve and apply the demonstration to the original task.

 `python eval_learning_from_demonstration.py settings/demonstration.yaml`

+By using memory, the agent's success rate is usually higher on the second set of tests.
+

 ### Agent Learning from Its Own Experience

 This sample asks the agent to perform a reasoning task on which it usually fails.
 Then the agent (running in the background) iterates through a learning loop in an effort to find a solution,
 which it then stores as an insight in memory.
 Finally the agent is tested again to see if it can retrieve and apply the insight to the original task,
-as well as to a similar but different task.
+as well as to a similar but different task to test generalization.

 `python eval_self_teaching.py settings/self_teaching.yaml`
+
+By using memory, the agent usually completes both tasks successfully in the second set of tests.
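Each of the three samples in the README above follows the same skeleton: run a baseline test, provide new information (advice, a demonstration, or self-generated insights), then re-test with memory retrieval. As a rough sketch of the first flow, using the Apprentice and Grader API that appears elsewhere in this patch series (condensed and illustrative; the function name `teachability_flow` is invented here and this is not the actual contents of eval_teachability.py):

```python
# Condensed teachability flow: test once, teach, then test again.
# On the second test the advice is retrieved from agentic memory,
# not from the (cleared) context window.
async def teachability_flow(apprentice, grader, task, expected_answer, advice):
    apprentice.reset_memory()

    # Baseline: ask the question with no relevant memories stored.
    response = await apprentice.handle_user_message(task)
    correct_before, _ = await grader.is_response_correct(task, response, expected_answer)

    # Teach: the advice is stored in memory for later retrieval.
    await apprentice.handle_user_message(advice)

    # Re-test: success now depends on retrieving the stored advice.
    response = await apprentice.handle_user_message(task)
    correct_after, _ = await grader.is_response_correct(task, response, expected_answer)
    return correct_before, correct_after
```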
From c6ffa430102707112a79fb36e4734aafa21fcf79 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 28 Jan 2025 10:14:41 -0800 Subject: [PATCH 72/93] comment out api_key lines --- python/samples/agentic_memory/settings/demonstration.yaml | 2 +- python/samples/agentic_memory/settings/self_teaching.yaml | 2 +- python/samples/agentic_memory/settings/teachability.yaml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml index 6bbdec8b3743..52a62ab6c6c8 100644 --- a/python/samples/agentic_memory/settings/demonstration.yaml +++ b/python/samples/agentic_memory/settings/demonstration.yaml @@ -5,7 +5,7 @@ PageLogger: client: model: gpt-4o-2024-08-06 - api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. + # api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. temperature: 0.8 max_completion_tokens: 4096 presence_penalty: 0.0 diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml index 60b6f2ab51d4..01673e157fa9 100644 --- a/python/samples/agentic_memory/settings/self_teaching.yaml +++ b/python/samples/agentic_memory/settings/self_teaching.yaml @@ -5,7 +5,7 @@ PageLogger: client: model: gpt-4o-2024-08-06 - api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. + # api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. temperature: 0.8 max_completion_tokens: 4096 presence_penalty: 0.0 diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml index 382ba6ad19d8..d036ca28926c 100644 --- a/python/samples/agentic_memory/settings/teachability.yaml +++ b/python/samples/agentic_memory/settings/teachability.yaml @@ -5,7 +5,7 @@ PageLogger: client: model: gpt-4o-2024-08-06 - api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. + # api_key: sk- # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY. 
temperature: 0.8 max_completion_tokens: 4096 presence_penalty: 0.0 From 8f66612d0945ec150196fdde633857da327201dc Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 28 Jan 2025 10:45:40 -0800 Subject: [PATCH 73/93] Optional disabling of prefix caching (to decorrelate repeated runs) --- .../src/autogen_ext/agentic_memory/agent_wrapper.py | 11 +++++++++++ .../agentic_memory/settings/demonstration.yaml | 1 + .../agentic_memory/settings/self_teaching.yaml | 1 + .../samples/agentic_memory/settings/teachability.yaml | 1 + 4 files changed, 14 insertions(+) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py index b5f8a0c6380a..bd8360bf9e74 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py @@ -1,4 +1,5 @@ from typing import Tuple, Dict +import random, time from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.teams import MagenticOneGroupChat @@ -33,6 +34,10 @@ def __init__(self, settings: Dict, client: ChatCompletionClient, logger: PageLog self.client = client self.logger = logger self.base_agent_name = self.settings["base_agent"] + self.disable_prefix_caching = self.settings["disable_prefix_caching"] + if self.disable_prefix_caching: + self.rand = random.Random() + self.rand.seed(int(time.time() * 1000)) async def assign_task(self, task: str) -> Tuple[str, str]: """ @@ -66,6 +71,12 @@ async def _assign_task_to_thin_agent(self, task: str) -> Tuple[str, str]: 4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. 5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. 6. Finish by providing your final response in the particular format requested by the user.""" + + if self.disable_prefix_caching: + # Prepend a random int to disable prefix caching. + random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) + system_message_content = random_str + system_message_content + if self.client.model_info["family"] == "o1": # No system message allowed, so pass it as the first user message. system_message = UserMessage(content=system_message_content, source="User") diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml index 52a62ab6c6c8..73bde782d02f 100644 --- a/python/samples/agentic_memory/settings/demonstration.yaml +++ b/python/samples/agentic_memory/settings/demonstration.yaml @@ -24,6 +24,7 @@ Apprentice: distance_threshold: 100 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. + disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs. test: main_task_file: data_files/tasks/cell_towers_1.yaml # The task being tested. diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml index 01673e157fa9..167e23d3d26f 100644 --- a/python/samples/agentic_memory/settings/self_teaching.yaml +++ b/python/samples/agentic_memory/settings/self_teaching.yaml @@ -24,6 +24,7 @@ Apprentice: distance_threshold: 100 AgentWrapper: base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. + disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs. 
 test:
   task_file_1: data_files/tasks/10_liars.yaml # Train and test on this task.
diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml
index d036ca28926c..186e57ea2dd4 100644
--- a/python/samples/agentic_memory/settings/teachability.yaml
+++ b/python/samples/agentic_memory/settings/teachability.yaml
@@ -24,6 +24,7 @@ Apprentice:
     distance_threshold: 100
   AgentWrapper:
     base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc.
+    disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs.

 test:
   task_file: data_files/tasks/autogen_package.yaml # The task being tested.

From 2ed08ae8aec657e35704ca8e71983fa0d7eb6ad1 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Tue, 28 Jan 2025 21:03:37 -0800
Subject: [PATCH 74/93] Remove unnecessary instantiation of Grader

---
 .../autogen-ext/src/autogen_ext/agentic_memory/grader.py | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
index 9810687bbe95..4e39ee0b7bf1 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
@@ -43,14 +43,13 @@ async def test_apprentice(

         self.logger.info("Testing the apprentice on the given task.\n")

-        grader = Grader(client, logger)
         num_successes = 0
         for trial in range(num_trials):
             self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1))
             self.logger.info("Try to solve the task.\n")
             response = await apprentice.assign_task(task_description, use_memory=use_memory)
-            response_is_correct, extracted_answer = await grader.is_response_correct(
+            response_is_correct, extracted_answer = await self.is_response_correct(
                 task_description, response, expected_answer
             )
             self.logger.info("Extracted answer: {}".format(extracted_answer))

From f879487ab4b735dd49cab3408c5aa716c25c39cc Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Wed, 29 Jan 2025 16:43:38 -0800
Subject: [PATCH 75/93] Updated image using git-lfs

---
 .../autogen-ext/imgs/agentic_memory.png | Bin 52136 -> 130 bytes
 1 file changed, 0 insertions(+), 0 deletions(-)

diff --git a/python/packages/autogen-ext/imgs/agentic_memory.png b/python/packages/autogen-ext/imgs/agentic_memory.png
index 8fc7f3a5933401b89dfa09b710dfc308676c6f36..effeabcfd8276378097bbe84c133c7ea598cfcbb 100644
GIT binary patch
[base85-encoded binary literals omitted: the 52136-byte agentic_memory.png was replaced by a 130-byte git-lfs pointer; the raw image data is not human-readable]
zjv@`tbJU0nE4buqVP%)ldaR#!50h1fRG!+k^o5$(G`vxcY418PeL19EgO@=*{pnakNQAaoRsSV}dC9j^ zSq!Aq)yn3`)}qIb*WOXPZ?Yltvo%q@pOm1n7X&2pT{>RL*onS#0Phs2 z5u>D#?Evtc0<^u{K!gs{k&*(?T?8VLF=*4hELr{!2xb)^ceh+GwrT`-K%Xg~%gss@ z{&$+#YJ((owp?**bkvQBfMC28Fs%IVabhyz;v~A`+N|_8$jX4**(vU%Qeo+j`EMWB zmxE(-wW9m2|7y-G@5V@jtp;eh1HLMqAh$K6$!fY&tn|5bb>?%^3K1`T z7ZBKW$6`$|Dm{i(`lAe@cjp7-eHXxD&?*C`Z2iiS0q0g>vE1m+(;`&WsP=)6KzW`x zP?nk<=sp1Iit3TDQK#*n^^c%gQ~1i)RxZU?YQWX@S05F14b*&t=Ji{oPN(8~g>0bSCcgY0*?CD)m7s#mv<@ENc6Xxn{!uZE_flt;?b{csBwg91>-?_0jFM zF)a2oEoObmU|v#|IeVF>f{JWF48)cnDf9m5o85;)uj?H^VTVIU*7b(8E!144SNj2b z#bVya0d=WQOEHd_7 zBon*7up>;0*#twd*If=KPp&P*GBt8Wwa6h7H| zt2yM@Xztp1S5mTpx5or*^ybn+XRN zV4Ku}3`3bqg@trmMswv0r?Rw3-19F*xCjJG9_k(dE;gX!yMDVW-lj)j+_Dfat7X4N42C&i8cMJ3(&)uq#XM4cZF$#b1%d{Q*%oR zP8Ct;>X<>yrp3_MGaakh>k1S2oMLA&S15Mw$lqhZM0pKHM}N-+o9&T?7mnYYf%)2eozw#UT-+5r-oMGqT8rMkm{hS*o z`8AGjH%M(RP&F)ZZ2owfF%+LKPJOV8jDBVu;2-YuDQx)xsB?0lRMHt5N)0#YPXYKMAQ3 zxfv7QJjTj%W&Y@!6}310=oaVW;jX+PsTfXA8$DFQ^wPbCXY6^G>%kaEkGtrTH52bG ze16RcYmfOgBWx%CP1boZI@6}fB%};8o-xpOkfDPkaNDRU(?JK4UeX+JyZJ^{fU$&` z%GvPLzB@(0zWAtM^7*4+F$jBgUNe7J7oJ^x{XeEP2Nec>#+9@Gb z9$wbe~L*tB0n)=QW?Xi}lmY%d{>@b=TsN`)vM*D14$_&?ZXp;PKPnl1>OK_-k7U z$O9yi1!qy*b_H4uDFft~I&Ld~Tgd+G2u#<2*Ci4b`iK0|_9JnpFeKF}nLL~qp9izUiuKHG>*k6XYNlh57XpZ*$s#?m?(>OUW&a}|B`?RF<_aeb~U^8aVB$E2kf5s44!>Q!0 z4a$E!W0e>=P{9MbSwtlBU4i+AD7*I+P2eJLQUlgBUm3kK;DgKu2~(h-RM10&R%-h7 zC#%~ze?xNuuiUj@p#`|7j`!)@Smmp(7mGTym)32iARW?Mx0&Z(4mRXy@18-y%rVVI zTJ(6mA^ds4^QqH_CutCQ(1jNk%p|1X-76MfbQvog%vv2300u*YwDzWHD;wymDO~9% z1Z2ruetgd50J6Y3#;!6^e#4bi^cTuPt=#Ej=Qbc6hJoSG+LJZQw;jD9L$ai9j#2Iq z*_W7f7$ChTrRMTp2=eP9eB8*Cksa9=El2WiI`ifW`vBQB5|o+xmP}VXLePLmDS8uB zz1%J>1{NPCBE2lL?pd)^nc>0T7$PVM(R+q~XRoyhOzynh|^15Ut zYnB(wu{&P6|B(v;*y8t#s44=lVw-{o^kEl&-i7m^O(t(nXX-OfIax8A2cR7V&3KSM z+1uGNhRY*_xveJl3tplmoie5Zd_; zr&|*(54-f@)Yq5#wAaNV`QTU@IgITL)pJ4lTrm2U1YUe~-${ZcW6>8wyM|b5CdtsM z14_NCVhmiMmP#9<`kC|_=%nIf!O)P3DF^&H7h7|Vbpl_qIb9RC zUY#?kA^}t(3t#tH2#Nd5{aU-!EL%vcgIxTiQSS|;G#9%x@{_Aq>G9v1I_RbP(AyKx ztE`di3KG%6@dPCk$h_6w_{c1GttV(=SV?xy5HO%!w`L@n=BfvLmzV5H5s@xS2qW;B z<5&y|O~F5!yTH6@5Tj>0v4!Y=fbaY7mHj6m700;BkRkhuBS{amgpeKz8l#?)@fl3BSu^~-PdPDFEBG7vPNN4BFTeHK?Sw9w1Xyn?Xnulg8?w24x&#Y;1%?WVmBIiW(wqiTpsQK^t=v%( z6Fd=(?af74zKDOj3`H|W1#(B<7JrDKVP4kDt4?D@9SVn1dcNUoVuaJL&b)bH3JHqZ z9V<&cxfp&Al?8OiJkW~L>kLLqAy&9=RR!1=v+PZ4iSwvM)L@ie$7H?;SGtpf_N+ws zZLOF9Yn@@xYl>jP2D){`Fvw6*ug4;)ER<&78n%sRjp+g~uA+%pqcVzq$xeEP>nW<> z54Va>^|`kAZaZmd6di`q<3zqFE(=zhee;9=Ks)rjrw z0x*q^3HbJ112lBqmQ?{iV}e-w)wEGy?e!!qCzeJ@35ymmp>k*eY;3uMA%oo+;$`ekz5fCBmy{<1*p8+ ztT?gHvY-9)@MF1cnZj4V`O|Nm}PRE~szRR^ykI={lsBmY$ zP?v~&&TDFcZDy)qI{B242eM6#l!r`Uli16kjXRsS17{z{V z{~;Rn!^-D5SX`z6%I9-a7kBBDL6|2mlrNBL$ zVejX|{seCLm=6~j0xvp!j>-~N$&*$S+(Wq{(=0EXKtNcC$ z3+KOeU?w~SKO7Mw$~kedE{vDbzqTNQ{=^0NLqCjFf1H%6m>aUS`In@&QyBGuYHDj-f|WxE%WES_bAUM~ z<2r%k!5>$tF9O;v_9YAVL@+mK>|H4JIZ^|2bbD4p^SX9no-2)KZq>Ffe|@Wb*#Skz zm#dFbHYUQdizN~#6%`=G^CIZAfibL|dH`)6e$?u*T#x8b4Zklx)VeyNdiRcub#>q+ ze0eVbi}e+AQbC%K?jZgN0>sjxtv-?Yp~C`BYte+ECz5McjT{A#UK@~FMN(=x=%nro z!T=vk#*nX=+aK>N@iE{CMTkG&-y3yRXAjs6wn3rbKhs((<|YdoM1wf*UO02Ot+&kG zpziLy380|26S7dwzAH2V?kr{wT7j!xd?~KDpofkTzpmUlbY2#$RQ2^HS3ax`^RK_= z4+SuxGN8Q{4nF`e3wdUsQpFbkgL)~E@CoEuKY%1fSJA^lR1zFd+cd@;BVc$(YQ6Do7rO5tl zgaB`xfC{{cT=O0b_qWsh4{9bIub_jR*RP-fmQBDL1DKTE|G1fJ(Btu2={*f z(fwko3^V#B7GBhSC7^ViTpzL=v!)o zV?4jFRHaMiH(A=mZ~)@M6Ls@KkC!sde1jHWCmFy5=3hN9Ff%C*ggbaBkGkS;-v@;7 z@Q%jwl|V1$=L+d@G)oX5)}*YyB)>HX;oj0 zH^bLNJ|pJ0Q?vKfVS14tW-$CQR~7$0*P@eB_RSvsD}9UlDQbat{60t9Gj2|)@S1Is zp@47O5^2VD>Lj5L8L5V1?QQoH~ysl2dk`OMnVFa>Bx?2Ow^{k5Qh 
zVeh!QP}V2glWTAt+Xzy5^p_mhx{rw~19^eGfY3YP17@q?VA_y{pu^;-Pfv6t=!l5W zL#DG9IvyV+Tq;#G#kjoj*}NUhZfN)UMWG<^X%~Knh(>K;J`PV!n|2fX;IhH{Pr5-% zSwsC}h@k@6)|eGKjAkT3aoEZyriiK{7)~;{mk2k`gz3wiAYA5`+s|B1Zv5Bz3O%T< zeqf^&{D8$CuBU$?y&-HLr%@E>l_k=(RZ{0nyFwkOG!_r`7Xvg|{~e7ppN7YY=;_FpZ=Cgm&e8n)7?4s(!a{j(l|q zpM&D>)(wDWuBFp19qBx-JT#d6{IVNp;!TsUtm=PU5*eyy*YR*EpHWjCq5?x}h6bN= z88z~rk|XU-)=GQK&0G`Ysz8jkP&UbN;g7%~W1rCp!^bW#lD@-Bj-cJwv#i0C5;NQI z;+GNN+!B3@$t4kzDa8VU@q$*TlwJ#lt?s%34eC}ZSE^8gCe|5Zz|z&51L`jO8ny-n z0>L=$=F1mgnTy1_YD|Ld6`m4dq(w=)nOk-=%4^)L|C`YeDl&fk`)JkOQs-x)Y>JuJ zmMvQr%UPL=&;S!roW*UB^fT;&%t3FgG1W{RfhNG5XxDlt+cW>GvPO) zrB0MvR7@xV@oDC|4{-TAT33tOFH(*{86}mm8oS;VutU$LO=99&kEns}IiOT@H$y#P z>S1^Ur&$DgjD8IQNdZ9F|Gb*`Zc9H3v_b7_A|4jJAgB&$Sx>jH5Yu$xA3spz0(+{X z>Tr(3!Cqjm!4LTw9!L;Sc~}ZL+B4egzT)zHn?iYH3(OB?KKrq@RT=FQ1CXDCGw+I= zTX2+-nvc=9U-=pyvQYM67$s#MkiV#V1WZ2>GzXU!KO?Ygo%Ztb;hYAxBCJrbiyL{- zRAz>^bu_NfX0+!Ru;SG){^U1=YeR-StD|vRb&`8TvS0hl58h|*4Hm2HNqroCQM4@C zX<0w)im|zL`sa)bc!z}qaPC4;_cHXC1qG2r^Q;P+{!>3=gPzPGYVi>!icuV0Epd@z zkFVhu)hN$CPj(3Q%s-@ZAtlJ}e8C%I4A1Jdcx|#Bm{I=5_zE;7D@WwH>SndCnspQBy_w3am92i| zFxg{S^;TJVP4ILfSDJ0R|BVBud7mpBo6Od1vrVjfUi@Z2ebM=eybel!Va;o(FU;*7 zun5iiebXMARmcbO>eFBp3LC_K>eA~@0-^}Tr$WUQ5h{{4z&6WVT@R6y!ULL-(|zZp zmLa=X8WP=p>(X%D6V%FNp^obC`=!<%Nl@iDDTFXR@*_&QWjk|7_?eghR(*>0uxVoW zbEWjT8GlC2>{CzgZdX-#F(#?-Y|spY^){h++>+tfW1Ts4>p>Wv^^?TS>{|t$ClsCu z4vvs5uZGcEfa)7RtUiSyVa2sDd&dv`efLeN&(!%`p_%)q0DX2CFX)t_!bx}J3)u z5LINf2>lYvah?c+g{y)t7N^XwTM6M=w>6da@Jf`c3|VNa1khNpCV1{Bo$P}aj{!#m z1XAFEYDg!yMi^U+8vzwAvLSf)j;DZKD3t!Nt%wNfrAVlt!wt$G;wn$gE7^dJMjXQE zlmQv`R+=mby&>4%C~qMCBoHiEZpUAC)S?bXp8T>SRN?9zwbr*yZnHuIO(CK`#v*z6XivR z@GZb>s#rShvHl5Nz%$y9EJsgbAFdiSFry~g7+^97%00U%!hdr(-mz44 zyJIdZ;1Y##^6|W*$S)yGNu19Ty8%lAHUNgJ`9Xjg6eQXdm4EyeR5(5Z%O!`$l|$pd z&mC(PA7|Fau_xkoYXlkZevF6quw`PH?nU3fxHLT~Xt4nMZ^)ABKo&ab69Z*PQS{QS zmamiQ!PolHEt-b-`-YN~Q$t%}uo?Xp2jnX_1z!fnI@B?rv@%41R{u5>FgzZ*Cj3uH z6N)TSGvb@e7JwcCsyec5XfGSnr3jfL;rXOUzZ{K*qtG8+b@7AOKjf76 zr%|Q2$+&@~q6={>O!jSPR7ZakdUUP2GI^#o3k6(o@#8=r9$YP#&lmip$``Bu)D(dCf!{$kP?MW^=9ev#&4LqCHg*h-H7fuEzk5j~{ z;M8#!aXL5yGP3YSodvCoy^e1LaxfC6mv;pDiqw?%_f6+|m0Idn6(F{n@6h3>h)2_t z>ey+3Pz-~CfX@0eSbpyjx`iE|9T+=QdY9VyvjF8&kj1-c3jXFYfk=Md)7o;iOROdgOPIZJsJ ztnV`;6uO)vZI}qV3uOpt3ZAE2&r+l2zg5uj>Z0!U{oWzu*@EJWi9c+Ajn&>s5CubG z`}CUe8RvsQ0XOHg|WRna(f}XA@A%kT(AXf{ZU`-F|Z=hl! 
zWG675EAGrRKKB+XktZTtPWM84Wfj&b%X!e#PYN3iv*6 zdi#tyNw&5DcqE1{^PpeJFLv2b%GjSE(t2+@e0k@H5!w77JNsUUa5Tj8^s2@v(%# zinzdr`}H@peQFp)oERNx%BrHL#b?sH>+exJ*HS9_D@O6>7cF^Orxrx%rO(jY#9EQd zYUjm@2&_G4qHv#|W+885)h0iOapRuIFFy=m2y*`yT5x{|S;87=DdW~>C9{w(5-_7; znBYo&p42;H58s}7a4s>&#MQ3S+gO+K<%3xViGE z9;MsO*_2$kl=J}pQ9aoIp1$Q$2YZ8EM%#Z(DDC!YU2gSiTW<4eUvBpz_s#nEA5W)b zu4eMGdJX8yzy~{l2{Wze;_JXsg*(04=*&J)W~ zXrJ=#^?C?xv=FlqPuT_!Upnz8TJpv1@DQiGYkRmFjUvv;oaFe-cv)Vtbeo4*&r}KK za4vlSSZ@;u>k&dh&MQwt2q2O^pAutk{K3^GES0nsHlPR|?T?DT3R^|-5BTrIiCS(p zLYbi~3lpOaNLt5leCu^bibvLr)X0=Q>VL`Qp{ggd7qO~mTa#jNc?aH|$Eyhr?>}#b z=k=C>&ux$EbDHsVBP+ z-c0UbUwQU$MCWIGY>m|e)RY%yU+P-e`uD(~mcqs$&7X8o#FnqNa}q%ZB>1^X4Tnu{ zk$u6mtODi2YzOWbr^R-8oz<3XweHe!F>N=de%~{m(T3g8vM!eqi)a0^xZjL%$yg)T zUu6#XlL=eIKGj*gDUn~QpYnHNtawdwRvJ#ArPDHKnY1igHZ6yiOUpyPT6l$Qi>tcT zp&M&JHsCfiGmJGP8*=*!}*^iL%` z!vKW$)T)9Dr4sJgy~Yhlu7}_SN`KNLyLE@F$K-ZZuD1{1$G(lpjeC#dJFR!R;`Zd2 zIjcFVJ8L*=t|a*yaA&1l%^%}yKO<}0Jh`W_zH@)uA-bPe0ryYB7a}cT#rN%Mcj*Iu z$y6rT-M^<89+9`JTn&?7DV}8)bD*}uGSm7z9mEf7FAAk?8@5f4ePk}G1A>NzR}FQp z-P2RewqwTiJHgov&WDu$(Orl)bR!`awpOwb&tH4 z=$_uUOF2sjf2GFt?b958N4#`W6CZ=jj^^Z6*Y7e}>6TjDyh+oXv&qI4&&Q@RV+CEblP=GHaSjcr|i zJ#_>Wi^llcDjj#*G%ehClEx!tS=L~Fj^KCO7s>x6tNL`kYghHGe+G351%ts({Ohix zQ_H`E5Uku#B`VYN>|(bfUX$;JuFYC|X^Nv8AiPB%AZlIaLf`&U;DQm`?9JJI;w+cL zmG<)ytX$l~1>*-8{m)B%>n(S`xrgDY5i}?`jJ zb?)z?Rs%AhfmlN>Dy$U1kNicYF*H^*lqY zcfW^#-u#D2nQ8agJLorNUkHzS6H)xg;J#R1L#?wjkG$@^Zfz(_W$*H+novtJLBy>u z1t~d{FMTxF^lklH^qn-@qHKkv7AIa)OjictCO0XvIK%lSe~}pJ*i`I--b$u#S0#YT#wXv6Dy<9{!P0c-TT>~CS7GRGb+9>F z59g0R{+sq<%lt}SBq~VN-1t;q%@KpkAd~^Gs;I`JJKTa@n{+P{KNA;-jGg;Ck9MB! zR6CNLeI{eCrr2+!z~lnCVes$*%GZ7u3RN+8qVpsSHU$GZnzdh1B$QT& z_IrFPJqB}34^7WWZ%Us^XU;g8p`GEJ5t@->a=m>&(V`RHd8ae1Gp;kYvvO+9Fqr-x z-~inM>?s4OZ7XLk&RNwEY0^#ijU7F85gq&c!Qu`odZ)j{nm8BSd_yR=d zm4Ub2M)fnX`*S)a+T^4+gw*aVbDueb8uq_l#UbE3eU1zen56^MMG)t+rf%uFB#-27 zjT$qZh?yWwaM4U@F*H&ZbNb12?IL?|GycB`syMy|jYX4D33bAnR4$S|-Zvz3KkRM@ zxM|^jzdD0Ld?hDM#vshJ{A#2WWs?d@v;0&S+ZBninw5|l?6o`s0Y3<0J^aJomG3$9 zXEJFiVHF88+H>g8mV})TxDQ;T=H4lF5r?bi0gBDOJu&pfYKd)l5W}B~+ToDbAj4$G zT(AP0QW@1BoqL*9%-WpajB74yuE?!=#d(0S`$D%`w??;C_oeP;ixww6`8IqfoOp*A zMvNoo5-W+#lWX1o!r2Cbi{Jt!R>AdOk+Z(f=kmn_0yVk}KETjj)=?KfxKLqTR~>8{ z=a6ZOSZr-rbsUz%2Tz~*WUerrXyr6wcE{aW+%uc;x%n4@32-Pcr1UpVm_#p)97lD$8?1U;2sz!26?&w4B84SHD`pK*Rmq~~ZEHMk8+|k#HHO14j?4IaH)QPU1zD<~0eL&7=`38T~ zM=Rs?&Z8C6aAD-)QPZlu9UwS5th8>a*fCuc64_}#XOg2rYnQ{pomI%~r*Y0x0Sl@t z$-W)HAct!qp?CwrRO^y$IwU5pA{yn8SOzQ;mL1EDJ&YB>7CO~xW$gWjv(9X>5p}E* z=Z)oC^E1rTeS1znKZaMl%bMw#_O(SkZJW(x>P+$I|=*#dCueOE<<# z&wTrT#QT68?WcGCSw`6o-i?1v@@4+M@XpZ$f-Q9ce|MuM-bW$N^+ z;(+MQOS*s2sQN3!aUx&3|nPTb-H?zgh2T@|Ca4A+IHFN zxhPAn%A4``0klL~5-pjQLQAFD=Lx}JNZh+c1@SE2e42A1e1%4$2C z%=q%L-kg?3cNqnVj!;w2JTPx#L5bs*}zTFS+-V|HLvMS?B zm|KY;mrbj|(-gQ%0dVo%RO?M&2&n77>1N4zek3_{CFWkwe?DsC??kp^xo~tZ4E8GW z?=Kd_jWDJE{p^45!T&eU!4~L|OotqDPTW6U)6LLFL@scIW8M%6D{R>WedGikk$(1} zp-*5q=*an}Vg>r-t|@d>3_>a8?E?2V{vkrDx}OJ6xj^3Z^8gI!1nAKR_GF_P+^ z-`TCK5ehcip9kAB^uR*$>%Wu+wTWK`$!h=EAA7OR8Zc15jrCw@Ds Date: Wed, 29 Jan 2025 17:56:11 -0800 Subject: [PATCH 76/93] installation fixes --- python/packages/autogen-ext/pyproject.toml | 3 ++- .../src/autogen_ext/agentic_memory/README.md | 17 +++++++---------- python/samples/agentic_memory/README.md | 4 ++-- 3 files changed, 11 insertions(+), 13 deletions(-) diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index d9588fa5e94d..2642594e0e26 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -65,7 +65,8 @@ jupyter-executor = [ 
"ipykernel>=6.29.5", "nbclient>=0.10.2", ] -agentic-memory = ["chromadb"] + +agentic-memory = ["chromadb>=0.6.3"] semantic-kernel-core = [ "semantic-kernel>=1.17.1", diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index 81dac83803db..ebf7530e3dbd 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -28,21 +28,18 @@ can benefit other users in similar situations. ![agentic_memory.png](../../../imgs/agentic_memory.png) The block diagram above outlines the key components of our baseline agentic memory architecture, -which augments a base agent with the agentic memory mechanisms. +which augments an agent or team with the agentic memory mechanisms. The **Agentic Memory Controller** implements the fast-learning methods described below, and manages communication with an **Agentic Memory Bank** containing a vector DB and associated structures. -The **Apprentice** is a thin wrapper around the combination of agentic memory with some base agent. -Some applications will use the Apprentice class, and others will instantiate and use the Agentic Memory Controller directly. +The **Apprentice** is a placeholder for whatever app wraps the combination of agentic memory plus an arbitrary agent or team. +Some applications will use the Apprentice class, while others will instantiate and use the Agentic Memory Controller directly. -The **Base Agent** is any agent or team orchestrator designed to perform tasks passed to it, -perhaps by interacting with an **Environment** such as a web browser. -We’ve successfully connected and tested several different base agents: a simple LLM client, +The agent or team may interact with an **Environment** such as a web browser. +We’ve successfully run agentic memory with a simple AssistantAgent, the Magentic-One orchestrator, and the GitHub Copilot Chat agent. -The **AgentWrapper** contains the code that instantiates and connects to the selected base agent. 
- ## Memory Creation and Storage Each stored memory is an insight (in text form) crafted to help the agent accomplish future tasks that are similar @@ -75,9 +72,9 @@ Retrieved insights that pass the filtering steps are listed under a heading like ## Setup and Usage -After installing AutoGen-Core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: +Install AutoGen and its extension package as follows: -`pip install -e .[agentic-memory]` +`pip install "autogen-ext[agentic-memory]"` We provide [sample code](../../../../../samples/agentic_memory) to illustrate the following forms of memory-based fast learning: * Agent learning from user advice and corrections diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index 61a7631da138..b6e61915b068 100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -18,9 +18,9 @@ To use _MagenticOneGroupChat_ instead, specify that in the yaml file where indic ## Setup -After installing AutoGen-Core, install its extension package from the `autogen/python/packages/autogen-ext` directory as follows: +Install AutoGen and its extension package as follows: -`pip install -e .[agentic-memory]` +`pip install "autogen-ext[agentic-memory]"` ## Running the Samples From 70db202ab41dde71df8709d92cf890e6c173c5ba Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 30 Jan 2025 18:36:27 -0800 Subject: [PATCH 77/93] Refactor to remove AgentWrapper, and use AssistantAgent as a TaskRunner agent. --- .../autogen-ext/imgs/agentic_memory.png | 4 +- .../src/autogen_ext/agentic_memory/README.md | 11 +- .../autogen_ext/agentic_memory/__init__.py | 3 +- .../agentic_memory/agent_wrapper.py | 142 ----------------- .../agentic_memory_controller.py | 25 ++- .../autogen_ext/agentic_memory/apprentice.py | 146 +++++++++++++++++- .../autogen_ext/agentic_memory/page_logger.py | 18 ++- .../settings/demonstration.yaml | 5 +- .../settings/self_teaching.yaml | 5 +- .../agentic_memory/settings/teachability.yaml | 5 +- 10 files changed, 178 insertions(+), 186 deletions(-) delete mode 100644 python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py diff --git a/python/packages/autogen-ext/imgs/agentic_memory.png b/python/packages/autogen-ext/imgs/agentic_memory.png index effeabcfd827..840fa38a4ced 100644 --- a/python/packages/autogen-ext/imgs/agentic_memory.png +++ b/python/packages/autogen-ext/imgs/agentic_memory.png @@ -1,3 +1,3 @@ version https://git-lfs.github.com/spec/v1 -oid sha256:79d49c8a3e5a239c21f8d65aa6034adfa4bed99969e960f597cae46556c19253 -size 63442 +oid sha256:deccd3a81355e32a9c130827b9f97b7ac8d6e8e9579b15aac135eddd9d1f281a +size 39030 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md index ebf7530e3dbd..4a1cbb5f831d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md @@ -28,17 +28,16 @@ can benefit other users in similar situations. ![agentic_memory.png](../../../imgs/agentic_memory.png) The block diagram above outlines the key components of our baseline agentic memory architecture, -which augments an agent or team with the agentic memory mechanisms. +which augments an agent or team with agentic memory mechanisms. 
The **Agentic Memory Controller** implements the fast-learning methods described below, and manages communication with an **Agentic Memory Bank** containing a vector DB and associated structures. -The **Apprentice** is a placeholder for whatever app wraps the combination of agentic memory plus an arbitrary agent or team. -Some applications will use the Apprentice class, while others will instantiate and use the Agentic Memory Controller directly. +The **Apprentice** is a minimal reference implementation that wraps the combination of agentic memory plus some agent or team. +Certain applications will use the Apprentice, +while others will directly instantiate and call the Agentic Memory Controller. -The agent or team may interact with an **Environment** such as a web browser. -We’ve successfully run agentic memory with a simple AssistantAgent, -the Magentic-One orchestrator, and the GitHub Copilot Chat agent. +We’ve successfully tested agentic memory with a simple AssistantAgent and MagenticOneGroupChat. ## Memory Creation and Storage diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index ee15fd0e08de..d08e6e37554a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,7 +1,6 @@ from .grader import Grader from .page_logger import PageLogger from .apprentice import Apprentice -from .agent_wrapper import AgentWrapper from .agentic_memory_controller import AgenticMemoryController -__all__ = ["Apprentice", "PageLogger", "Grader", "AgentWrapper", "AgenticMemoryController"] +__all__ = ["Apprentice", "PageLogger", "Grader", "AgenticMemoryController"] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py deleted file mode 100644 index bd8360bf9e74..000000000000 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agent_wrapper.py +++ /dev/null @@ -1,142 +0,0 @@ -from typing import Tuple, Dict -import random, time - -from autogen_agentchat.agents import AssistantAgent -from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_agentchat.ui._console import Console -from autogen_core.models import ( - ChatCompletionClient, - SystemMessage, - UserMessage, -) - -from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str - -from .page_logger import PageLogger - - -class AgentWrapper: - """ - Wraps the base agent to route calls to it appropriately, after instantiating it if necessary. - Users can override this class to add methods for calling other agents. - - Args: - settings: The settings for the agent. - client: The client to call the model. - logger: The logger to log the model calls. - - Methods: - assign_task: Passes the given task to the base agent. - """ - def __init__(self, settings: Dict, client: ChatCompletionClient, logger: PageLogger): - self.settings = settings - self.client = client - self.logger = logger - self.base_agent_name = self.settings["base_agent"] - self.disable_prefix_caching = self.settings["disable_prefix_caching"] - if self.disable_prefix_caching: - self.rand = random.Random() - self.rand.seed(int(time.time() * 1000)) - - async def assign_task(self, task: str) -> Tuple[str, str]: - """ - Passes the given task to the base agent. 
- """ - self.logger.enter_function() - - # Pass the task through to the base agent. - if self.base_agent_name == "MagenticOneGroupChat": - response, work_history = await self._assign_task_to_magentic_one(task) - elif self.base_agent_name == "thin_agent": - response, work_history = await self._assign_task_to_thin_agent(task) - else: - raise AssertionError("Invalid base agent") - - self.logger.leave_function() - return response, work_history - - async def _assign_task_to_thin_agent(self, task: str) -> Tuple[str, str]: - """ - Passes the given task directly to the model client, along with a detailed "think carefully" system prompt. - """ - self.logger.enter_function() - self.logger.info(task) - - system_message_content = """You are a helpful and thoughtful assistant. -In responding to every user message, you follow the same multi-step process given here: -1. Explain your understanding of the user message in detail, covering all the important points. -2. List as many possible responses as you can think of. -3. Carefully list and weigh the pros and cons (if any) of each possible response. -4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. -5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. -6. Finish by providing your final response in the particular format requested by the user.""" - - if self.disable_prefix_caching: - # Prepend a random int to disable prefix caching. - random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) - system_message_content = random_str + system_message_content - - if self.client.model_info["family"] == "o1": - # No system message allowed, so pass it as the first user message. - system_message = UserMessage(content=system_message_content, source="User") - else: - # System message allowed. - system_message = SystemMessage(content=system_message_content) - - user_message = UserMessage(content=task, source="User") - input_messages = [system_message] + [user_message] - - response = await self.client.create(input_messages) - response_str = response.content - - # Log the model call - self.logger.log_model_call( - summary="Ask the model to complete the task", input_messages=input_messages, response=response - ) - self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) - - # Use the response as the work history as well. - work_history = response_str - - self.logger.leave_function() - return response_str, work_history - - async def _assign_task_to_magentic_one(self, task) -> Tuple[str, str]: - """ - Instantiates a MagenticOneGroupChat team, and passes the given task to it. - """ - self.logger.enter_function() - self.logger.info(task) - - general_agent = AssistantAgent( - "general_agent", - self.client, - description="A general GPT-4o AI assistant capable of performing a variety of tasks.", - ) - - web_surfer = MultimodalWebSurfer( - name="web_surfer", - model_client=self.client, - downloads_folder="logs", - debug_dir="logs", - to_save_screenshots=True, - ) - - team = MagenticOneGroupChat( - [general_agent, web_surfer], - model_client=self.client, - max_turns=20, - ) - - # Get the team's text response to the task. 
- stream = team.run_stream(task=task) - task_result = await Console(stream) - response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) - self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) - - # MagenticOne's response is the chat history, which we use here as the work history. - work_history = response_str - - self.logger.leave_function() - return response_str, work_history diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py index 6f4eac6870d0..df4287f162cb 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py @@ -6,7 +6,6 @@ from ._agentic_memory_bank import AgenticMemoryBank from .grader import Grader from ._prompter import Prompter -from .agent_wrapper import AgentWrapper from .page_logger import PageLogger @@ -16,27 +15,27 @@ class AgenticMemoryController: Args: settings: Settings for the memory controller. - agent: The agent to use for task completion. reset: True to clear the memory bank before starting. client: The client to call the model. + task_assignment_callback: The callback to assign a task to the agent. logger: The logger to log the model calls. Methods: reset_memory: Resets the memory bank. - train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. - test_on_task: Assigns a task to the completion agent, along with any relevant insights retrieved from memory. + train_on_task: Repeatedly assigns a task to the agent, and tries to learn from failures by creating useful insights as memories. + test_on_task: Assigns a task to the agent, along with any relevant insights retrieved from memory. add_insight_to_memory: Adds one insight to the memory bank, using the task (if provided) as context. add_task_solution_pair_to_memory: Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. retrieve_relevant_insights: Retrieve any insights from the DB that seem relevant to the task. assign_task: Assigns a task to the agent, along with any relevant insights/memories. handle_user_message: Handles a user message, extracting any advice and assigning a task to the agent. """ - def __init__(self, settings: Dict, agent: AgentWrapper, reset: bool, client: ChatCompletionClient, logger: PageLogger) -> None: + def __init__(self, settings: Dict, reset: bool, client: ChatCompletionClient, task_assignment_callback: Callable, logger: PageLogger) -> None: self.logger = logger self.logger.enter_function() self.settings = settings - self.agent = agent self.client = client + self.task_assignment_callback = task_assignment_callback self.prompter = Prompter(client, logger) self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], reset=reset, logger=logger) self.grader = Grader(client, logger) @@ -50,7 +49,7 @@ def reset_memory(self) -> None: async def train_on_task(self, task: str, expected_answer: str) -> None: """ - Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. + Repeatedly assigns a task to the agent, and tries to learn from failures by creating useful insights as memories. 
""" self.logger.enter_function() self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") @@ -66,7 +65,7 @@ async def train_on_task(self, task: str, expected_answer: str) -> None: async def test_on_task(self, task: str, expected_answer: str, num_trials=1) -> Tuple[str, int, int]: """ - Assigns a task to the completion agent, along with any relevant insights retrieved from memory. + Assigns a task to the agent, along with any relevant insights retrieved from memory. """ self.logger.enter_function() response = None @@ -86,7 +85,7 @@ async def test_on_task(self, task: str, expected_answer: str, num_trials=1) -> T # Attempt to solve the task. self.logger.info("Try to solve the task.\n") - response, _ = await self.agent.assign_task(task_plus_insights) + response, _ = await self.task_assignment_callback(task_plus_insights) # Check if the response is correct. response_is_correct, extracted_answer = await self.grader.is_response_correct( @@ -226,7 +225,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a # Attempt to solve the task. self.logger.info("Try to solve the task.") - response, work_history = await self.agent.assign_task(task_plus_insights) + response, work_history = await self.task_assignment_callback(task_plus_insights) response_is_correct, extracted_answer = await self.grader.is_response_correct( task, response, expected_answer @@ -244,7 +243,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int) -> Tuple[str, None | str]: """ - Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. + Repeatedly assigns a task to the agent, and tries to learn from failures by creating useful insights as memories. """ self.logger.enter_function() self.logger.info("\nTask description: {}".format(task)) @@ -325,9 +324,9 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo # Attempt to solve the task. self.logger.info("Try to solve the task.\n") if should_await: - response, _ = await self.agent.assign_task(task) + response, _ = await self.task_assignment_callback(task) else: - response, _ = self.agent.assign_task(task) + response, _ = self.task_assignment_callback(task) self.logger.leave_function() return response diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index 9856365800bc..f3cc6d1cd21a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -1,10 +1,27 @@ -from .agent_wrapper import AgentWrapper +from typing import Tuple, Dict +import random, time + from .agentic_memory_controller import AgenticMemoryController +from autogen_ext.agents.web_surfer import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str +from autogen_agentchat.agents import AssistantAgent +from autogen_agentchat.messages import TextMessage +from autogen_agentchat.teams import MagenticOneGroupChat +from autogen_agentchat.ui._console import Console +from autogen_agentchat.base import TaskResult +from autogen_core.models import ( + ChatCompletionClient, + SystemMessage, + UserMessage, +) + class Apprentice: """ - Wraps the combination of agentic memory and a base agent. 
+ A minimal wrapper combining agentic memory with an agent or team. + Applications may use the Apprentice class, or they may directly instantiate + and call the Agentic Memory Controller using this class as an example. Args: settings: The settings for the apprentice. @@ -19,20 +36,21 @@ class Apprentice: train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ def __init__(self, settings, client, logger) -> None: - self.settings = settings self.client = client self.logger = logger + self.name_of_agent_or_team = settings["name_of_agent_or_team"] + self.disable_prefix_caching = settings["disable_prefix_caching"] - # Create the agent wrapper, which creates the base agent. - self.agent_settings = settings["AgentWrapper"] - self.agent = AgentWrapper(settings=self.agent_settings, client=self.client, logger=self.logger) + if self.disable_prefix_caching: + self.rand = random.Random() + self.rand.seed(int(time.time() * 1000)) # Create the AgenticMemoryController, which creates the AgenticMemoryBank. self.memory_controller = AgenticMemoryController( - settings=self.settings["AgenticMemoryController"], - agent=self.agent, + settings=settings["AgenticMemoryController"], reset=True, client=self.client, + task_assignment_callback=self.assign_task_to_agent_or_team, logger=self.logger, ) @@ -89,3 +107,115 @@ async def train_on_task(self, task: str, expected_answer: str) -> None: await self.memory_controller.train_on_task(task, expected_answer) self.logger.leave_function() + + async def assign_task_to_agent_or_team(self, task: str) -> Tuple[str, str]: + """ + Passes the given task to the target agent or team. + """ + self.logger.enter_function() + + # Pass the task through. + if self.name_of_agent_or_team == "MagenticOneGroupChat": + response, work_history = await self._assign_task_to_magentic_one(task) + elif self.name_of_agent_or_team == "SimpleAgent": + response, work_history = await self._assign_task_to_simple_agent(task) + elif self.name_of_agent_or_team == "thin_agent": + response, work_history = await self._assign_task_to_thin_agent(task) + else: + raise AssertionError("Invalid base agent") + + self.logger.leave_function() + return response, work_history + + async def _assign_task_to_simple_agent(self, task: str) -> Tuple[str, str]: + """ + Passes the given task to a newly created AssistantAgent with a generic 6-step system prompt. + """ + self.logger.enter_function() + self.logger.info(task) + + system_message_content = """You are a helpful and thoughtful assistant. +In responding to every user message, you follow the same multi-step process given here: +1. Explain your understanding of the user message in detail, covering all the important points. +2. List as many possible responses as you can think of. +3. Carefully list and weigh the pros and cons (if any) of each possible response. +4. Critique the pros and cons above, looking for any flaws in your reasoning. But don't make up flaws that don't exist. +5. Decide on the best response, looping back to step 1 if none of the responses are satisfactory. +6. Finish by providing your final response in the particular format requested by the user.""" + + if self.disable_prefix_caching: + # Prepend a random int to disable prefix caching. 
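+            # A fresh random prefix makes each run's context unique, so repeated
+            # trials are decorrelated rather than replayed from a cached prompt prefix.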
+ random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) + system_message_content = random_str + system_message_content + + if self.client.model_info["family"] == "o1": + # No system message allowed, so pass it as the first user message. + system_message = UserMessage(content=system_message_content, source="User") + else: + # System message allowed. + system_message = SystemMessage(content=system_message_content) + + user_message = UserMessage(content=task, source="User") + input_messages = [system_message] + [user_message] + + simple_agent = AssistantAgent( + "simple_agent", + self.client, + system_message=system_message_content, + ) + + # Get the agent's response to the task. + task_result: TaskResult = await simple_agent.run(task=TextMessage(content=task, source="User")) + message = task_result.messages[-1] + response_str = message.content + + # Log the model call + self.logger.log_model_call( + summary="Ask the model to complete the task", input_messages=input_messages, response=task_result + ) + self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) + + # Use the response as the work history as well. + work_history = response_str + + self.logger.leave_function() + return response_str, work_history + + async def _assign_task_to_magentic_one(self, task) -> Tuple[str, str]: + """ + Instantiates a MagenticOneGroupChat team, and passes the given task to it. + """ + self.logger.enter_function() + self.logger.info(task) + + general_agent = AssistantAgent( + "general_agent", + self.client, + description="A general GPT-4o AI assistant capable of performing a variety of tasks.", + ) + + web_surfer = MultimodalWebSurfer( + name="web_surfer", + model_client=self.client, + downloads_folder="logs", + debug_dir="logs", + to_save_screenshots=True, + ) + + team = MagenticOneGroupChat( + [general_agent, web_surfer], + model_client=self.client, + max_turns=20, + ) + + # Get the team's text response to the task. + stream = team.run_stream(task=task) + task_result = await Console(stream) + response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) + self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) + + # MagenticOne's response is the chat history, which we use here as the work history. + work_history = response_str + + self.logger.leave_function() + return response_str, work_history diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py index d355d5f0ed75..e32106c0e0b8 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py @@ -12,7 +12,9 @@ LLMMessage, SystemMessage, UserMessage, + CreateResult, ) +from autogen_agentchat.base import TaskResult from ._utils import MessageContent @@ -238,7 +240,7 @@ def log_message_content(self, message_content: MessageContent, summary: str) -> page.add_lines(self._format_message_content(page, message_content=message_content)) page.flush() - def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: LLMMessage) -> Optional["Page"]: + def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: Union[CreateResult, TaskResult]) -> Optional["Page"]: """ Adds a page containing all messages to or from a model, including any images. 
""" @@ -246,13 +248,21 @@ def log_model_call(self, summary: str, input_messages: List[LLMMessage], respons return None page = self._add_page(summary=summary, show_in_call_tree=False) self.page_stack.write_stack_to_page(page) - page.add_lines("{} prompt tokens".format(response.usage.prompt_tokens)) - page.add_lines("{} completion tokens".format(response.usage.completion_tokens)) + + if isinstance(response, TaskResult): + usage = response.messages[-1].models_usage + message = response.messages[-1] + else: + usage = response.usage + message = response + + page.add_lines("{} prompt tokens".format(usage.prompt_tokens)) + page.add_lines("{} completion tokens".format(usage.completion_tokens)) for m in input_messages: page.add_lines("\n" + self._message_source(m)) page.add_lines(self._format_message_content(page, message=m)) page.add_lines("\n" + self._decorate_text("ASSISTANT RESPONSE", "green", demarcate=True)) - page.add_lines(self._format_message_content(page, message=response)) + page.add_lines(self._format_message_content(page, message=message)) page.flush() return page diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml index 73bde782d02f..67c5d1beebae 100644 --- a/python/samples/agentic_memory/settings/demonstration.yaml +++ b/python/samples/agentic_memory/settings/demonstration.yaml @@ -14,6 +14,8 @@ client: max_retries: 65535 Apprentice: + name_of_agent_or_team: SimpleAgent # SimpleAgent, MagenticOneGroupChat, etc. + disable_prefix_caching: 1 # If true, prepends a small random string to the context, to decorrelate repeated runs. AgenticMemoryController: max_train_trials: 10 max_test_trials: 3 @@ -22,9 +24,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - AgentWrapper: - base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. - disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs. test: main_task_file: data_files/tasks/cell_towers_1.yaml # The task being tested. diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml index 167e23d3d26f..e0f8e5497561 100644 --- a/python/samples/agentic_memory/settings/self_teaching.yaml +++ b/python/samples/agentic_memory/settings/self_teaching.yaml @@ -14,6 +14,8 @@ client: max_retries: 65535 Apprentice: + name_of_agent_or_team: SimpleAgent # SimpleAgent, MagenticOneGroupChat, etc. + disable_prefix_caching: 1 # If true, prepends a small random string to the context, to decorrelate repeated runs. AgenticMemoryController: max_train_trials: 10 max_test_trials: 3 @@ -22,9 +24,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - AgentWrapper: - base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. - disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs. test: task_file_1: data_files/tasks/10_liars.yaml # Train and test on this task. diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml index 186e57ea2dd4..07387f76ada2 100644 --- a/python/samples/agentic_memory/settings/teachability.yaml +++ b/python/samples/agentic_memory/settings/teachability.yaml @@ -14,6 +14,8 @@ client: max_retries: 65535 Apprentice: + name_of_agent_or_team: SimpleAgent # SimpleAgent, MagenticOneGroupChat, etc. 
+ disable_prefix_caching: 1 # If true, prepends a small random string to the context, to decorrelate repeated runs. AgenticMemoryController: max_train_trials: 10 max_test_trials: 3 @@ -22,9 +24,6 @@ Apprentice: relevance_conversion_threshold: 1.7 n_results: 25 distance_threshold: 100 - AgentWrapper: - base_agent: thin_agent # MagenticOneGroupChat, thin_agent, etc. - disable_prefix_caching: 1 # Prepends a small random string to decorrelate repeated runs. test: task_file: data_files/tasks/autogen_package.yaml # The task being tested. From 5e4ad481c4607aee514f7b53ab708e234151c75d Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 12:45:20 -0800 Subject: [PATCH 78/93] uv fixes --- .../src/autogen_ext/agentic_memory/_utils.py | 6 +++--- .../src/autogen_ext/agentic_memory/grader.py | 16 +++++++++++----- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py index 6deb2bd4c501..606e434c8f6d 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py @@ -1,7 +1,7 @@ -from typing import Any, Dict, List, Union, Tuple +from typing import List, Union from autogen_core import FunctionCall, Image -from autogen_core.models import FunctionExecutionResult, LLMMessage +from autogen_core.models import FunctionExecutionResult # Convenience types UserContent = Union[str, List[Union[str, Image]]] @@ -11,7 +11,7 @@ MessageContent = UserContent | AssistantContent | SystemContent | FunctionExecutionContent -def message_content_to_str(message_content: MessageContent) -> str: +def message_content_to_str(message_content: MessageContent | None) -> str: """ Converts the message content to a string. """ diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py index 4e39ee0b7bf1..c63430107bcd 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py @@ -1,4 +1,6 @@ -from typing import List, Tuple +from __future__ import annotations + +from typing import TYPE_CHECKING, List, Tuple from autogen_core.models import ( AssistantMessage, @@ -12,6 +14,9 @@ from .page_logger import PageLogger from ._utils import UserContent +if TYPE_CHECKING: + from .apprentice import Apprentice + class Grader: """ @@ -37,7 +42,8 @@ def __init__(self, client: ChatCompletionClient, logger: PageLogger): self._chat_history: List[LLMMessage] = [] async def test_apprentice( - self, apprentice, task_description, expected_answer, num_trials, use_memory, client, logger + self, apprentice: Apprentice, task_description: str, expected_answer: str, num_trials: int, use_memory: bool, + client: ChatCompletionClient, logger: PageLogger ) -> Tuple[int, int]: logger.enter_function() @@ -64,7 +70,7 @@ async def test_apprentice( return num_successes, num_trials async def call_model( - self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True + self, summary: str, user_content: UserContent, system_message_content: str | None = None, keep_these_messages: bool = True ) -> str: """ Calls the model client with the given input and returns the response. 
@@ -115,7 +121,7 @@ async def is_response_correct(self, task_description: str, response_to_be_graded
         sys_message = """You are a helpful and thoughtful assistant."""
 
         # Ask the model to extract the answer from the response.
-        user_message = [
+        user_message: UserContent = [
             """Your job is to extract a possible answer to the following question from the given text.
 - First review the following task.
 - Then review the text that follows, which may contain an answer, plus reasoning that led to the answer.
@@ -135,7 +141,7 @@ async def is_response_correct(self, task_description: str, response_to_be_graded
         self.logger.info("Extracted answer: " + extracted_answer)
 
         # Ask the model to check the answer for correctness.
-        user_message = [
+        user_message: UserContent = [
             """Your job is to decide whether a given answer to a task is correct or not.
 - You will be given the task description and the correct, gold-standard answer, along with the answer to be graded.
 - In general, an answer is correct if it is equivalent to the correct answer.

From b6c59aeabc8f1246ff26b089509934259ef9d3ea Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Fri, 31 Jan 2025 15:17:43 -0800
Subject: [PATCH 79/93] uv fixes

---
 .../src/autogen_ext/agentic_memory/grader.py  | 25 +++++++++++++------
 1 file changed, 18 insertions(+), 7 deletions(-)

diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
index c63430107bcd..f4e46be5931e 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py
@@ -11,8 +11,8 @@
     UserMessage,
 )
 
-from .page_logger import PageLogger
 from ._utils import UserContent
+from .page_logger import PageLogger
 
 if TYPE_CHECKING:
     from .apprentice import Apprentice
@@ -31,6 +31,7 @@ class Grader:
         call_model: Calls the model with the given input and returns the response.
         is_response_correct: Determines whether the response is equivalent to the task's correct answer.
     """
+
     def __init__(self, client: ChatCompletionClient, logger: PageLogger):
         self.client = client
         self.logger = logger
@@ -42,8 +43,14 @@ def __init__(self, client: ChatCompletionClient, logger: PageLogger):
         self._chat_history: List[LLMMessage] = []
 
     async def test_apprentice(
-        self, apprentice: Apprentice, task_description: str, expected_answer: str, num_trials: int, use_memory: bool,
-        client: ChatCompletionClient, logger: PageLogger
+        self,
+        apprentice: Apprentice,
+        task_description: str,
+        expected_answer: str,
+        num_trials: int,
+        use_memory: bool,
+        client: ChatCompletionClient,
+        logger: PageLogger,
     ) -> Tuple[int, int]:
         logger.enter_function()
 
@@ -70,7 +77,11 @@ async def test_apprentice(
         return num_successes, num_trials
 
     async def call_model(
-        self, summary: str, user_content: UserContent, system_message_content: str | None = None, keep_these_messages: bool = True
+        self,
+        summary: str,
+        user_content: UserContent,
+        system_message_content: str | None = None,
+        keep_these_messages: bool = True,
     ) -> str:
         """
         Calls the model client with the given input and returns the response.
@@ -112,7 +123,9 @@ def _clear_history(self) -> None: """ self._chat_history = [] - async def is_response_correct(self, task_description: str, response_to_be_graded: str, correct_answer: str) -> Tuple[bool, str]: + async def is_response_correct( + self, task_description: str, response_to_be_graded: str, correct_answer: str + ) -> Tuple[bool, str]: """ Determines whether the response is equivalent to the task's correct answer. """ @@ -165,7 +178,5 @@ async def is_response_correct(self, task_description: str, response_to_be_graded ) self.logger.info("Decision: " + decision) - if self.report_results: - self.client.report_result(decision) self.logger.leave_function() return decision == "1", extracted_answer From bef7e5de88559c969026d5b6b830e74dc96df252 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 16:01:55 -0800 Subject: [PATCH 80/93] uv fixes --- .../autogen_ext/agentic_memory/_prompter.py | 33 +++++++++++-------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 6f35add7e64b..2a67c4ff2a36 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -1,5 +1,5 @@ import time -from typing import List +from typing import List, Union from autogen_core import Image from autogen_core.models import ( @@ -11,8 +11,8 @@ UserMessage, ) -from .page_logger import PageLogger from ._utils import UserContent +from .page_logger import PageLogger class Prompter: @@ -32,6 +32,7 @@ class Prompter: extract_task: Returns a task found in the given text, or None if not found. extract_advice: Returns advice from the given text, or None if not found. """ + def __init__(self, client: ChatCompletionClient, logger: PageLogger): self.client = client self.logger = logger @@ -44,7 +45,11 @@ def __init__(self, client: ChatCompletionClient, logger: PageLogger): self._chat_history: List[LLMMessage] = [] async def call_model( - self, summary: str, user_content: UserContent = None, system_message_content: str = None, keep_these_messages: bool = True + self, + summary: str, + user_content: UserContent, + system_message_content: str | None = None, + keep_these_messages: bool = True, ) -> str: """ Calls the model client with the given input and returns the response. @@ -106,7 +111,7 @@ async def learn_from_failure( sys_message = """- You are a patient and thorough teacher. - Your job is to review work done by students and help them learn how to do better.""" - user_message = [] + user_message: List[Union[str, Image]] = [] user_message.append("# A team of students made a mistake on the following task:\n") user_message.extend([task_description]) @@ -159,7 +164,7 @@ async def find_index_topics(self, input_string: str) -> List[str]: """ sys_message = """You are an expert at semantic analysis.""" - user_message = [] + user_message: List[Union[str, Image]] = [] user_message.append("""- My job is to create a thorough index for a book called Task Completion, and I need your help. - Every paragraph in the book needs to be indexed by all the topics related to various kinds of tasks and strategies for completing them. - Your job is to read the text below and extract the task-completion topics that are covered. @@ -177,9 +182,9 @@ async def find_index_topics(self, input_string: str) -> List[str]: ) # Parse the topics into a list. 
-        topic_list = []
+        topic_list: List[str] = []
         for line in topics.split("\n"):
-            if (line is not None) and (len(line) > 0):
+            if len(line) > 0:
                 topic_list.append(line)
         return topic_list
 
@@ -191,7 +196,7 @@ async def generalize_task(self, task_description: str) -> str:
 
         sys_message = """You are a helpful and thoughtful assistant."""
 
-        user_message = [
+        user_message: List[Union[str, Image]] = [
             "We have been given a task description. Our job is not to complete the task, but merely rephrase the task in simpler, more general terms, if possible. Please read through the following task description, then explain your understanding of the task in detail, as a single, flat list of all the important points."
         ]
         user_message.append("\n# Task description")
@@ -230,8 +235,8 @@ async def validate_insight(self, insight: str, task_description: str) -> bool:
 
         sys_message = """You are a helpful and thoughtful assistant."""
 
-        user_message = [
-            """We have been given a potential insight that may or may not be useful for solving a given task.
+        user_message: List[Union[str, Image]] = [
+            """We have been given a potential insight that may or may not be useful for solving a given task.
 - First review the following task.
 - Then review the insight that follows, and consider whether it might help solve the given task.
 - Do not attempt to actually solve the task.
@@ -249,12 +254,12 @@ async def validate_insight(self, insight: str, task_description: str) -> bool:
         )
         return response == "1"
 
-    async def extract_task(self, text: str) -> str:
+    async def extract_task(self, text: str) -> str | None:
         """
         Returns a task found in the given text, or None if not found.
         """
         sys_message = """You are a helpful and thoughtful assistant."""
-        user_message = [
+        user_message: List[Union[str, Image]] = [
             """Does the following text contain a question or some task we are being asked to perform?
 - If so, please reply with the full question or task description, along with any supporting information, but without adding extra commentary or formatting.
 - If the task is just to remember something, that doesn't count as a task, so don't include it.
@@ -268,12 +273,12 @@ async def extract_task(self, text: str) -> str:
         )
         return response if response != "None" else None
 
-    async def extract_advice(self, text: str) -> str:
+    async def extract_advice(self, text: str) -> str | None:
        """
         Returns advice from the given text, or None if not found.
         """
         sys_message = """You are a helpful and thoughtful assistant."""
-        user_message = [
+        user_message: List[Union[str, Image]] = [
             """Does the following text contain any information or advice that might be useful later?
 - If so, please copy the information or advice, adding no extra commentary or formatting.
- If there is no potentially useful information or advice at all, simply write "None" with no punctuation.""" From 1fb5ee47fc438f0543806a758fa98ab4d8822a9d Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 17:11:23 -0800 Subject: [PATCH 81/93] uv fixes --- .../agentic_memory_controller.py | 2 +- .../autogen_ext/agentic_memory/apprentice.py | 44 ++++++++++--------- 2 files changed, 25 insertions(+), 21 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py index df4287f162cb..883df10728bd 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py @@ -134,7 +134,7 @@ async def add_insight_to_memory(self, insight: str, task: None | str = None) -> self.memory_bank.add_insight(insight, topics, generalized_task) self.logger.leave_function() - async def add_task_solution_pair_to_memory(self, task, solution) -> None: + async def add_task_solution_pair_to_memory(self, task: str, solution: str) -> None: """ Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. This is useful when the insight is a demonstration of how to solve a given type of task. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index f3cc6d1cd21a..2c6d12d516c3 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -1,20 +1,23 @@ -from typing import Tuple, Dict -import random, time +import random +import time +from typing import Any, Dict, List, Tuple -from .agentic_memory_controller import AgenticMemoryController -from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str from autogen_agentchat.agents import AssistantAgent from autogen_agentchat.messages import TextMessage from autogen_agentchat.teams import MagenticOneGroupChat from autogen_agentchat.ui._console import Console -from autogen_agentchat.base import TaskResult from autogen_core.models import ( ChatCompletionClient, + LLMMessage, SystemMessage, UserMessage, ) +from autogen_ext.agents.web_surfer import MultimodalWebSurfer +from autogen_ext.agents.web_surfer._utils import message_content_to_str + +from .agentic_memory_controller import AgenticMemoryController +from .page_logger import PageLogger class Apprentice: @@ -35,7 +38,8 @@ class Apprentice: add_task_solution_pair_to_memory: Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ - def __init__(self, settings, client, logger) -> None: + + def __init__(self, settings: Dict[str, Any], client: ChatCompletionClient, logger: PageLogger) -> None: self.client = client self.logger = logger self.name_of_agent_or_team = settings["name_of_agent_or_team"] @@ -58,8 +62,7 @@ def reset_memory(self) -> None: """ Resets the memory bank. 
""" - if self.memory_controller is not None: - self.memory_controller.reset_memory() + self.memory_controller.reset_memory() async def handle_user_message(self, text: str, should_await: bool = True) -> str: """ @@ -73,7 +76,7 @@ async def handle_user_message(self, text: str, should_await: bool = True) -> str self.logger.leave_function() return response - async def add_task_solution_pair_to_memory(self, task, solution) -> None: + async def add_task_solution_pair_to_memory(self, task: str, solution: str) -> None: """ Adds a task-solution pair to the memory bank, to be retrieved together later as a combined insight. This is useful when the insight is a demonstration of how to solve a given type of task. @@ -119,15 +122,13 @@ async def assign_task_to_agent_or_team(self, task: str) -> Tuple[str, str]: response, work_history = await self._assign_task_to_magentic_one(task) elif self.name_of_agent_or_team == "SimpleAgent": response, work_history = await self._assign_task_to_simple_agent(task) - elif self.name_of_agent_or_team == "thin_agent": - response, work_history = await self._assign_task_to_thin_agent(task) else: raise AssertionError("Invalid base agent") self.logger.leave_function() return response, work_history - async def _assign_task_to_simple_agent(self, task: str) -> Tuple[str, str]: + async def _assign_task_to_simple_agent(self, task: str) -> Tuple[Any, Any]: """ Passes the given task to a newly created AssistantAgent with a generic 6-step system prompt. """ @@ -150,13 +151,15 @@ async def _assign_task_to_simple_agent(self, task: str) -> Tuple[str, str]: if self.client.model_info["family"] == "o1": # No system message allowed, so pass it as the first user message. - system_message = UserMessage(content=system_message_content, source="User") + system_message: LLMMessage = UserMessage(content=system_message_content, source="User") else: # System message allowed. - system_message = SystemMessage(content=system_message_content) + system_message: LLMMessage = SystemMessage(content=system_message_content) - user_message = UserMessage(content=task, source="User") - input_messages = [system_message] + [user_message] + user_message: LLMMessage = UserMessage(content=task, source="User") + system_message_list: List[LLMMessage] = [system_message] + user_message_list: List[LLMMessage] = [user_message] + input_messages: List[LLMMessage] = system_message_list + user_message_list simple_agent = AssistantAgent( "simple_agent", @@ -165,8 +168,9 @@ async def _assign_task_to_simple_agent(self, task: str) -> Tuple[str, str]: ) # Get the agent's response to the task. - task_result: TaskResult = await simple_agent.run(task=TextMessage(content=task, source="User")) - message = task_result.messages[-1] + task_result = await simple_agent.run(task=TextMessage(content=task, source="User")) + messages = task_result.messages + message = messages[-1] response_str = message.content # Log the model call @@ -181,7 +185,7 @@ async def _assign_task_to_simple_agent(self, task: str) -> Tuple[str, str]: self.logger.leave_function() return response_str, work_history - async def _assign_task_to_magentic_one(self, task) -> Tuple[str, str]: + async def _assign_task_to_magentic_one(self, task: str) -> Tuple[str, str]: """ Instantiates a MagenticOneGroupChat team, and passes the given task to it. 
""" From 1d7f4eb390afefabdf6adc69f0eac2b6862e1c08 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 17:45:17 -0800 Subject: [PATCH 82/93] uv fixes --- .../agentic_memory/_string_similarity_map.py | 43 +++++++++++-------- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py index c3252980977a..1444a043039a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py @@ -1,8 +1,11 @@ import os import pickle -from typing import Optional, Union, Dict, List, Tuple +from typing import Dict, List, Tuple, Union import chromadb +from chromadb.api.types import ( + QueryResult, +) from chromadb.config import Settings from .page_logger import PageLogger @@ -27,6 +30,7 @@ class StringSimilarityMap: - reset_db: Forces immediate deletion of the DB's contents, in memory and on disk. - save_string_pairs: Saves the string-pair dict to disk. """ + def __init__(self, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None: self.logger = logger self.path_to_db_dir = path_to_db_dir @@ -40,7 +44,7 @@ def __init__(self, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None # Load or create the associated string-pair dict on disk. self.path_to_dict = os.path.join(path_to_db_dir, "uid_text_dict.pkl") - self.uid_text_dict = {} + self.uid_text_dict: Dict[str, Tuple[str, str]] = {} self.last_string_pair_id = 0 if (not reset) and os.path.exists(self.path_to_dict): self.logger.debug("\nLOADING STRING SIMILARITY MAP FROM DISK {}".format(self.path_to_dict)) @@ -96,28 +100,29 @@ def add_input_output_pair(self, input_text: str, output_text: str) -> None: ) # self._log_string_pairs() # For deeper debugging, uncomment to log all string pairs after each addition. - def get_related_string_pairs(self, query_text: str, n_results: int, threshold: Union[int, float]) -> List[Tuple[str, str, float]]: + def get_related_string_pairs( + self, query_text: str, n_results: int, threshold: Union[int, float] + ) -> List[Tuple[str, str, float]]: """ Retrieves up to n string pairs that are related to the given query text within the specified distance threshold. 
""" + string_pairs_with_distances: List[Tuple[str, str, float]] = [] if n_results > len(self.uid_text_dict): n_results = len(self.uid_text_dict) if n_results > 0: - results = self.vec_db.query(query_texts=[query_text], n_results=n_results) + results: QueryResult = self.vec_db.query(query_texts=[query_text], n_results=n_results) num_results = len(results["ids"][0]) - else: - results = [] - num_results = 0 - string_pairs = [] - for i in range(num_results): - uid, input_text, distance = results["ids"][0][i], results["documents"][0][i], results["distances"][0][i] - if distance < threshold: - input_text_2, output_text = self.uid_text_dict[uid] - assert input_text == input_text_2 - self.logger.debug( - "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( - input_text, output_text, distance + for i in range(num_results): + uid = results["ids"][0][i] + input_text = results["documents"][0][i] if results["documents"] else "" + distance = results["distances"][0][i] if results["distances"] else 0.0 + if distance < threshold: + input_text_2, output_text = self.uid_text_dict[uid] + assert input_text == input_text_2 + self.logger.debug( + "\nINPUT-OUTPUT PAIR RETRIEVED FROM VECTOR DATABASE:\n INPUT1\n {}\n OUTPUT\n {}\n DISTANCE\n {}".format( + input_text, output_text, distance + ) ) - ) - string_pairs.append((input_text, output_text, distance)) - return string_pairs + string_pairs_with_distances.append((input_text, output_text, distance)) + return string_pairs_with_distances From 516e6890efb7a4b646135a11e5f19c7390dff8ba Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 17:58:43 -0800 Subject: [PATCH 83/93] uv fixes --- .../agentic_memory/_agentic_memory_bank.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index 4be509754b1a..fdd6505470be 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -1,7 +1,7 @@ import os import pickle from dataclasses import dataclass -from typing import Dict, List, Optional, Union +from typing import Any, Dict, List, Optional, Tuple from ._string_similarity_map import StringSimilarityMap from .page_logger import PageLogger @@ -12,9 +12,10 @@ class Insight: """ Represents a task-completion insight, which is a string that may help solve a task. """ + id: str insight_str: str - task_str: str + task_str: str | None topics: List[str] @@ -35,7 +36,8 @@ class AgenticMemoryBank: - add_task_with_solution: Adds a task-insight pair to the memory bank, to be retrieved together later. - get_relevant_insights: Returns any insights from the memory bank that appear sufficiently relevant to the given """ - def __init__(self, settings: Dict, reset: bool, logger: PageLogger) -> None: + + def __init__(self, settings: Dict[str, Any], reset: bool, logger: PageLogger) -> None: self.settings = settings self.logger = logger self.logger.enter_function() @@ -78,7 +80,7 @@ def _reset_insights(self) -> None: """ Forces immediate deletion of the insights, in memory and on disk. 
""" - self.uid_insight_dict = {} + self.uid_insight_dict: Dict[str, Insight] = {} self.save_insights() def save_insights(self) -> None: @@ -133,8 +135,8 @@ def get_relevant_insights(self, task_topics: List[str]) -> Dict[str, float]: Returns any insights from the memory bank that appear sufficiently relevant to the given task topics. """ # Process the matching topics to build a dict of insight-relevance pairs. - matches = [] # Each match is a tuple: (topic, insight, distance) - insight_relevance_dict = {} + matches: List[Tuple[str, str, float]] = [] # Each match is a tuple: (topic, insight, distance) + insight_relevance_dict: Dict[str, float] = {} for topic in task_topics: matches.extend(self.string_map.get_related_string_pairs(topic, self.n_results, self.distance_threshold)) for match in matches: From ffe719a0321feb12fc88aae5d12535e062db9883 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Fri, 31 Jan 2025 18:27:23 -0800 Subject: [PATCH 84/93] uv fixes --- .../agentic_memory_controller.py | 47 ++++++++++++------- 1 file changed, 29 insertions(+), 18 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py index 883df10728bd..e11155596cdf 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py @@ -1,11 +1,12 @@ -from typing import Callable, Dict, List, Optional, Union, Tuple +from typing import Any, Awaitable, Callable, Dict, List, Tuple from autogen_core.models import ( ChatCompletionClient, ) + from ._agentic_memory_bank import AgenticMemoryBank -from .grader import Grader from ._prompter import Prompter +from .grader import Grader from .page_logger import PageLogger @@ -30,7 +31,15 @@ class AgenticMemoryController: assign_task: Assigns a task to the agent, along with any relevant insights/memories. handle_user_message: Handles a user message, extracting any advice and assigning a task to the agent. """ - def __init__(self, settings: Dict, reset: bool, client: ChatCompletionClient, task_assignment_callback: Callable, logger: PageLogger) -> None: + + def __init__( + self, + settings: Dict[str, Any], + reset: bool, + client: ChatCompletionClient, + task_assignment_callback: Callable[[str], Awaitable[Tuple[str, str]]], + logger: PageLogger, + ) -> None: self.logger = logger self.logger.enter_function() self.settings = settings @@ -63,12 +72,12 @@ async def train_on_task(self, task: str, expected_answer: str) -> None: await self.add_insight_to_memory(insight, task) self.logger.leave_function() - async def test_on_task(self, task: str, expected_answer: str, num_trials=1) -> Tuple[str, int, int]: + async def test_on_task(self, task: str, expected_answer: str, num_trials: int = 1) -> Tuple[str, int, int]: """ Assigns a task to the agent, along with any relevant insights retrieved from memory. """ self.logger.enter_function() - response = None + response = "" num_successes = 0 for trial in range(num_trials): @@ -109,7 +118,7 @@ async def add_insight_to_memory(self, insight: str, task: None | str = None) -> """ self.logger.enter_function() - generalized_task = None + generalized_task = "" if task is not None: self.logger.info("\nGIVEN TASK:") self.logger.info(task) @@ -176,14 +185,14 @@ async def retrieve_relevant_insights(self, task: str) -> List[str]: # Retrieve relevant insights from the memory bank. 
relevant_insights_and_relevances = self.memory_bank.get_relevant_insights(task_topics=task_topics) - relevant_insights = [] + relevant_insights: List[str] = [] self.logger.info("\n{} POTENTIALLY RELEVANT INSIGHTS".format(len(relevant_insights_and_relevances))) for insight, relevance in relevant_insights_and_relevances.items(): self.logger.info("\n INSIGHT: {}\n RELEVANCE: {:.3f}".format(insight, relevance)) relevant_insights.append(insight) # Apply a final validation stage to keep only the insights that the LLM concludes are relevant. - validated_insights = [] + validated_insights: List[str] = [] for insight in relevant_insights: if await self.prompter.validate_insight(insight, task): validated_insights.append(insight) @@ -198,7 +207,7 @@ async def retrieve_relevant_insights(self, task: str) -> List[str]: self.logger.leave_function() return validated_insights - def _format_memory_section(self, memories) -> str: + def _format_memory_section(self, memories: List[str]) -> str: """ Formats a list of memories as a section for appending to a task description. """ @@ -209,7 +218,9 @@ def _format_memory_section(self, memories) -> str: memory_section += "- " + mem + "\n" return memory_section - async def _test_for_failure(self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int) -> Tuple[bool, str, str]: + async def _test_for_failure( + self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int + ) -> Tuple[bool, str, str]: """ Attempts to solve the given task multiple times to find a failure case to learn from. """ @@ -218,7 +229,7 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a self.logger.info("\nExpected answer: {}\n".format(expected_answer)) failure_found = False - response, work_history = None, None + response, work_history = "", "" for trial in range(num_trials): self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) @@ -241,7 +252,9 @@ async def _test_for_failure(self, task: str, task_plus_insights: str, expected_a self.logger.leave_function() return failure_found, response, work_history - async def _iterate_on_task(self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int) -> Tuple[str, None | str]: + async def _iterate_on_task( + self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int + ) -> Tuple[str, None | str]: """ Repeatedly assigns a task to the agent, and tries to learn from failures by creating useful insights as memories. """ @@ -249,9 +262,9 @@ async def _iterate_on_task(self, task: str, expected_answer: str, max_train_tria self.logger.info("\nTask description: {}".format(task)) self.logger.info("\nExpected answer: {}\n".format(expected_answer)) - final_response = None + final_response = "" old_insights = await self.retrieve_relevant_insights(task) - new_insights = [] + new_insights: List[str] = [] last_insight = None insight = None successful_insight = None @@ -323,10 +336,8 @@ async def assign_task(self, task: str, use_memory: bool = True, should_await: bo # Attempt to solve the task. 
self.logger.info("Try to solve the task.\n") - if should_await: - response, _ = await self.task_assignment_callback(task) - else: - response, _ = self.task_assignment_callback(task) + assert should_await + response, _ = await self.task_assignment_callback(task) self.logger.leave_function() return response From ba14e7898e4cf2b22ef238a665f829fe48564746 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Sun, 2 Feb 2025 20:58:16 -0800 Subject: [PATCH 85/93] uv fixes --- .../autogen_ext/agentic_memory/apprentice.py | 37 ++- .../autogen_ext/agentic_memory/page_logger.py | 253 +++++++++++------- 2 files changed, 178 insertions(+), 112 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index 2c6d12d516c3..66463286a8e9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -1,11 +1,11 @@ import random import time -from typing import Any, Dict, List, Tuple +from typing import Any, Dict, List, Sequence, Tuple from autogen_agentchat.agents import AssistantAgent -from autogen_agentchat.messages import TextMessage +from autogen_agentchat.base import TaskResult +from autogen_agentchat.messages import AgentEvent, ChatMessage, TextMessage from autogen_agentchat.teams import MagenticOneGroupChat -from autogen_agentchat.ui._console import Console from autogen_core.models import ( ChatCompletionClient, LLMMessage, @@ -14,7 +14,6 @@ ) from autogen_ext.agents.web_surfer import MultimodalWebSurfer -from autogen_ext.agents.web_surfer._utils import message_content_to_str from .agentic_memory_controller import AgenticMemoryController from .page_logger import PageLogger @@ -168,14 +167,14 @@ async def _assign_task_to_simple_agent(self, task: str) -> Tuple[Any, Any]: ) # Get the agent's response to the task. - task_result = await simple_agent.run(task=TextMessage(content=task, source="User")) - messages = task_result.messages - message = messages[-1] + task_result: TaskResult = await simple_agent.run(task=TextMessage(content=task, source="User")) + messages: Sequence[AgentEvent | ChatMessage] = task_result.messages + message: AgentEvent | ChatMessage = messages[-1] response_str = message.content # Log the model call - self.logger.log_model_call( - summary="Ask the model to complete the task", input_messages=input_messages, response=task_result + self.logger.log_model_task( + summary="Ask the model to complete the task", input_messages=input_messages, task_result=task_result ) self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str)) @@ -212,10 +211,22 @@ async def _assign_task_to_magentic_one(self, task: str) -> Tuple[str, str]: max_turns=20, ) - # Get the team's text response to the task. - stream = team.run_stream(task=task) - task_result = await Console(stream) - response_str = "\n".join([message_content_to_str(message.content) for message in task_result.messages]) + # Get the team's response to the task. + task_result: TaskResult = await team.run(task=task) + + assert isinstance(task_result, TaskResult) + messages = task_result.messages + + response_str_list: List[str] = [] + for message in messages: + content = message.content + if isinstance(content, str): + content_str = content + else: + content_str = "Not a string." 
+            response_str_list.append(content_str)
+        response_str = "\n".join(response_str_list)
+
         self.logger.info("\n----- RESPONSE -----\n\n{}\n".format(response_str))
 
         # MagenticOne's response is the chat history, which we use here as the work history.
diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py
index e32106c0e0b8..ecc73d227afe 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py
@@ -2,19 +2,21 @@
 import json
 import os
 import shutil
-from typing import Dict, List, Union, Optional
+from typing import Any, Dict, List, Optional, Sequence, Union
 
-from autogen_core import FunctionCall, Image
+from autogen_agentchat.base import TaskResult
+from autogen_agentchat.messages import AgentEvent, ChatMessage
+from autogen_core import Image
 from autogen_core.models import (
     AssistantMessage,
-    FunctionExecutionResult,
+    CreateResult,
     FunctionExecutionResultMessage,
     LLMMessage,
+    RequestUsage,
     SystemMessage,
     UserMessage,
-    CreateResult,
 )
-from autogen_agentchat.base import TaskResult
+
 from ._utils import MessageContent
 
 
@@ -45,6 +47,25 @@ def html_closing() -> str:
     return """"""
 
 
+def decorate_text(text: str, color: str, weight: str = "bold", demarcate: bool = False) -> str:
+    """
+    Returns a string of text with HTML styling for weight and color.
+    """
+    if demarcate:
+        text = f"<<<<< {text} >>>>>"
+    return f'{text}'
+
+
+def link_to_image(image_path: str, description: str) -> str:
+    """
+    Returns an HTML string defining a thumbnail link to an image.
+    """
+    # To avoid a bug in html rendering that displays underscores to the left of thumbnails,
+    # define the following string on a single line.
+    link = f"""{description}"""
+    return link
+
+
 class PageLogger:
     """
     Logs text and images to a set of HTML pages, one per function/method, linked to each other in a call tree.
@@ -55,20 +76,22 @@ class PageLogger:
         - path: The path to the directory where the log files will be saved.
 
     Methods:
-        debug: Adds text to the current page if debugging level <= DEBUG.
-        info: Adds text to the current page if debugging level <= INFO.
-        warning: Adds text to the current page if debugging level <= WARNING.
-        error: Adds text to the current page if debugging level <= ERROR.
-        critical: Adds text to the current page if debugging level <= CRITICAL.
-        log_message_content: Adds a page containing the message's content, including any images, if debugging level <= INFO.
-        log_model_call: Adds a page containing all messages to or from a model, including any images, if debugging level <= INFO.
+        debug: Adds DEBUG text to the current page if debugging level <= DEBUG.
+        info: Adds INFO text to the current page if debugging level <= INFO.
+        warning: Adds WARNING text to the current page if debugging level <= WARNING.
+        error: Adds ERROR text to the current page if debugging level <= ERROR.
+        critical: Adds CRITICAL text to the current page if debugging level <= CRITICAL.
+        log_message_content: Adds a page containing the message's content, including any images.
+        log_model_call: Logs messages sent to a model and the CreateResult response to a new page.
+        log_model_task: Logs messages sent to a model and the TaskResult response to a new page.
         log_link_to_local_file: Returns a link to a local file in the log.
+        add_link_to_image: Inserts a thumbnail link to an image to the page.
flush: Writes the current state of the log to disk. - enter_function: Adds a new page corresponding to the current function call, if debugging level <= INFO. - leave_function: Finishes the page corresponding to the current function, if debugging level <= INFO + enter_function: Adds a new page corresponding to the current function call. + leave_function: Finishes the page corresponding to the current function call. """ - def __init__(self, settings: Dict) -> None: + def __init__(self, settings: Dict[str, Any]) -> None: self.levels = { "DEBUG": 10, "INFO": 20, @@ -82,7 +105,7 @@ def __init__(self, settings: Dict) -> None: return self.log_dir = os.path.expanduser(settings["path"]) self.page_stack = PageStack() - self.pages = [] + self.pages: List[Page] = [] self.last_page_id = 0 self.name = "0 Call Tree" self._create_run_dir() @@ -120,14 +143,14 @@ def _add_page(self, summary: str, show_in_call_tree: bool = True, finished: bool def _log_text(self, text: str) -> None: """ - Adds text to the current page, depending on the current logging level. + Adds text to the current page. """ page = self.page_stack.top() page.add_lines(text, flush=True) def debug(self, line: str) -> None: """ - Adds text to the current page if debugging level <= DEBUG. + Adds DEBUG text to the current page if debugging level <= DEBUG. """ if self.level <= self.levels["DEBUG"]: self._log_text(line) @@ -162,7 +185,7 @@ def critical(self, line: str) -> None: def _message_source(self, message: LLMMessage) -> str: """ - Returns a string indicating the source of a message. + Returns a decorated string indicating the source of a message. """ source = "UNKNOWN" color = "black" @@ -178,29 +201,15 @@ def _message_source(self, message: LLMMessage) -> str: elif isinstance(message, FunctionExecutionResultMessage): source = "FUNCTION" color = "red" - return self._decorate_text(source, color, demarcate=True) + return decorate_text(source, color, demarcate=True) - def _decorate_text(self, text: str, color: str, weight: str = "bold", demarcate: bool = False) -> str: + def _format_message_content(self, message_content: MessageContent) -> str: """ - Returns a string of text with HTML styling for weight and color. - """ - if demarcate: - text = f"<<<<< {text} >>>>>" - return f'{text}' - - def _format_message_content( - self, page: "Page", message: LLMMessage | None = None, message_content: MessageContent | None = None - ) -> str: - """ - Formats the message content for logging. Either message or message_content must not be None. + Formats the message content for logging. """ # Start by converting the message content to a list of strings. - content = None - content_list = [] - if message_content is not None: - content = message_content - if message is not None: - content = message.content + content_list: List[Union[MessageContent, None]] = [] + content = message_content if isinstance(content, str): content_list.append(content) elif isinstance(content, list): @@ -213,7 +222,7 @@ def _format_message_content( image_path = os.path.join(self.log_dir, image_filename) item.image.save(image_path) # Add a link to the image. - content_list.append(page.link_to_image(image_filename, "message_image")) + content_list.append(link_to_image(image_filename, "message_image")) elif isinstance(item, Dict): # Add a dictionary to the log. 
                     json_str = json.dumps(item, indent=4)
@@ -237,35 +246,68 @@ def log_message_content(self, message_content: MessageContent, summary: str) ->
             return None
         page = self._add_page(summary=summary, show_in_call_tree=False)
         self.page_stack.write_stack_to_page(page)
-        page.add_lines(self._format_message_content(page, message_content=message_content))
+        page.add_lines(self._format_message_content(message_content=message_content))
         page.flush()
 
-    def log_model_call(self, summary: str, input_messages: List[LLMMessage], response: Union[CreateResult, TaskResult]) -> Optional["Page"]:
+    def _log_model_messages(
+        self, summary: str, input_messages: List[LLMMessage], response_str: str, usage: RequestUsage | None
+    ) -> Optional["Page"]:
         """
-        Adds a page containing all messages to or from a model, including any images.
+        Adds a page containing the messages to a model (including any input images) and its response.
         """
-        if self.level > self.levels["INFO"]:
-            return None
         page = self._add_page(summary=summary, show_in_call_tree=False)
         self.page_stack.write_stack_to_page(page)
-        if isinstance(response, TaskResult):
-            usage = response.messages[-1].models_usage
-            message = response.messages[-1]
-        else:
-            usage = response.usage
-            message = response
-
-        page.add_lines("{} prompt tokens".format(usage.prompt_tokens))
-        page.add_lines("{} completion tokens".format(usage.completion_tokens))
+        if usage is not None:
+            page.add_lines("{} prompt tokens".format(usage.prompt_tokens))
+            page.add_lines("{} completion tokens".format(usage.completion_tokens))
         for m in input_messages:
             page.add_lines("\n" + self._message_source(m))
-            page.add_lines(self._format_message_content(page, message=m))
-        page.add_lines("\n" + self._decorate_text("ASSISTANT RESPONSE", "green", demarcate=True))
-        page.add_lines(self._format_message_content(page, message=message))
+            page.add_lines(self._format_message_content(message_content=m.content))
+        page.add_lines("\n" + decorate_text("ASSISTANT RESPONSE", "green", demarcate=True))
+        page.add_lines("\n" + response_str + "\n")
         page.flush()
         return page
 
+    def log_model_call(
+        self, summary: str, input_messages: List[LLMMessage], response: CreateResult
+    ) -> Optional["Page"]:
+        """
+        Logs messages sent to a model and the CreateResult response to a new page.
+        """
+        if self.level > self.levels["INFO"]:
+            return None
+
+        response_str = response.content
+        if not isinstance(response_str, str):
+            response_str = "??"
+
+        page = self._log_model_messages(summary, input_messages, response_str, response.usage)
+        return page
+
+    def log_model_task(
+        self, summary: str, input_messages: List[LLMMessage], task_result: TaskResult
+    ) -> Optional["Page"]:
+        """
+        Logs messages sent to a model and the TaskResult response to a new page.
+        """
+        if self.level > self.levels["INFO"]:
+            return None
+
+        messages: Sequence[AgentEvent | ChatMessage] = task_result.messages
+        message = messages[-1]
+        response_str = message.content
+        if not isinstance(response_str, str):
+            response_str = "??"
+
+        if hasattr(message, "models_usage"):
+            usage: RequestUsage | None = message.models_usage
+        else:
+            usage = RequestUsage(prompt_tokens=0, completion_tokens=0)
+
+        page = self._log_model_messages(summary, input_messages, response_str, usage)
+        return page
+
     def log_link_to_local_file(self, file_path: str) -> str:
         """
         Returns a link to a local file in the log.
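
The new pair of entry points mirrors the two ways a model response can arrive: `log_model_call` takes the raw `CreateResult` returned by a `ChatCompletionClient`, while `log_model_task` takes the `TaskResult` produced by an agent or team run, and both funnel into `_log_model_messages` so the rendered pages share one layout. A minimal sketch of how a caller might exercise both paths (the model name, log path, and messages are illustrative placeholders, not part of this patch):

```python
import asyncio

from autogen_agentchat.agents import AssistantAgent
from autogen_agentchat.messages import TextMessage
from autogen_core.models import UserMessage
from autogen_ext.agentic_memory import PageLogger
from autogen_ext.models.openai import OpenAIChatCompletionClient


async def main() -> None:
    # Placeholder settings; the keys match the PageLogger docstring above.
    logger = PageLogger({"level": "INFO", "path": "~/pagelogs/demo"})
    client = OpenAIChatCompletionClient(model="gpt-4o-2024-08-06")

    # A direct model call yields a CreateResult, logged with log_model_call.
    input_messages = [UserMessage(content="What is 2 + 2?", source="User")]
    create_result = await client.create(input_messages)
    logger.log_model_call(summary="Direct model call", input_messages=input_messages, response=create_result)

    # An agent run yields a TaskResult, logged with log_model_task.
    agent = AssistantAgent("example_agent", client)
    task_result = await agent.run(task=TextMessage(content="What is 2 + 2?", source="User"))
    logger.log_model_task(summary="Agent run", input_messages=input_messages, task_result=task_result)

    logger.flush(finished=True)


asyncio.run(main())
```
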
@@ -274,6 +316,19 @@ def log_link_to_local_file(self, file_path: str) -> str: link = f'{file_name}' return link + def add_link_to_image(self, description: str, source_image_path: str) -> None: + """ + Inserts a thumbnail link to an image to the page. + """ + # Remove every character from the string 'description' that is not alphanumeric or a space. + description = "".join(e for e in description if e.isalnum() or e.isspace()) + target_image_filename = str(self._get_next_page_id()) + " - " + description + # Copy the image to the log directory. + local_image_path = os.path.join(self.log_dir, target_image_filename) + shutil.copyfile(source_image_path, local_image_path) + self._log_text("\n" + description) + self._log_text(link_to_image(target_image_filename, description)) + def flush(self, finished: bool = False) -> None: """ Writes the current state of the log to disk. @@ -298,27 +353,31 @@ def enter_function(self) -> Optional["Page"]: """ if self.level > self.levels["INFO"]: return None - frame = inspect.currentframe().f_back # Get the calling frame - # Check if it's a method by looking for 'self' or 'cls' in f_locals - if "self" in frame.f_locals: - class_name = type(frame.f_locals["self"]).__name__ - elif "cls" in frame.f_locals: - class_name = frame.f_locals["cls"].__name__ - else: - class_name = None # Not part of a class + page = None + frame_type = inspect.currentframe() + if frame_type is not None: + frame = frame_type.f_back # Get the calling frame + if frame is not None: + # Check if it's a method by looking for 'self' or 'cls' in f_locals + if "self" in frame.f_locals: + class_name = type(frame.f_locals["self"]).__name__ + elif "cls" in frame.f_locals: + class_name = frame.f_locals["cls"].__name__ + else: + class_name = None # Not part of a class - if class_name is None: # Not part of a class - caller_name = frame.f_code.co_name - else: - caller_name = class_name + "." + frame.f_code.co_name + if class_name is None: # Not part of a class + caller_name = frame.f_code.co_name + else: + caller_name = class_name + "." + frame.f_code.co_name - # Create a new page for this function. - page = self._add_page(summary=caller_name, show_in_call_tree=True, finished=False) - self.page_stack.push(page) - self.page_stack.write_stack_to_page(page) + # Create a new page for this function. + page = self._add_page(summary=caller_name, show_in_call_tree=True, finished=False) + self.page_stack.push(page) + self.page_stack.write_stack_to_page(page) - page.add_lines("\nENTER {}".format(caller_name), flush=True) + page.add_lines("\nENTER {}".format(caller_name), flush=True) return page def leave_function(self) -> None: @@ -336,6 +395,18 @@ def leave_function(self) -> None: class Page: """ Represents a single HTML page in the logger output. + + Args: + page_logger: The PageLogger object that created this page. + index: The index of the page. + summary: A brief summary of the page's contents for display. + indent_level: The level of indentation in the call tree. + show_in_call_tree: Whether to display the page in the call tree. + finished: Whether the page is complete. + + Methods: + add_lines: Adds one or more lines to the page. 
+        flush: Writes the HTML page to disk.
     """
 
     def __init__(
@@ -357,17 +428,17 @@ def __init__(
         self.show_in_call_tree = show_in_call_tree
         self.finished = finished
         self.file_title = self.index_str + " " + self.summary
-        self.indentation_text = "| "*self.indent_level
+        self.indentation_text = "| " * self.indent_level
         self.full_link = f'{self.file_title}'
         self.line_text = self.indentation_text + self.full_link
-        self.lines = []
+        self.lines: List[str] = []
         self.flush()
 
     def add_lines(self, lines: str, flush: bool = False) -> None:
         """
         Adds one or more lines to the page.
         """
-        lines_to_add = []
+        lines_to_add: List[str] = []
         if "\n" in lines:
             lines_to_add = lines.split("\n")
         else:
@@ -376,28 +447,6 @@ def add_lines(self, lines: str, flush: bool = False) -> None:
         if flush:
             self.flush()
 
-    def link_to_image(self, image_path: str, description: str) -> str:
-        """
-        Returns an HTML string defining a thumbnail link to an image.
-        """
-        # To avoid a bug in heml rendering aht displays underscores to the left of thumbnails,
-        # define the following string on a single line.
-        link = f"""{description}"""
-        return link
-
-    def add_link_to_image(self, description: str, source_image_path: str) -> None:
-        """
-        Inserts a thumbnail link to an image to the page.
-        """
-        # Remove every character from the string 'description' that is not alphanumeric or a space.
-        description = "".join(e for e in description if e.isalnum() or e.isspace())
-        target_image_filename = str(self.page_logger._get_next_page_id()) + " - " + description
-        # Copy the image to the log directory.
-        local_image_path = os.path.join(self.page_logger.log_dir, target_image_filename)
-        shutil.copyfile(source_image_path, local_image_path)
-        self.add_lines("\n" + description)
-        self.add_lines(self.link_to_image(target_image_filename, description), flush=True)
-
     def flush(self) -> None:
         """
         Writes the HTML page to disk.
@@ -418,10 +467,16 @@ def flush(self) -> None:
 class PageStack:
     """
     A call stack containing a list of currently active function pages in the order they called each other.
+
+    Methods:
+        push: Adds a page to the top of the stack.
+        pop: Removes and returns the top page from the stack.
+        top: Returns the top page from the stack without removing it.
+        write_stack_to_page: Logs a properly indented string displaying the current call stack.
     """
 
     def __init__(self):
-        self.stack = []
+        self.stack: List[Page] = []
 
     def push(self, page: Page) -> None:
         """Adds a page to the top of the stack."""
From 2eb817e858f4808acc9f5a4aa662e1d3c792cc89 Mon Sep 17 00:00:00 2001
From: Ricky Loynd
Date: Mon, 3 Feb 2025 15:15:10 -0800
Subject: [PATCH 86/93] uv fixes

---
 .../src/autogen_ext/agentic_memory/README.md  | 54 +++++++++----------
 .../autogen_ext/agentic_memory/__init__.py    |  4 +-
 2 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md
index 4a1cbb5f831d..95a86e29a866 100644
--- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md
+++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/README.md
@@ -1,9 +1,9 @@
 # Agentic Memory
 
-This AutoGen extension provides an implementation of agentic memory, which we define as a 
-broad ability for AI agents to accomplish tasks more effectively by learning quickly and continually (over the long term). 
-This is distinct from what RAG or long context windows can provide. 
-While still under active research and development, this implementation of agentic memory +This AutoGen extension provides an implementation of agentic memory, which we define as a +broad ability for AI agents to accomplish tasks more effectively by learning quickly and continually (over the long term). +This is distinct from what RAG or long context windows can provide. +While still under active research and development, this implementation of agentic memory can be attached to virtually any unmodified AI agent, and is designed to enable agents that: * Remember guidance, corrections, and demonstrations provided by users. @@ -14,46 +14,46 @@ The implementation is also intended to: * Be general purpose, unconstrained by types and schemas required by standard databases. * Augment rather than interfere with an agent’s special capabilities, such as powerful reasoning, long-horizon autonomy, and tool handling. -* Operate in both foreground and background modes, so that an agent can discuss tasks with a user (in the foreground) +* Operate in both foreground and background modes, so that an agent can discuss tasks with a user (in the foreground) then work productively on those tasks (in the background) while the user does other things. * Allow for fine-grained transparency and auditing of individual memories by human users or other agents. -* Allow agents to be personalized (to a single user) as well as specialized (to a subject, domain or project). -The benefits of personalization scale linearly with the number of users, but the benefits of domain specialization -can scale quadratically with the number of users working in that domain, as insights gained from interactions with one user +* Allow agents to be personalized (to a single user) as well as specialized (to a subject, domain or project). +The benefits of personalization scale linearly with the number of users, but the benefits of domain specialization +can scale quadratically with the number of users working in that domain, as insights gained from interactions with one user can benefit other users in similar situations. -* Support multiple memory banks dynamically attached to an agent at runtime. +* Support multiple memory banks dynamically attached to an agent at runtime. * Enable enforcement of security boundaries at the level of individual memory banks. * Allow users to download and port memory banks between agents and systems. ![agentic_memory.png](../../../imgs/agentic_memory.png) -The block diagram above outlines the key components of our baseline agentic memory architecture, -which augments an agent or team with agentic memory mechanisms. +The block diagram above outlines the key components of our baseline agentic memory architecture, +which augments an agent or team with agentic memory mechanisms. -The **Agentic Memory Controller** implements the fast-learning methods described below, -and manages communication with an **Agentic Memory Bank** containing a vector DB and associated structures. +The **Agentic Memory Controller** implements the fast-learning methods described below, +and manages communication with an **Agentic Memory Bank** containing a vector DB and associated structures. -The **Apprentice** is a minimal reference implementation that wraps the combination of agentic memory plus some agent or team. -Certain applications will use the Apprentice, +The **Apprentice** is a minimal reference implementation that wraps the combination of agentic memory plus some agent or team. 
+Certain applications will use the Apprentice, while others will directly instantiate and call the Agentic Memory Controller. -We’ve successfully tested agentic memory with a simple AssistantAgent and MagenticOneGroupChat. +We’ve successfully tested agentic memory with a simple AssistantAgent and MagenticOneGroupChat. ## Memory Creation and Storage -Each stored memory is an insight (in text form) crafted to help the agent accomplish future tasks that are similar -to some task encountered in the past. If the user provides advice for solving a given task, -the advice is extracted and stored as an insight. If the user demonstrates how to perform a task, -the task and demonstration are stored together as an insight that could be applied to similar but different tasks. -If the agent is given a task (free of side-effects) and some means of determining success or failure, +Each stored memory is an insight (in text form) crafted to help the agent accomplish future tasks that are similar +to some task encountered in the past. If the user provides advice for solving a given task, +the advice is extracted and stored as an insight. If the user demonstrates how to perform a task, +the task and demonstration are stored together as an insight that could be applied to similar but different tasks. +If the agent is given a task (free of side-effects) and some means of determining success or failure, the memory controller repeats the following learning loop in the background some number of times: -1. Test the agent on the task a few times to check for a failure. +1. Test the agent on the task a few times to check for a failure. 2. If a failure is found, analyze the agent’s response in order to: - 1. Diagnose the failure of reasoning or missing information, + 1. Diagnose the failure of reasoning or missing information, 2. Phrase a general piece of advice, such as what a teacher might give to a student, - 3. Temporarily append this advice to the task description, - 4. Return to step 1. + 3. Temporarily append this advice to the task description, + 4. Return to step 1. 5. If some piece of advice succeeds in helping the agent solve the task a number of times, add the advice as an insight to memory. 3. For each insight to be stored in memory, an LLM is prompted to generate a set of free-form, multi-word topics related to the insight. Each topic is embedded to a fixed-length vector and stored in a vector DB mapping it to the topic’s related insight. @@ -66,7 +66,7 @@ When the agent is given a task, the following steps are performed by the memory 4. These candidate insights are filtered by the aggregate similarity of their stored topics to the query topics. 5. In the final filtering stage, an LLM is prompted to return only those insights that seem potentially useful in solving the task at hand. -Retrieved insights that pass the filtering steps are listed under a heading like +Retrieved insights that pass the filtering steps are listed under a heading like “Important insights that may help solve tasks like this”, then appended to the task description before it is passed to the agent as usual. 
## Setup and Usage @@ -77,5 +77,5 @@ Install AutoGen and its extension package as follows: We provide [sample code](../../../../../samples/agentic_memory) to illustrate the following forms of memory-based fast learning: * Agent learning from user advice and corrections -* Agent learning from user demonstrations +* Agent learning from user demonstrations * Agent learning from its own experience diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py index d08e6e37554a..226f6ba7131c 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/__init__.py @@ -1,6 +1,6 @@ +from .agentic_memory_controller import AgenticMemoryController +from .apprentice import Apprentice from .grader import Grader from .page_logger import PageLogger -from .apprentice import Apprentice -from .agentic_memory_controller import AgenticMemoryController __all__ = ["Apprentice", "PageLogger", "Grader", "AgenticMemoryController"] From 6633169dff7efe46e3df448b6286ac767abb1a3c Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Mon, 3 Feb 2025 17:39:41 -0800 Subject: [PATCH 87/93] uv fixes --- python/samples/agentic_memory/README.md | 3 + .../eval_learning_from_demonstration.py | 59 +++++++----------- .../agentic_memory/eval_self_teaching.py | 62 +++++++++---------- .../agentic_memory/eval_teachability.py | 56 ++++++++--------- .../settings/demonstration.yaml | 1 - .../settings/self_teaching.yaml | 1 - .../agentic_memory/settings/teachability.yaml | 1 - python/samples/agentic_memory/utils.py | 32 ++++++++++ python/samples/agentic_memory/utils/client.py | 31 ---------- 9 files changed, 112 insertions(+), 134 deletions(-) create mode 100644 python/samples/agentic_memory/utils.py delete mode 100644 python/samples/agentic_memory/utils/client.py diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index b6e61915b068..342c04e416aa 100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -22,6 +22,9 @@ Install AutoGen and its extension package as follows: `pip install "autogen-ext[agentic-memory]"` +Assign your OpenAI key to the environment variable OPENAI_API_KEY, +or else modify `utils/client.py` as appropriate for the model you choose. + ## Running the Samples diff --git a/python/samples/agentic_memory/eval_learning_from_demonstration.py b/python/samples/agentic_memory/eval_learning_from_demonstration.py index bd7d1d9eb21c..f5d85e0ff76b 100644 --- a/python/samples/agentic_memory/eval_learning_from_demonstration.py +++ b/python/samples/agentic_memory/eval_learning_from_demonstration.py @@ -1,18 +1,17 @@ import asyncio import sys -from typing import Dict -import yaml +from typing import Any, Dict from autogen_core.models import ( ChatCompletionClient, ) from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger +from utils import create_oai_client, load_yaml_file -from utils.client import create_oai_client - -async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatCompletionClient, - logger: PageLogger, settings: Dict) -> str: +async def eval_learning_from_demonstration( + apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict[str, Any] +) -> str: """ Evaluates the ability to learn quickly from demonstrations. 
""" @@ -22,17 +21,11 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatC grader = Grader(client, logger) # Load the specified data. - with open(settings["main_task_file"], "r") as file: - # The task being tested. - main_task = yaml.load(file, Loader=yaml.FullLoader) - task_description = main_task["task_description"] - expected_answer = main_task["expected_answer"] - with open(settings["demo_task_file"], "r") as file: - # A similar but different task. - demo_task = yaml.load(file, Loader=yaml.FullLoader)["task_description"] - with open(settings["demo_solution_file"], "r") as file: - # A demonstration of solving the second task. - demo_solution = yaml.load(file, Loader=yaml.FullLoader)["demo"] + main_task = load_yaml_file(settings["main_task_file"]) + task_description = main_task["task_description"] + expected_answer = main_task["expected_answer"] + demo_task = load_yaml_file(settings["demo_task_file"])["task_description"] + demo_solution = load_yaml_file(settings["demo_solution_file"])["demo"] # Start by clearing memory then running a baseline test. logger.info("To get a baseline, clear memory, then assign the task.") @@ -73,27 +66,23 @@ async def eval_learning_from_demonstration(apprentice: Apprentice, client: ChatC return "\neval_learning_from_demonstration\n" + results_str_1 + "\n" + results_str_2 -async def run_example(settings_filepath) -> None: +async def run_example(settings_filepath: str) -> None: """ Runs the code example with the necessary components. """ - with open(settings_filepath, "r") as file: - # Create the necessary components. - settings = yaml.load(file, Loader=yaml.FullLoader) - logger = PageLogger(settings["PageLogger"]) - client = create_oai_client(settings["client"], logger) - apprentice = Apprentice(settings["Apprentice"], client, logger) - - # Call the example function. - results = await eval_learning_from_demonstration(apprentice, client, logger, settings["test"]) - - if hasattr(client, "finalize"): - # If this is a client wrapper, it needs to be finalized. - client.finalize() - - # Finish up. - logger.flush(finished=True) - print(results) + settings = load_yaml_file(settings_filepath) + + # Create the necessary components. + logger = PageLogger(settings["PageLogger"]) + client = create_oai_client(settings["client"]) + apprentice = Apprentice(settings["Apprentice"], client, logger) + + # Call the example function. + results = await eval_learning_from_demonstration(apprentice, client, logger, settings["test"]) + + # Finish up. 
+ logger.flush(finished=True) + print(results) if __name__ == "__main__": diff --git a/python/samples/agentic_memory/eval_self_teaching.py b/python/samples/agentic_memory/eval_self_teaching.py index 26fceeab6e0d..a3e76fd81d5c 100644 --- a/python/samples/agentic_memory/eval_self_teaching.py +++ b/python/samples/agentic_memory/eval_self_teaching.py @@ -1,18 +1,18 @@ import asyncio import sys -from typing import Dict -import yaml +from typing import Any, Dict from autogen_core.models import ( ChatCompletionClient, ) from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger -from utils.client import create_oai_client +from utils import create_oai_client, load_yaml_file -async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClient, - logger: PageLogger, settings: Dict) -> str: +async def eval_self_teaching( + apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict[str, Any] +) -> str: """ Evaluates the ability of an agent to learn quickly from its own trial and error. """ @@ -23,16 +23,14 @@ async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClien grader = Grader(client, logger) # Load the specified data. - with open(settings["task_file_1"], "r") as file: - # Train and test on this task. - task_1 = yaml.load(file, Loader=yaml.FullLoader) - task_description_1 = task_1["task_description"] - expected_answer_1 = task_1["expected_answer"] - with open(settings["task_file_2"], "r") as file: - # Test generalization on this different, similar task. - task_2 = yaml.load(file, Loader=yaml.FullLoader) - task_description_2 = task_2["task_description"] - expected_answer_2 = task_2["expected_answer"] + task_dict_1 = load_yaml_file(settings["task_file_1"]) + task_description_1 = task_dict_1["task_description"] + expected_answer_1 = task_dict_1["expected_answer"] + + # Test generalization on this different, similar task. + task_dict_2 = load_yaml_file(settings["task_file_2"]) + task_description_2 = task_dict_2["task_description"] + expected_answer_2 = task_dict_2["expected_answer"] # Start the test with empty memory. apprentice.reset_memory() @@ -40,7 +38,7 @@ async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClien total_num_successes_1 = 0 total_num_successes_2 = 0 total_num_trials = 0 - for i in range(num_loops): + for _ in range(num_loops): # Train on the first task. await apprentice.train_on_task(task=task_description_1, expected_answer=expected_answer_1) @@ -85,27 +83,23 @@ async def eval_self_teaching(apprentice: Apprentice, client: ChatCompletionClien return "\neval_self_teaching\n" + results_str_1 + "\n" + results_str_2 -async def run_example(settings_filepath) -> None: +async def run_example(settings_filepath: str) -> None: """ Runs the code example with the necessary components. """ - with open(settings_filepath, "r") as file: - # Create the necessary components. - settings = yaml.load(file, Loader=yaml.FullLoader) - logger = PageLogger(settings["PageLogger"]) - client = create_oai_client(settings["client"], logger) - apprentice = Apprentice(settings["Apprentice"], client, logger) - - # Call the example function. - results = await eval_self_teaching(apprentice, client, logger, settings["test"]) - - if hasattr(client, "finalize"): - # If this is a client wrapper, it needs to be finalized. - client.finalize() - - # Finish up. - logger.flush(finished=True) - print(results) + settings = load_yaml_file(settings_filepath) + + # Create the necessary components. 
+    logger = PageLogger(settings["PageLogger"])
+    client = create_oai_client(settings["client"])
+    apprentice = Apprentice(settings["Apprentice"], client, logger)
+
+    # Call the example function.
+    results = await eval_self_teaching(apprentice, client, logger, settings["test"])
+
+    # Finish up.
+    logger.flush(finished=True)
+    print(results)
 
 
 if __name__ == "__main__":
diff --git a/python/samples/agentic_memory/eval_teachability.py b/python/samples/agentic_memory/eval_teachability.py
index 0c49e4d0550b..6973f5d54db0 100644
--- a/python/samples/agentic_memory/eval_teachability.py
+++ b/python/samples/agentic_memory/eval_teachability.py
@@ -1,32 +1,30 @@
 import asyncio
 import sys
-from typing import Dict
-import yaml
+from typing import Any, Dict
 
 from autogen_core.models import (
     ChatCompletionClient,
 )
-
 from autogen_ext.agentic_memory import Apprentice, Grader, PageLogger
+from utils import create_oai_client, load_yaml_file
 
-from utils.client import create_oai_client
 
-async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict) -> str:
+async def eval_teachability(
+    apprentice: Apprentice, client: ChatCompletionClient, logger: PageLogger, settings: Dict[str, Any]
+) -> str:
     """
     Evaluates the ability to learn quickly from user teachings, hints, and advice.
     """
     logger.enter_function()
 
     # Load the specified data.
-    with open(settings["task_file"], "r") as file:
-        # The task being tested.
-        task = yaml.load(file, Loader=yaml.FullLoader)
-        task_description = task["task_description"]
-        expected_answer = task["expected_answer"]
-    with open(settings["advice_file"], "r") as file:
-        # Advice for solving such tasks.
-        advice = yaml.load(file, Loader=yaml.FullLoader)["advice"]
+    task_dict = load_yaml_file(settings["task_file"])
+    task_description = task_dict["task_description"]
+    expected_answer = task_dict["expected_answer"]
+
+    advice_dict = load_yaml_file(settings["advice_file"])
+    advice = advice_dict["advice"]
 
     # First test without memory.
     apprentice.reset_memory()
@@ -68,27 +66,23 @@ async def eval_teachability(apprentice: Apprentice, client: ChatCompletionClient
     )
     return "\neval_teachability\n" + results_str_1 + "\n" + results_str_2
 
 
-async def run_example(settings_filepath) -> None:
+async def run_example(settings_filepath: str) -> None:
     """
     Runs the code example with the necessary components.
     """
-    with open(settings_filepath, "r") as file:
-        # Create the necessary components.
-        settings = yaml.load(file, Loader=yaml.FullLoader)
-        logger = PageLogger(settings["PageLogger"])
-        client = create_oai_client(settings["client"], logger)
-        apprentice = Apprentice(settings["Apprentice"], client, logger)
-
-        # Call the example function.
-        results = await eval_teachability(apprentice, client, logger, settings["test"])
-
-        if hasattr(client, "finalize"):
-            # If this is a client wrapper, it needs to be finalized.
-            client.finalize()
-
-        # Finish up.
-        logger.flush(finished=True)
-        print(results)
+    settings = load_yaml_file(settings_filepath)
+
+    # Create the necessary components.
+    logger = PageLogger(settings["PageLogger"])
+    client = create_oai_client(settings["client"])
+    apprentice = Apprentice(settings["Apprentice"], client, logger)
+
+    # Call the example function.
+    results = await eval_teachability(apprentice, client, logger, settings["test"])
+
+    # Finish up.
+    logger.flush(finished=True)
+    print(results)


 if __name__ == "__main__":
diff --git a/python/samples/agentic_memory/settings/demonstration.yaml b/python/samples/agentic_memory/settings/demonstration.yaml
index 67c5d1beebae..04d36810cd93 100644
--- a/python/samples/agentic_memory/settings/demonstration.yaml
+++ b/python/samples/agentic_memory/settings/demonstration.yaml
@@ -5,7 +5,6 @@ PageLogger:

 client:
   model: gpt-4o-2024-08-06
-  # api_key: sk-  # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY.
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0
diff --git a/python/samples/agentic_memory/settings/self_teaching.yaml b/python/samples/agentic_memory/settings/self_teaching.yaml
index e0f8e5497561..de4567263ed9 100644
--- a/python/samples/agentic_memory/settings/self_teaching.yaml
+++ b/python/samples/agentic_memory/settings/self_teaching.yaml
@@ -5,7 +5,6 @@ PageLogger:

 client:
   model: gpt-4o-2024-08-06
-  # api_key: sk-  # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY.
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0
diff --git a/python/samples/agentic_memory/settings/teachability.yaml b/python/samples/agentic_memory/settings/teachability.yaml
index 07387f76ada2..139f1a35b9a5 100644
--- a/python/samples/agentic_memory/settings/teachability.yaml
+++ b/python/samples/agentic_memory/settings/teachability.yaml
@@ -5,7 +5,6 @@ PageLogger:

 client:
   model: gpt-4o-2024-08-06
-  # api_key: sk-  # Supply your API key here. Or specify it in the environment variable OPENAI_API_KEY.
   temperature: 0.8
   max_completion_tokens: 4096
   presence_penalty: 0.0
diff --git a/python/samples/agentic_memory/utils.py b/python/samples/agentic_memory/utils.py
new file mode 100644
index 000000000000..c721f7462cb0
--- /dev/null
+++ b/python/samples/agentic_memory/utils.py
@@ -0,0 +1,32 @@
+from typing import Any, Dict
+import yaml
+
+from autogen_core.models import (
+    ChatCompletionClient,
+)
+from autogen_ext.models.openai import OpenAIChatCompletionClient
+
+
+def create_oai_client(settings: Dict[str, Any]) -> ChatCompletionClient:
+    """
+    Creates a chat completion client from OpenAI.
+    """
+    client = OpenAIChatCompletionClient(
+        model=settings["model"],
+        max_tokens=settings["max_completion_tokens"],
+        max_retries=settings["max_retries"],
+        temperature=settings["temperature"],
+        presence_penalty=settings["presence_penalty"],
+        frequency_penalty=settings["frequency_penalty"],
+        top_p=settings["top_p"],
+    )
+    return client
+
+
+def load_yaml_file(file_path: str) -> Any:
+    """
+    Loads a YAML file and returns its parsed contents.
+    """
+    with open(file_path, "r") as file:
+        return yaml.load(file, Loader=yaml.FullLoader)
+
diff --git a/python/samples/agentic_memory/utils/client.py b/python/samples/agentic_memory/utils/client.py
deleted file mode 100644
index 1c13e6616488..000000000000
--- a/python/samples/agentic_memory/utils/client.py
+++ /dev/null
@@ -1,31 +0,0 @@
-from autogen_core.models import (
-    ChatCompletionClient,
-)
-from autogen_ext.models.openai import OpenAIChatCompletionClient
-
-
-def create_oai_client(settings, logger) -> ChatCompletionClient:
-    """
-    Creates a chat completion client from OpenAI.
- """ - logger.enter_function() - args = {} - args["model"] = settings["model"] - args["max_completion_tokens"] = settings["max_completion_tokens"] - args["max_retries"] = settings["max_retries"] - if not args["model"].startswith("o1"): - args["temperature"] = settings["temperature"] - args["presence_penalty"] = settings["presence_penalty"] - args["frequency_penalty"] = settings["frequency_penalty"] - args["top_p"] = settings["top_p"] - if "api_key" in settings: - args["api_key"] = settings["api_key"] - - # Instantiate the client. - client = OpenAIChatCompletionClient(**args) - - # Log some details. - logger.info("Client: {}".format(client._resolved_model)) - logger.info(" created through OpenAI") - logger.leave_function() - return client From b4ea0ce9d16568818cf5ec41f6beb54226d7e8c9 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 4 Feb 2025 16:27:21 -0800 Subject: [PATCH 88/93] uv fixes --- python/packages/autogen-ext/pyproject.toml | 2 +- python/samples/agentic_memory/README.md | 8 +- python/uv.lock | 347 ++++++++++++++++++++- 3 files changed, 341 insertions(+), 16 deletions(-) diff --git a/python/packages/autogen-ext/pyproject.toml b/python/packages/autogen-ext/pyproject.toml index 55fae285772a..90f829c9a5d9 100644 --- a/python/packages/autogen-ext/pyproject.toml +++ b/python/packages/autogen-ext/pyproject.toml @@ -128,7 +128,7 @@ dev = [ [tool.ruff] extend = "../../pyproject.toml" include = ["src/**", "tests/*.py"] -exclude = ["src/autogen_ext/agents/web_surfer/*.js", "src/autogen_ext/runtimes/grpc/protos", "tests/protos"] +exclude = ["src/autogen_ext/agents/web_surfer/*.js", "src/autogen_ext/runtimes/grpc/protos", "tests/protos", "README.md"] [tool.pyright] extends = "../../pyproject.toml" diff --git a/python/samples/agentic_memory/README.md b/python/samples/agentic_memory/README.md index 342c04e416aa..8201509993d2 100644 --- a/python/samples/agentic_memory/README.md +++ b/python/samples/agentic_memory/README.md @@ -2,14 +2,14 @@ This directory contains code samples that illustrate the following forms of memory-based fast learning: * Agent learning from user advice and corrections -* Agent learning from user demonstrations +* Agent learning from user demonstrations * Agent learning from its own experience Each sample is contained in a separate python script, using data and settings stored in yaml files. Note that since agent behavior is non-deterministic, results will vary between runs. -To watch operations live in a browser and see how agentic memory works, -open the HTML page at the location specified at the top of the settings file, +To watch operations live in a browser and see how agentic memory works, +open the HTML page at the location specified at the top of the settings file, such as: `~/pagelogs/teachability/0 Call Tree.html` The settings files specify a _thin agent_ by default, which is just the model client plus a canned system prompt. @@ -56,7 +56,7 @@ By using memory, the agent's success rate is usually higher on the second set of ### Agent Learning from Its Own Experience This sample asks the agent to perform a reasoning task on which it usually fails. -Then the agent (running in the background) iterates through a learning loop in an effort to find a solution, +Then the agent (running in the background) iterates through a learning loop in an effort to find a solution, which it then stores as an insight in memory. 
Finally the agent is tested again to see if it can retrieve and apply the insight to the original task, as well as to a similar but different task to test generalization. diff --git a/python/uv.lock b/python/uv.lock index 87c5320f0343..7841a3367d9e 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -354,6 +354,18 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/f8/ed/e97229a566617f2ae958a6b13e7cc0f585470eac730a73e9e82c32a3cdd2/arrow-1.3.0-py3-none-any.whl", hash = "sha256:c728b120ebc00eb84e01882a6f5e7927a53960aa990ce7dd2b10f39005a67f80", size = 66419 }, ] +[[package]] +name = "asgiref" +version = "3.8.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "typing-extensions", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/29/38/b3395cc9ad1b56d2ddac9970bc8f4141312dbaec28bc7c218b0dfafd0f42/asgiref-3.8.1.tar.gz", hash = "sha256:c343bd80a0bec947a9860adb4c432ffa7db769836c64238fc34bdc3fec84d590", size = 35186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/39/e3/893e8757be2612e6c266d9bb58ad2e3651524b5b40cf56761e985a28b13e/asgiref-3.8.1-py3-none-any.whl", hash = "sha256:3e1e3ecc849832fe52ccf2cb6686b7a55f82bb1d6aee72a58826471390335e47", size = 23828 }, +] + [[package]] name = "asttokens" version = "2.4.1" @@ -576,6 +588,9 @@ dependencies = [ ] [package.optional-dependencies] +agentic-memory = [ + { name = "chromadb" }, +] azure = [ { name = "azure-ai-inference" }, { name = "azure-core" }, @@ -688,6 +703,7 @@ requires-dist = [ { name = "azure-ai-inference", marker = "extra == 'azure'", specifier = ">=1.0.0b7" }, { name = "azure-core", marker = "extra == 'azure'" }, { name = "azure-identity", marker = "extra == 'azure'" }, + { name = "chromadb", marker = "extra == 'agentic-memory'", specifier = ">=0.6.3" }, { name = "diskcache", marker = "extra == 'diskcache'", specifier = ">=5.6.3" }, { name = "docker", marker = "extra == 'docker'", specifier = "~=7.0" }, { name = "ffmpeg-python", marker = "extra == 'video-surfer'" }, @@ -749,7 +765,6 @@ requires-dist = [ [[package]] name = "autogenstudio" -version = "0.4.0" source = { editable = "packages/autogen-studio" } dependencies = [ { name = "aiofiles" }, @@ -934,6 +949,47 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ed/20/bc79bc575ba2e2a7f70e8a1155618bb1301eaa5132a8271373a6903f73f8/babel-2.16.0-py3-none-any.whl", hash = "sha256:368b5b98b37c06b7daf6696391c3240c938b37767d4584413e8438c5c435fa8b", size = 9587599 }, ] +[[package]] +name = "backoff" +version = "2.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/d7/5bbeb12c44d7c4f2fb5b56abce497eb5ed9f34d85701de869acedd602619/backoff-2.2.1.tar.gz", hash = "sha256:03f829f5bb1923180821643f8753b0502c3b682293992485b0eef2807afa5cba", size = 17001 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/73/b6e24bd22e6720ca8ee9a85a0c4a2971af8497d8f3193fa05390cbd46e09/backoff-2.2.1-py3-none-any.whl", hash = "sha256:63579f9a0628e06278f7e47b7d7d5b6ce20dc65c5e96a6f3ca99a6adca0396e8", size = 15148 }, +] + +[[package]] +name = "bcrypt" +version = "4.2.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/56/8c/dd696962612e4cd83c40a9e6b3db77bfe65a830f4b9af44098708584686c/bcrypt-4.2.1.tar.gz", hash = "sha256:6765386e3ab87f569b276988742039baab087b2cdb01e809d74e74503c2faafe", size = 24427 } +wheels = [ + { url = 
"https://files.pythonhosted.org/packages/bc/ca/e17b08c523adb93d5f07a226b2bd45a7c6e96b359e31c1e99f9db58cb8c3/bcrypt-4.2.1-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:1340411a0894b7d3ef562fb233e4b6ed58add185228650942bdc885362f32c17", size = 489982 }, + { url = "https://files.pythonhosted.org/packages/6a/be/e7c6e0fd6087ee8fc6d77d8d9e817e9339d879737509019b9a9012a1d96f/bcrypt-4.2.1-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:b1ee315739bc8387aa36ff127afc99120ee452924e0df517a8f3e4c0187a0f5f", size = 273108 }, + { url = "https://files.pythonhosted.org/packages/d6/53/ac084b7d985aee1a5f2b086d501f550862596dbf73220663b8c17427e7f2/bcrypt-4.2.1-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8dbd0747208912b1e4ce730c6725cb56c07ac734b3629b60d4398f082ea718ad", size = 278733 }, + { url = "https://files.pythonhosted.org/packages/8e/ab/b8710a3d6231c587e575ead0b1c45bb99f5454f9f579c9d7312c17b069cc/bcrypt-4.2.1-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:aaa2e285be097050dba798d537b6efd9b698aa88eef52ec98d23dcd6d7cf6fea", size = 273856 }, + { url = "https://files.pythonhosted.org/packages/9d/e5/2fd1ea6395358ffdfd4afe370d5b52f71408f618f781772a48971ef3b92b/bcrypt-4.2.1-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:76d3e352b32f4eeb34703370e370997065d28a561e4a18afe4fef07249cb4396", size = 279067 }, + { url = "https://files.pythonhosted.org/packages/4e/ef/f2cb7a0f7e1ed800a604f8ab256fb0afcf03c1540ad94ff771ce31e794aa/bcrypt-4.2.1-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:b7703ede632dc945ed1172d6f24e9f30f27b1b1a067f32f68bf169c5f08d0425", size = 306851 }, + { url = "https://files.pythonhosted.org/packages/de/cb/578b0023c6a5ca16a177b9044ba6bd6032277bd3ef020fb863eccd22e49b/bcrypt-4.2.1-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:89df2aea2c43be1e1fa066df5f86c8ce822ab70a30e4c210968669565c0f4685", size = 310793 }, + { url = "https://files.pythonhosted.org/packages/98/bc/9d501ee9d754f63d4b1086b64756c284facc3696de9b556c146279a124a5/bcrypt-4.2.1-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:04e56e3fe8308a88b77e0afd20bec516f74aecf391cdd6e374f15cbed32783d6", size = 320957 }, + { url = "https://files.pythonhosted.org/packages/a1/25/2ec4ce5740abc43182bfc064b9acbbf5a493991246985e8b2bfe231ead64/bcrypt-4.2.1-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:cfdf3d7530c790432046c40cda41dfee8c83e29482e6a604f8930b9930e94139", size = 339958 }, + { url = "https://files.pythonhosted.org/packages/6d/64/fd67788f64817727897d31e9cdeeeba3941eaad8540733c05c7eac4aa998/bcrypt-4.2.1-cp37-abi3-win32.whl", hash = "sha256:adadd36274510a01f33e6dc08f5824b97c9580583bd4487c564fc4617b328005", size = 160912 }, + { url = "https://files.pythonhosted.org/packages/00/8f/fe834eaa54abbd7cab8607e5020fa3a0557e929555b9e4ca404b4adaab06/bcrypt-4.2.1-cp37-abi3-win_amd64.whl", hash = "sha256:8c458cd103e6c5d1d85cf600e546a639f234964d0228909d8f8dbeebff82d526", size = 152981 }, + { url = "https://files.pythonhosted.org/packages/4a/57/23b46933206daf5384b5397d9878746d2249fe9d45efaa8e1467c87d3048/bcrypt-4.2.1-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8ad2f4528cbf0febe80e5a3a57d7a74e6635e41af1ea5675282a33d769fba413", size = 489842 }, + { url = "https://files.pythonhosted.org/packages/fd/28/3ea8a39ddd4938b6c6b6136816d72ba5e659e2d82b53d843c8c53455ac4d/bcrypt-4.2.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:909faa1027900f2252a9ca5dfebd25fc0ef1417943824783d1c8418dd7d6df4a", size = 272500 }, + { url = 
"https://files.pythonhosted.org/packages/77/7f/b43622999f5d4de06237a195ac5501ac83516adf571b907228cd14bac8fe/bcrypt-4.2.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:cde78d385d5e93ece5479a0a87f73cd6fa26b171c786a884f955e165032b262c", size = 278368 }, + { url = "https://files.pythonhosted.org/packages/50/68/f2e3959014b4d8874c747e6e171d46d3e63a3a39aaca8417a8d837eda0a8/bcrypt-4.2.1-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:533e7f3bcf2f07caee7ad98124fab7499cb3333ba2274f7a36cf1daee7409d99", size = 273335 }, + { url = "https://files.pythonhosted.org/packages/d6/c3/4b4bad4da852924427c651589d464ad1aa624f94dd904ddda8493b0a35e5/bcrypt-4.2.1-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:687cf30e6681eeda39548a93ce9bfbb300e48b4d445a43db4298d2474d2a1e54", size = 278614 }, + { url = "https://files.pythonhosted.org/packages/6e/5a/ee107961e84c41af2ac201d0460f962b6622ff391255ffd46429e9e09dc1/bcrypt-4.2.1-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:041fa0155c9004eb98a232d54da05c0b41d4b8e66b6fc3cb71b4b3f6144ba837", size = 306464 }, + { url = "https://files.pythonhosted.org/packages/5c/72/916e14fa12d2b1d1fc6c26ea195337419da6dd23d0bf53ac61ef3739e5c5/bcrypt-4.2.1-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:f85b1ffa09240c89aa2e1ae9f3b1c687104f7b2b9d2098da4e923f1b7082d331", size = 310674 }, + { url = "https://files.pythonhosted.org/packages/97/92/3dc76d8bfa23300591eec248e950f85bd78eb608c96bd4747ce4cc06acdb/bcrypt-4.2.1-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:c6f5fa3775966cca251848d4d5393ab016b3afed251163c1436fefdec3b02c84", size = 320577 }, + { url = "https://files.pythonhosted.org/packages/5d/ab/a6c0da5c2cf86600f74402a72b06dfe365e1a1d30783b1bbeec460fd57d1/bcrypt-4.2.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:807261df60a8b1ccd13e6599c779014a362ae4e795f5c59747f60208daddd96d", size = 339836 }, + { url = "https://files.pythonhosted.org/packages/b4/b4/e75b6e9a72a030a04362034022ebe317c5b735d04db6ad79237101ae4a5c/bcrypt-4.2.1-cp39-abi3-win32.whl", hash = "sha256:b588af02b89d9fad33e5f98f7838bf590d6d692df7153647724a7f20c186f6bf", size = 160911 }, + { url = "https://files.pythonhosted.org/packages/76/b9/d51d34e6cd6d887adddb28a8680a1d34235cc45b9d6e238ce39b98199ca0/bcrypt-4.2.1-cp39-abi3-win_amd64.whl", hash = "sha256:e84e0e6f8e40a242b11bce56c313edc2be121cec3e0ec2d76fce01f6af33c07c", size = 153078 }, + { url = "https://files.pythonhosted.org/packages/4e/6e/7193067042de23af3d71882f898c8c0bd2b18e6ee44a4f76e395dfadb5a8/bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:76132c176a6d9953cdc83c296aeaed65e1a708485fd55abf163e0d9f8f16ce0e", size = 270069 }, + { url = "https://files.pythonhosted.org/packages/3b/05/2546085c6dc07a45627460a39e6291b82382b434fff2bd0167ff3bc31eb1/bcrypt-4.2.1-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:e158009a54c4c8bc91d5e0da80920d048f918c61a581f0a63e4e93bb556d362f", size = 274652 }, +] + [[package]] name = "beartype" version = "0.18.5" @@ -1013,6 +1069,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/05/43bae794c8e5f42d79e1c24205bc0c7447b3909a446de46cf231fa6b39dd/botocore-1.36.8-py3-none-any.whl", hash = "sha256:59d3fdfbae6d916b046e973bebcbeb70a102f9e570ca86d5ba512f1854b78fc2", size = 13318382 }, ] +[[package]] +name = "build" +version = "1.2.2.post1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "colorama", marker = "(os_name == 'nt' and platform_machine != 'aarch64' and sys_platform == 'linux') or (os_name == 'nt' and 
sys_platform != 'darwin' and sys_platform != 'linux')" }, + { name = "importlib-metadata", marker = "python_full_version < '3.10.2'" }, + { name = "packaging" }, + { name = "pyproject-hooks" }, + { name = "tomli", marker = "python_full_version < '3.11'" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/7d/46/aeab111f8e06793e4f0e421fcad593d547fb8313b50990f31681ee2fb1ad/build-1.2.2.post1.tar.gz", hash = "sha256:b36993e92ca9375a219c99e606a122ff365a760a2d4bba0caa09bd5278b608b7", size = 46701 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/84/c2/80633736cd183ee4a62107413def345f7e6e3c01563dbca1417363cf957e/build-1.2.2.post1-py3-none-any.whl", hash = "sha256:1d61c0887fa860c01971625baae8bdd338e517b836a2f70dd1f7aa3a6b2fc5b5", size = 22950 }, +] + [[package]] name = "cachetools" version = "5.5.1" @@ -1182,6 +1254,70 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/52/93/342cc62a70ab727e093ed98e02a725d85b746345f05d2b5e5034649f4ec8/chevron-0.14.0-py3-none-any.whl", hash = "sha256:fbf996a709f8da2e745ef763f482ce2d311aa817d287593a5b990d6d6e4f0443", size = 11595 }, ] +[[package]] +name = "chroma-hnswlib" +version = "0.7.6" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "numpy" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/73/09/10d57569e399ce9cbc5eee2134996581c957f63a9addfa6ca657daf006b8/chroma_hnswlib-0.7.6.tar.gz", hash = "sha256:4dce282543039681160259d29fcde6151cc9106c6461e0485f57cdccd83059b7", size = 32256 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a8/74/b9dde05ea8685d2f8c4681b517e61c7887e974f6272bb24ebc8f2105875b/chroma_hnswlib-0.7.6-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:f35192fbbeadc8c0633f0a69c3d3e9f1a4eab3a46b65458bbcbcabdd9e895c36", size = 195821 }, + { url = "https://files.pythonhosted.org/packages/fd/58/101bfa6bc41bc6cc55fbb5103c75462a7bf882e1704256eb4934df85b6a8/chroma_hnswlib-0.7.6-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:6f007b608c96362b8f0c8b6b2ac94f67f83fcbabd857c378ae82007ec92f4d82", size = 183854 }, + { url = "https://files.pythonhosted.org/packages/17/ff/95d49bb5ce134f10d6aa08d5f3bec624eaff945f0b17d8c3fce888b9a54a/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:456fd88fa0d14e6b385358515aef69fc89b3c2191706fd9aee62087b62aad09c", size = 2358774 }, + { url = "https://files.pythonhosted.org/packages/3a/6d/27826180a54df80dbba8a4f338b022ba21c0c8af96fd08ff8510626dee8f/chroma_hnswlib-0.7.6-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5dfaae825499c2beaa3b75a12d7ec713b64226df72a5c4097203e3ed532680da", size = 2392739 }, + { url = "https://files.pythonhosted.org/packages/d6/63/ee3e8b7a8f931918755faacf783093b61f32f59042769d9db615999c3de0/chroma_hnswlib-0.7.6-cp310-cp310-win_amd64.whl", hash = "sha256:2487201982241fb1581be26524145092c95902cb09fc2646ccfbc407de3328ec", size = 150955 }, + { url = "https://files.pythonhosted.org/packages/f5/af/d15fdfed2a204c0f9467ad35084fbac894c755820b203e62f5dcba2d41f1/chroma_hnswlib-0.7.6-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:81181d54a2b1e4727369486a631f977ffc53c5533d26e3d366dda243fb0998ca", size = 196911 }, + { url = "https://files.pythonhosted.org/packages/0d/19/aa6f2139f1ff7ad23a690ebf2a511b2594ab359915d7979f76f3213e46c4/chroma_hnswlib-0.7.6-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:4b4ab4e11f1083dd0a11ee4f0e0b183ca9f0f2ed63ededba1935b13ce2b3606f", size = 185000 }, + { url = 
"https://files.pythonhosted.org/packages/79/b1/1b269c750e985ec7d40b9bbe7d66d0a890e420525187786718e7f6b07913/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:53db45cd9173d95b4b0bdccb4dbff4c54a42b51420599c32267f3abbeb795170", size = 2377289 }, + { url = "https://files.pythonhosted.org/packages/c7/2d/d5663e134436e5933bc63516a20b5edc08b4c1b1588b9680908a5f1afd04/chroma_hnswlib-0.7.6-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:5c093f07a010b499c00a15bc9376036ee4800d335360570b14f7fe92badcdcf9", size = 2411755 }, + { url = "https://files.pythonhosted.org/packages/3e/79/1bce519cf186112d6d5ce2985392a89528c6e1e9332d680bf752694a4cdf/chroma_hnswlib-0.7.6-cp311-cp311-win_amd64.whl", hash = "sha256:0540b0ac96e47d0aa39e88ea4714358ae05d64bbe6bf33c52f316c664190a6a3", size = 151888 }, + { url = "https://files.pythonhosted.org/packages/93/ac/782b8d72de1c57b64fdf5cb94711540db99a92768d93d973174c62d45eb8/chroma_hnswlib-0.7.6-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:e87e9b616c281bfbe748d01705817c71211613c3b063021f7ed5e47173556cb7", size = 197804 }, + { url = "https://files.pythonhosted.org/packages/32/4e/fd9ce0764228e9a98f6ff46af05e92804090b5557035968c5b4198bc7af9/chroma_hnswlib-0.7.6-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:ec5ca25bc7b66d2ecbf14502b5729cde25f70945d22f2aaf523c2d747ea68912", size = 185421 }, + { url = "https://files.pythonhosted.org/packages/d9/3d/b59a8dedebd82545d873235ef2d06f95be244dfece7ee4a1a6044f080b18/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:305ae491de9d5f3c51e8bd52d84fdf2545a4a2bc7af49765cda286b7bb30b1d4", size = 2389672 }, + { url = "https://files.pythonhosted.org/packages/74/1e/80a033ea4466338824974a34f418e7b034a7748bf906f56466f5caa434b0/chroma_hnswlib-0.7.6-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:822ede968d25a2c88823ca078a58f92c9b5c4142e38c7c8b4c48178894a0a3c5", size = 2436986 }, +] + +[[package]] +name = "chromadb" +version = "0.6.3" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "bcrypt" }, + { name = "build" }, + { name = "chroma-hnswlib" }, + { name = "fastapi" }, + { name = "grpcio" }, + { name = "httpx" }, + { name = "importlib-resources" }, + { name = "kubernetes" }, + { name = "mmh3" }, + { name = "numpy" }, + { name = "onnxruntime" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-exporter-otlp-proto-grpc" }, + { name = "opentelemetry-instrumentation-fastapi" }, + { name = "opentelemetry-sdk" }, + { name = "orjson" }, + { name = "overrides" }, + { name = "posthog" }, + { name = "pydantic" }, + { name = "pypika" }, + { name = "pyyaml" }, + { name = "rich" }, + { name = "tenacity" }, + { name = "tokenizers" }, + { name = "tqdm" }, + { name = "typer" }, + { name = "typing-extensions" }, + { name = "uvicorn", extra = ["standard"] }, +] +sdist = { url = "https://files.pythonhosted.org/packages/39/cd/f0f2de3f466ff514fb6b58271c14f6d22198402bb5b71b8d890231265946/chromadb-0.6.3.tar.gz", hash = "sha256:c8f34c0b704b9108b04491480a36d42e894a960429f87c6516027b5481d59ed3", size = 29297929 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/8e/5c186c77bf749b6fe0528385e507e463f1667543328d76fd00a49e1a4e6a/chromadb-0.6.3-py3-none-any.whl", hash = "sha256:4851258489a3612b558488d98d09ae0fe0a28d5cad6bd1ba64b96fdc419dc0e5", size = 611129 }, +] + [[package]] name = "chromedriver-autoinstaller" version = "0.6.4" @@ -1626,6 +1762,15 @@ wheels = [ { 
url = "https://files.pythonhosted.org/packages/8f/d7/9322c609343d929e75e7e5e6255e614fcc67572cfd083959cdef3b7aad79/docutils-0.21.2-py3-none-any.whl", hash = "sha256:dafca5b9e384f0e419294eb4d2ff9fa826435bf15f15b7bd45723e8ad76811b2", size = 587408 }, ] +[[package]] +name = "durationpy" +version = "0.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/31/e9/f49c4e7fccb77fa5c43c2480e09a857a78b41e7331a75e128ed5df45c56b/durationpy-0.9.tar.gz", hash = "sha256:fd3feb0a69a0057d582ef643c355c40d2fa1c942191f914d12203b1a01ac722a", size = 3186 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/4c/a3/ac312faeceffd2d8f86bc6dcb5c401188ba5a01bc88e69bed97578a0dfcd/durationpy-0.9-py3-none-any.whl", hash = "sha256:e65359a7af5cedad07fb77a2dd3f390f8eb0b74cb845589fa6c057086834dd38", size = 3461 }, +] + [[package]] name = "email-validator" version = "2.2.0" @@ -2637,6 +2782,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/d9/a1e041c5e7caa9a05c925f4bdbdfb7f006d1f74996af53467bc394c97be7/importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b", size = 26514 }, ] +[[package]] +name = "importlib-resources" +version = "6.5.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cf/8c/f834fbf984f691b4f7ff60f50b514cc3de5cc08abfc3295564dd89c5e2e7/importlib_resources-6.5.2.tar.gz", hash = "sha256:185f87adef5bcc288449d98fb4fba07cea78bc036455dd44c5fc4a2fe78fed2c", size = 44693 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a4/ed/1f1afb2e9e7f38a545d628f864d562a5ae64fe6f7a10e28ffb9b185b4e89/importlib_resources-6.5.2-py3-none-any.whl", hash = "sha256:789cfdc3ed28c78b67a06acb8126751ced69a3d5f79c095a98298cd8a760ccec", size = 37461 }, +] + [[package]] name = "iniconfig" version = "2.0.0" @@ -3000,6 +3154,28 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3a/1d/50ad811d1c5dae091e4cf046beba925bcae0a610e79ae4c538f996f63ed5/kiwisolver-1.4.8-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:65ea09a5a3faadd59c2ce96dc7bf0f364986a315949dc6374f04396b0d60e09b", size = 71762 }, ] +[[package]] +name = "kubernetes" +version = "32.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "certifi" }, + { name = "durationpy" }, + { name = "google-auth" }, + { name = "oauthlib" }, + { name = "python-dateutil" }, + { name = "pyyaml" }, + { name = "requests" }, + { name = "requests-oauthlib" }, + { name = "six" }, + { name = "urllib3" }, + { name = "websocket-client" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/bc/7f/15bcdf96c91f7a7b74d524c1bd058e0a2ef37eb6128cf16dca5c8b613aa0/kubernetes-32.0.0.tar.gz", hash = "sha256:319fa840345a482001ac5d6062222daeb66ec4d1bcb3087402aed685adf0aecb", size = 945530 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/df/14/a59acfe4b3095f2a4fd8d13b348853a69c8f1ed4bce9af00d1b31351a88e/kubernetes-32.0.0-py2.py3-none-any.whl", hash = "sha256:60fd8c29e8e43d9c553ca4811895a687426717deba9c0a66fb2dcc3f5ef96692", size = 1987229 }, +] + [[package]] name = "lancedb" version = "0.17.0" @@ -3890,6 +4066,71 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/58/e7/7147c75c383a975c58c33f8e7ee7dbbb0e7390fbcb1ecd321f63e4c73efd/mistralai-1.5.0-py3-none-any.whl", hash = "sha256:9372537719f87bd6f9feef4747d0bf1f4fbe971f8c02945ca4b4bf3c94571c97", size = 271559 }, ] +[[package]] +name = "mmh3" +version = "5.1.0" +source = { 
registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/47/1b/1fc6888c74cbd8abad1292dde2ddfcf8fc059e114c97dd6bf16d12f36293/mmh3-5.1.0.tar.gz", hash = "sha256:136e1e670500f177f49ec106a4ebf0adf20d18d96990cc36ea492c651d2b406c", size = 33728 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/a1/01/9d06468928661765c0fc248a29580c760a4a53a9c6c52cf72528bae3582e/mmh3-5.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:eaf4ac5c6ee18ca9232238364d7f2a213278ae5ca97897cafaa123fcc7bb8bec", size = 56095 }, + { url = "https://files.pythonhosted.org/packages/e4/d7/7b39307fc9db867b2a9a20c58b0de33b778dd6c55e116af8ea031f1433ba/mmh3-5.1.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:48f9aa8ccb9ad1d577a16104834ac44ff640d8de8c0caed09a2300df7ce8460a", size = 40512 }, + { url = "https://files.pythonhosted.org/packages/4f/85/728ca68280d8ccc60c113ad119df70ff1748fbd44c89911fed0501faf0b8/mmh3-5.1.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:d4ba8cac21e1f2d4e436ce03a82a7f87cda80378691f760e9ea55045ec480a3d", size = 40110 }, + { url = "https://files.pythonhosted.org/packages/e4/96/beaf0e301472ffa00358bbbf771fe2d9c4d709a2fe30b1d929e569f8cbdf/mmh3-5.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d69281c281cb01994f054d862a6bb02a2e7acfe64917795c58934b0872b9ece4", size = 100151 }, + { url = "https://files.pythonhosted.org/packages/c3/ee/9381f825c4e09ffafeffa213c3865c4bf7d39771640de33ab16f6faeb854/mmh3-5.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4d05ed3962312fbda2a1589b97359d2467f677166952f6bd410d8c916a55febf", size = 106312 }, + { url = "https://files.pythonhosted.org/packages/67/dc/350a54bea5cf397d357534198ab8119cfd0d8e8bad623b520f9c290af985/mmh3-5.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:78ae6a03f4cff4aa92ddd690611168856f8c33a141bd3e5a1e0a85521dc21ea0", size = 104232 }, + { url = "https://files.pythonhosted.org/packages/b2/5d/2c6eb4a4ec2f7293b98a9c07cb8c64668330b46ff2b6511244339e69a7af/mmh3-5.1.0-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:95f983535b39795d9fb7336438faae117424c6798f763d67c6624f6caf2c4c01", size = 91663 }, + { url = "https://files.pythonhosted.org/packages/f1/ac/17030d24196f73ecbab8b5033591e5e0e2beca103181a843a135c78f4fee/mmh3-5.1.0-cp310-cp310-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d46fdd80d4c7ecadd9faa6181e92ccc6fe91c50991c9af0e371fdf8b8a7a6150", size = 99166 }, + { url = "https://files.pythonhosted.org/packages/b9/ed/54ddc56603561a10b33da9b12e95a48a271d126f4a4951841bbd13145ebf/mmh3-5.1.0-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:0f16e976af7365ea3b5c425124b2a7f0147eed97fdbb36d99857f173c8d8e096", size = 101555 }, + { url = "https://files.pythonhosted.org/packages/1c/c3/33fb3a940c9b70908a5cc9fcc26534aff8698180f9f63ab6b7cc74da8bcd/mmh3-5.1.0-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:6fa97f7d1e1f74ad1565127229d510f3fd65d931fdedd707c1e15100bc9e5ebb", size = 94813 }, + { url = "https://files.pythonhosted.org/packages/61/88/c9ff76a23abe34db8eee1a6fa4e449462a16c7eb547546fc5594b0860a72/mmh3-5.1.0-cp310-cp310-musllinux_1_2_ppc64le.whl", hash = "sha256:4052fa4a8561bd62648e9eb993c8f3af3bdedadf3d9687aa4770d10e3709a80c", size = 109611 }, + { url = 
"https://files.pythonhosted.org/packages/0b/8e/27d04f40e95554ebe782cac7bddda2d158cf3862387298c9c7b254fa7beb/mmh3-5.1.0-cp310-cp310-musllinux_1_2_s390x.whl", hash = "sha256:3f0e8ae9f961037f812afe3cce7da57abf734285961fffbeff9a4c011b737732", size = 100515 }, + { url = "https://files.pythonhosted.org/packages/7b/00/504ca8f462f01048f3c87cd93f2e1f60b93dac2f930cd4ed73532a9337f5/mmh3-5.1.0-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:99297f207db967814f1f02135bb7fe7628b9eacb046134a34e1015b26b06edce", size = 100177 }, + { url = "https://files.pythonhosted.org/packages/6f/1d/2efc3525fe6fdf8865972fcbb884bd1f4b0f923c19b80891cecf7e239fa5/mmh3-5.1.0-cp310-cp310-win32.whl", hash = "sha256:2e6c8dc3631a5e22007fbdb55e993b2dbce7985c14b25b572dd78403c2e79182", size = 40815 }, + { url = "https://files.pythonhosted.org/packages/38/b5/c8fbe707cb0fea77a6d2d58d497bc9b67aff80deb84d20feb34d8fdd8671/mmh3-5.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:e4e8c7ad5a4dddcfde35fd28ef96744c1ee0f9d9570108aa5f7e77cf9cfdf0bf", size = 41479 }, + { url = "https://files.pythonhosted.org/packages/a1/f1/663e16134f913fccfbcea5b300fb7dc1860d8f63dc71867b013eebc10aec/mmh3-5.1.0-cp310-cp310-win_arm64.whl", hash = "sha256:45da549269883208912868a07d0364e1418d8292c4259ca11699ba1b2475bd26", size = 38883 }, + { url = "https://files.pythonhosted.org/packages/56/09/fda7af7fe65928262098382e3bf55950cfbf67d30bf9e47731bf862161e9/mmh3-5.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:0b529dcda3f951ff363a51d5866bc6d63cf57f1e73e8961f864ae5010647079d", size = 56098 }, + { url = "https://files.pythonhosted.org/packages/0c/ab/84c7bc3f366d6f3bd8b5d9325a10c367685bc17c26dac4c068e2001a4671/mmh3-5.1.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:4db1079b3ace965e562cdfc95847312f9273eb2ad3ebea983435c8423e06acd7", size = 40513 }, + { url = "https://files.pythonhosted.org/packages/4f/21/25ea58ca4a652bdc83d1528bec31745cce35802381fb4fe3c097905462d2/mmh3-5.1.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:22d31e3a0ff89b8eb3b826d6fc8e19532998b2aa6b9143698043a1268da413e1", size = 40112 }, + { url = "https://files.pythonhosted.org/packages/bd/78/4f12f16ae074ddda6f06745254fdb50f8cf3c85b0bbf7eaca58bed84bf58/mmh3-5.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2139bfbd354cd6cb0afed51c4b504f29bcd687a3b1460b7e89498329cc28a894", size = 102632 }, + { url = "https://files.pythonhosted.org/packages/48/11/8f09dc999cf2a09b6138d8d7fc734efb7b7bfdd9adb9383380941caadff0/mmh3-5.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:8c8105c6a435bc2cd6ea2ef59558ab1a2976fd4a4437026f562856d08996673a", size = 108884 }, + { url = "https://files.pythonhosted.org/packages/bd/91/e59a66538a3364176f6c3f7620eee0ab195bfe26f89a95cbcc7a1fb04b28/mmh3-5.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:57730067174a7f36fcd6ce012fe359bd5510fdaa5fe067bc94ed03e65dafb769", size = 106835 }, + { url = "https://files.pythonhosted.org/packages/25/14/b85836e21ab90e5cddb85fe79c494ebd8f81d96a87a664c488cc9277668b/mmh3-5.1.0-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:bde80eb196d7fdc765a318604ded74a4378f02c5b46c17aa48a27d742edaded2", size = 93688 }, + { url = "https://files.pythonhosted.org/packages/ac/aa/8bc964067df9262740c95e4cde2d19f149f2224f426654e14199a9e47df6/mmh3-5.1.0-cp311-cp311-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = 
"sha256:e9c8eddcb441abddeb419c16c56fd74b3e2df9e57f7aa2903221996718435c7a", size = 101569 }, + { url = "https://files.pythonhosted.org/packages/70/b6/1fb163cbf919046a64717466c00edabebece3f95c013853fec76dbf2df92/mmh3-5.1.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:99e07e4acafbccc7a28c076a847fb060ffc1406036bc2005acb1b2af620e53c3", size = 98483 }, + { url = "https://files.pythonhosted.org/packages/70/49/ba64c050dd646060f835f1db6b2cd60a6485f3b0ea04976e7a29ace7312e/mmh3-5.1.0-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:9e25ba5b530e9a7d65f41a08d48f4b3fedc1e89c26486361166a5544aa4cad33", size = 96496 }, + { url = "https://files.pythonhosted.org/packages/9e/07/f2751d6a0b535bb865e1066e9c6b80852571ef8d61bce7eb44c18720fbfc/mmh3-5.1.0-cp311-cp311-musllinux_1_2_ppc64le.whl", hash = "sha256:bb9bf7475b4d99156ce2f0cf277c061a17560c8c10199c910a680869a278ddc7", size = 105109 }, + { url = "https://files.pythonhosted.org/packages/b7/02/30360a5a66f7abba44596d747cc1e6fb53136b168eaa335f63454ab7bb79/mmh3-5.1.0-cp311-cp311-musllinux_1_2_s390x.whl", hash = "sha256:2a1b0878dd281ea3003368ab53ff6f568e175f1b39f281df1da319e58a19c23a", size = 98231 }, + { url = "https://files.pythonhosted.org/packages/8c/60/8526b0c750ff4d7ae1266e68b795f14b97758a1d9fcc19f6ecabf9c55656/mmh3-5.1.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:25f565093ac8b8aefe0f61f8f95c9a9d11dd69e6a9e9832ff0d293511bc36258", size = 97548 }, + { url = "https://files.pythonhosted.org/packages/6d/4c/26e1222aca65769280d5427a1ce5875ef4213449718c8f03958d0bf91070/mmh3-5.1.0-cp311-cp311-win32.whl", hash = "sha256:1e3554d8792387eac73c99c6eaea0b3f884e7130eb67986e11c403e4f9b6d372", size = 40810 }, + { url = "https://files.pythonhosted.org/packages/98/d5/424ba95062d1212ea615dc8debc8d57983f2242d5e6b82e458b89a117a1e/mmh3-5.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:8ad777a48197882492af50bf3098085424993ce850bdda406a358b6ab74be759", size = 41476 }, + { url = "https://files.pythonhosted.org/packages/bd/08/0315ccaf087ba55bb19a6dd3b1e8acd491e74ce7f5f9c4aaa06a90d66441/mmh3-5.1.0-cp311-cp311-win_arm64.whl", hash = "sha256:f29dc4efd99bdd29fe85ed6c81915b17b2ef2cf853abf7213a48ac6fb3eaabe1", size = 38880 }, + { url = "https://files.pythonhosted.org/packages/f4/47/e5f452bdf16028bfd2edb4e2e35d0441e4a4740f30e68ccd4cfd2fb2c57e/mmh3-5.1.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:45712987367cb9235026e3cbf4334670522a97751abfd00b5bc8bfa022c3311d", size = 56152 }, + { url = "https://files.pythonhosted.org/packages/60/38/2132d537dc7a7fdd8d2e98df90186c7fcdbd3f14f95502a24ba443c92245/mmh3-5.1.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b1020735eb35086ab24affbea59bb9082f7f6a0ad517cb89f0fc14f16cea4dae", size = 40564 }, + { url = "https://files.pythonhosted.org/packages/c0/2a/c52cf000581bfb8d94794f58865658e7accf2fa2e90789269d4ae9560b16/mmh3-5.1.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:babf2a78ce5513d120c358722a2e3aa7762d6071cd10cede026f8b32452be322", size = 40104 }, + { url = "https://files.pythonhosted.org/packages/83/33/30d163ce538c54fc98258db5621447e3ab208d133cece5d2577cf913e708/mmh3-5.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:d4f47f58cd5cbef968c84a7c1ddc192fef0a36b48b0b8a3cb67354531aa33b00", size = 102634 }, + { url = "https://files.pythonhosted.org/packages/94/5c/5a18acb6ecc6852be2d215c3d811aa61d7e425ab6596be940877355d7f3e/mmh3-5.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:2044a601c113c981f2c1e14fa33adc9b826c9017034fe193e9eb49a6882dbb06", size = 
108888 }, + { url = "https://files.pythonhosted.org/packages/1f/f6/11c556324c64a92aa12f28e221a727b6e082e426dc502e81f77056f6fc98/mmh3-5.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c94d999c9f2eb2da44d7c2826d3fbffdbbbbcde8488d353fee7c848ecc42b968", size = 106968 }, + { url = "https://files.pythonhosted.org/packages/5d/61/ca0c196a685aba7808a5c00246f17b988a9c4f55c594ee0a02c273e404f3/mmh3-5.1.0-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:a015dcb24fa0c7a78f88e9419ac74f5001c1ed6a92e70fd1803f74afb26a4c83", size = 93771 }, + { url = "https://files.pythonhosted.org/packages/b4/55/0927c33528710085ee77b808d85bbbafdb91a1db7c8eaa89cac16d6c513e/mmh3-5.1.0-cp312-cp312-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:457da019c491a2d20e2022c7d4ce723675e4c081d9efc3b4d8b9f28a5ea789bd", size = 101726 }, + { url = "https://files.pythonhosted.org/packages/49/39/a92c60329fa470f41c18614a93c6cd88821412a12ee78c71c3f77e1cfc2d/mmh3-5.1.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:71408579a570193a4ac9c77344d68ddefa440b00468a0b566dcc2ba282a9c559", size = 98523 }, + { url = "https://files.pythonhosted.org/packages/81/90/26adb15345af8d9cf433ae1b6adcf12e0a4cad1e692de4fa9f8e8536c5ae/mmh3-5.1.0-cp312-cp312-musllinux_1_2_i686.whl", hash = "sha256:8b3a04bc214a6e16c81f02f855e285c6df274a2084787eeafaa45f2fbdef1b63", size = 96628 }, + { url = "https://files.pythonhosted.org/packages/8a/4d/340d1e340df972a13fd4ec84c787367f425371720a1044220869c82364e9/mmh3-5.1.0-cp312-cp312-musllinux_1_2_ppc64le.whl", hash = "sha256:832dae26a35514f6d3c1e267fa48e8de3c7b978afdafa0529c808ad72e13ada3", size = 105190 }, + { url = "https://files.pythonhosted.org/packages/d3/7c/65047d1cccd3782d809936db446430fc7758bda9def5b0979887e08302a2/mmh3-5.1.0-cp312-cp312-musllinux_1_2_s390x.whl", hash = "sha256:bf658a61fc92ef8a48945ebb1076ef4ad74269e353fffcb642dfa0890b13673b", size = 98439 }, + { url = "https://files.pythonhosted.org/packages/72/d2/3c259d43097c30f062050f7e861075099404e8886b5d4dd3cebf180d6e02/mmh3-5.1.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:3313577453582b03383731b66447cdcdd28a68f78df28f10d275d7d19010c1df", size = 97780 }, + { url = "https://files.pythonhosted.org/packages/29/29/831ea8d4abe96cdb3e28b79eab49cac7f04f9c6b6e36bfc686197ddba09d/mmh3-5.1.0-cp312-cp312-win32.whl", hash = "sha256:1d6508504c531ab86c4424b5a5ff07c1132d063863339cf92f6657ff7a580f76", size = 40835 }, + { url = "https://files.pythonhosted.org/packages/12/dd/7cbc30153b73f08eeac43804c1dbc770538a01979b4094edbe1a4b8eb551/mmh3-5.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:aa75981fcdf3f21759d94f2c81b6a6e04a49dfbcdad88b152ba49b8e20544776", size = 41509 }, + { url = "https://files.pythonhosted.org/packages/80/9d/627375bab4c90dd066093fc2c9a26b86f87e26d980dbf71667b44cbee3eb/mmh3-5.1.0-cp312-cp312-win_arm64.whl", hash = "sha256:a4c1a76808dfea47f7407a0b07aaff9087447ef6280716fd0783409b3088bb3c", size = 38888 }, +] + +[[package]] +name = "monotonic" +version = "1.6" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ea/ca/8e91948b782ddfbd194f323e7e7d9ba12e5877addf04fb2bf8fca38e86ac/monotonic-1.6.tar.gz", hash = "sha256:3a55207bcfed53ddd5c5bae174524062935efed17792e9de2ad0205ce9ad63f7", size = 7615 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/9a/67/7e8406a29b6c45be7af7740456f7f37025f0506ae2e05fb9009a53946860/monotonic-1.6-py2.py3-none-any.whl", 
hash = "sha256:68687e19a14f11f26d140dd5c86f3dba4bf5df58003000ed467e0e2a69bca96c", size = 8154 }, +] + [[package]] name = "more-itertools" version = "10.6.0" @@ -4306,7 +4547,6 @@ name = "nvidia-cublas-cu12" version = "12.4.5.8" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, ] @@ -4315,7 +4555,6 @@ name = "nvidia-cuda-cupti-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, ] @@ -4324,7 +4563,6 @@ name = "nvidia-cuda-nvrtc-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, ] @@ -4333,7 +4571,6 @@ name = "nvidia-cuda-runtime-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, ] @@ -4356,7 +4593,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = 
"https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, ] @@ -4365,7 +4601,6 @@ name = "nvidia-curand-cu12" version = "10.3.5.147" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, ] @@ -4379,7 +4614,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, ] @@ -4391,7 +4625,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, ] @@ -4408,7 +4641,6 @@ name = "nvidia-nvjitlink-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, ] @@ -4417,10 +4649,18 @@ name = "nvidia-nvtx-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = 
"https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, ] +[[package]] +name = "oauthlib" +version = "3.2.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/6d/fa/fbf4001037904031639e6bfbfc02badfc7e12f137a8afa254df6c4c8a670/oauthlib-3.2.2.tar.gz", hash = "sha256:9859c40929662bec5d64f34d01c99e093149682a3f38915dc0655d5a633dd918", size = 177352 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/7e/80/cab10959dc1faead58dc8384a781dfbf93cb4d33d50988f7a69f1b7c9bbe/oauthlib-3.2.2-py3-none-any.whl", hash = "sha256:8139f29aac13e25d502680e9e19963e83f16838d48a0d71c287fe40e7067fbca", size = 151688 }, +] + [[package]] name = "ollama" version = "0.4.7" @@ -4689,6 +4929,38 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/ff/b1/55a77152a83ec8998e520a3a575f44af1020cfe4bdc000b7538583293b85/opentelemetry_instrumentation-0.50b0-py3-none-any.whl", hash = "sha256:b8f9fc8812de36e1c6dffa5bfc6224df258841fb387b6dfe5df15099daa10630", size = 30728 }, ] +[[package]] +name = "opentelemetry-instrumentation-asgi" +version = "0.50b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "asgiref" }, + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/49/cc/a7b2fd243c6d2621803092eba62e450071b6752dfe4f64f530bbfd91a328/opentelemetry_instrumentation_asgi-0.50b0.tar.gz", hash = "sha256:3ca4cb5616ae6a3e8ce86e7d5c360a8d8cc8ed722cf3dc8a5e44300774e87d49", size = 24105 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/d2/81/0899c6b56b1023835f266d909250d439174afa0c34ed5944c5021d3da263/opentelemetry_instrumentation_asgi-0.50b0-py3-none-any.whl", hash = "sha256:2ba1297f746e55dec5a17fe825689da0613662fb25c004c3965a6c54b1d5be22", size = 16304 }, +] + +[[package]] +name = "opentelemetry-instrumentation-fastapi" +version = "0.50b0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "opentelemetry-api" }, + { name = "opentelemetry-instrumentation" }, + { name = "opentelemetry-instrumentation-asgi" }, + { name = "opentelemetry-semantic-conventions" }, + { name = "opentelemetry-util-http" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/8d/f8/1917b0b3e414e23c7d71c9a33f0ce020f94bc47d22a30f54ace704e07588/opentelemetry_instrumentation_fastapi-0.50b0.tar.gz", hash = "sha256:16b9181682136da210295def2bb304a32fb9bdee9a935cdc9da43567f7c1149e", size = 19214 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cb/d6/37784bb30b213e2dd6838b9f96c2940907022c1b75ef1ff18a99afe42433/opentelemetry_instrumentation_fastapi-0.50b0-py3-none-any.whl", hash = "sha256:8f03b738495e4705fbae51a2826389c7369629dace89d0f291c06ffefdff5e52", size = 12079 }, +] + [[package]] name = "opentelemetry-proto" version = "1.29.0" @@ -4728,6 +5000,15 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/da/fb/dc15fad105450a015e913cfa4f5c27b6a5f1bea8fb649f8cae11e699c8af/opentelemetry_semantic_conventions-0.50b0-py3-none-any.whl", hash = "sha256:e87efba8fdb67fb38113efea6a349531e75ed7ffc01562f65b802fcecb5e115e", size = 166602 }, ] +[[package]] +name = "opentelemetry-util-http" +version = "0.50b0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/69/10/ce3f0d1157cedbd819194f0b27a6bbb7c19a8bceb3941e4a4775014076cf/opentelemetry_util_http-0.50b0.tar.gz", hash = "sha256:dc4606027e1bc02aabb9533cc330dd43f874fca492e4175c31d7154f341754af", size = 7859 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/64/8a/9e1b54f50d1fddebbeac9a9b0632f8db6ece7add904fb593ee2e268ee4de/opentelemetry_util_http-0.50b0-py3-none-any.whl", hash = "sha256:21f8aedac861ffa3b850f8c0a6c373026189eb8630ac6e14a2bf8c55695cc090", size = 6942 }, +] + [[package]] name = "orjson" version = "3.10.15" @@ -5077,6 +5358,22 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/9b/fb/a70a4214956182e0d7a9099ab17d50bfcba1056188e9b14f35b9e2b62a0d/portalocker-2.10.1-py3-none-any.whl", hash = "sha256:53a5984ebc86a025552264b459b46a2086e269b21823cb572f8f28ee759e45bf", size = 18423 }, ] +[[package]] +name = "posthog" +version = "3.11.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "backoff" }, + { name = "monotonic" }, + { name = "python-dateutil" }, + { name = "requests" }, + { name = "six" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/a1/f9/ffb682dfcfe43ff38c501791b8b4c01ba25f772c5d16bdb8c0f992f099fd/posthog-3.11.0.tar.gz", hash = "sha256:42a1f88cbcddeceaf6e8900a528db62d84fc56f6e5809f3d6dfb40e6f743091e", size = 61344 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e6/21/a7975b832603fed31930860108e12f7680ad829d74ce05eab2df1a17ae2d/posthog-3.11.0-py2.py3-none-any.whl", hash = "sha256:8cbd52c26bcdfbe65c4ea84a8090cfa2e046879d6b6d71da68e279a5b4aedb46", size = 72005 }, +] + [[package]] name = "pot" version = "0.9.5" @@ -5580,6 +5877,21 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/3e/6e/9aa158121eb5a6af5537af0bde9e38092a97c40a5a0ecaec7cc9688b2c2e/pypdf-5.2.0-py3-none-any.whl", hash = "sha256:d107962ec45e65e3bd10c1d9242bdbbedaa38193c9e3a6617bd6d996e5747b19", size = 298686 }, ] +[[package]] +name = "pypika" +version = "0.48.9" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/c7/2c/94ed7b91db81d61d7096ac8f2d325ec562fc75e35f3baea8749c85b28784/PyPika-0.48.9.tar.gz", hash = "sha256:838836a61747e7c8380cd1b7ff638694b7a7335345d0f559b04b2cd832ad5378", size = 67259 } + +[[package]] +name = "pyproject-hooks" +version = "1.2.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/e7/82/28175b2414effca1cdac8dc99f76d660e7a4fb0ceefa4b4ab8f5f6742925/pyproject_hooks-1.2.0.tar.gz", hash = "sha256:1e859bd5c40fae9448642dd871adf459e5e2084186e8d2c2a79a824c970da1f8", size = 19228 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/bd/24/12818598c362d7f300f18e74db45963dbcb85150324092410c8b49405e42/pyproject_hooks-1.2.0-py3-none-any.whl", hash = "sha256:9e5c6bfa8dcc30091c74b0cf803c81fdd29d94f01992a7707bc97babb1141913", size = 10216 }, +] + [[package]] name = "pyreadline3" version = "3.5.4" @@ -5980,6 +6292,19 @@ wheels = [ { url = 
"https://files.pythonhosted.org/packages/d7/25/dd878a121fcfdf38f52850f11c512e13ec87c2ea72385933818e5b6c15ce/requests_file-2.1.0-py2.py3-none-any.whl", hash = "sha256:cf270de5a4c5874e84599fc5778303d496c10ae5e870bfa378818f35d21bda5c", size = 4244 }, ] +[[package]] +name = "requests-oauthlib" +version = "2.0.0" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "oauthlib" }, + { name = "requests" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/42/f2/05f29bc3913aea15eb670be136045bf5c5bbf4b99ecb839da9b422bb2c85/requests-oauthlib-2.0.0.tar.gz", hash = "sha256:b3dffaebd884d8cd778494369603a9e7b58d29111bf6b41bdc2dcd87203af4e9", size = 55650 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/3b/5d/63d4ae3b9daea098d5d6f5da83984853c1bbacd5dc826764b249fe119d24/requests_oauthlib-2.0.0-py2.py3-none-any.whl", hash = "sha256:7dd8a5c40426b779b0868c404bdef9768deccf22749cde15852df527e6269b36", size = 24179 }, +] + [[package]] name = "requests-toolbelt" version = "1.0.0" From 6a0485173757a882fc4872d9d1cb17b1099bb513 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Tue, 4 Feb 2025 17:37:50 -0800 Subject: [PATCH 89/93] Add line to autogenstudio section of uv.lock version = "0.4.0" --- python/uv.lock | 1 + 1 file changed, 1 insertion(+) diff --git a/python/uv.lock b/python/uv.lock index 7841a3367d9e..3b1d3b36928b 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -765,6 +765,7 @@ requires-dist = [ [[package]] name = "autogenstudio" +version = "0.4.0" source = { editable = "packages/autogen-studio" } dependencies = [ { name = "aiofiles" }, From ad514eb2043939211ff9098a428f718bbd2989c0 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 5 Feb 2025 14:23:19 -0800 Subject: [PATCH 90/93] poe check fixes --- .../agentic_memory/_agentic_memory_bank.py | 4 ++-- .../autogen_ext/agentic_memory/_prompter.py | 5 +++-- .../autogen_ext/agentic_memory/apprentice.py | 5 +++-- .../src/autogen_ext/agentic_memory/grader.py | 18 +++++++++++------- .../autogen_ext/agentic_memory/page_logger.py | 6 +++--- 5 files changed, 22 insertions(+), 16 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index fdd6505470be..37802edcb47c 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -53,7 +53,7 @@ def __init__(self, settings: Dict[str, Any], reset: bool, logger: PageLogger) -> self.string_map = StringSimilarityMap(reset=reset, path_to_db_dir=path_to_db_dir, logger=self.logger) # Load or create the associated insight dict on disk. - self.uid_insight_dict = {} + self.uid_insight_dict: Dict[str, Insight] = {} self.last_insight_id = 0 if (not reset) and os.path.exists(self.path_to_dict): self.logger.info("\nLOADING INSIGHTS FROM DISK {}".format(self.path_to_dict)) @@ -80,7 +80,7 @@ def _reset_insights(self) -> None: """ Forces immediate deletion of the insights, in memory and on disk. 
""" - self.uid_insight_dict: Dict[str, Insight] = {} + self.uid_insight_dict = {} self.save_insights() def save_insights(self) -> None: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 2a67c4ff2a36..556a3b2e6fc9 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -57,6 +57,7 @@ async def call_model( # Prepare the input message list if system_message_content is None: system_message_content = self.default_system_message_content + system_message: LLMMessage if self.client.model_info["family"] == "o1": # No system message allowed, so pass it as the first user message. system_message = UserMessage(content=system_message_content, source="User") @@ -96,7 +97,7 @@ async def call_model( # Return the response as a string for now return response_string - def _clear_history(self): + def _clear_history(self) -> None: """ Empties the message list containing the chat history. """ @@ -104,7 +105,7 @@ def _clear_history(self): async def learn_from_failure( self, task_description: str, memory_section: str, final_response: str, expected_answer: str, work_history: str - ): + ) -> str: """ Tries to create an insight to help avoid the given failure in the future. """ diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index 66463286a8e9..d62078e88811 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -148,12 +148,13 @@ async def _assign_task_to_simple_agent(self, task: str) -> Tuple[Any, Any]: random_str = "({})\n\n".format(self.rand.randint(0, 1000000)) system_message_content = random_str + system_message_content + system_message: LLMMessage if self.client.model_info["family"] == "o1": # No system message allowed, so pass it as the first user message. - system_message: LLMMessage = UserMessage(content=system_message_content, source="User") + system_message = UserMessage(content=system_message_content, source="User") else: # System message allowed. - system_message: LLMMessage = SystemMessage(content=system_message_content) + system_message = SystemMessage(content=system_message_content) user_message: LLMMessage = UserMessage(content=task, source="User") system_message_list: List[LLMMessage] = [system_message] diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py index f4e46be5931e..a045ee9afd98 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py @@ -1,7 +1,8 @@ from __future__ import annotations -from typing import TYPE_CHECKING, List, Tuple +from typing import TYPE_CHECKING, List, Tuple, Union +from autogen_core import Image from autogen_core.models import ( AssistantMessage, ChatCompletionClient, @@ -89,6 +90,7 @@ async def call_model( # Prepare the input message list if system_message_content is None: system_message_content = "You are a helpful assistant." + system_message: LLMMessage if self.client.model_info["family"] == "o1": # No system message allowed, so pass it as the first user message. 
system_message = UserMessage(content=system_message_content, source="User") @@ -134,27 +136,29 @@ async def is_response_correct( sys_message = """You are a helpful and thoughtful assistant.""" # Ask the model to extract the answer from the response. - user_message: UserContent = [ - """Your job is to extract a possible answer to the following question from the given text. + user_message: List[Union[str, Image]] = [] + user_message.append("""Your job is to extract a possible answer to the following question from the given text. - First review the following task. - Then review the text that follows, which may an answer, plus reasoning that led to the answer. - Do not attempt to actually solve the task yourself. - Don't try to judge whether the reasoning steps were correct. - Simply respond by summarizing the answer described in the text, omitting any other parts of the text. -- If no answer is present can be extracted from the text, simply reply "None".""" - ] +- If no answer is present can be extracted from the text, simply reply "None".""") user_message.append("\n# Task description") user_message.append(task_description) user_message.append("\n# Text that may contain an answer") user_message.append(response_to_be_graded) + user_message_arg: UserContent = user_message self._clear_history() extracted_answer = await self.call_model( - summary="Ask the model to extract the answer", system_message_content=sys_message, user_content=user_message + summary="Ask the model to extract the answer", + system_message_content=sys_message, + user_content=user_message_arg, ) self.logger.info("Extracted answer: " + extracted_answer) # Ask the model to check the answer for correctness. - user_message: UserContent = [ + user_message = [ """Your job is to decide whether a given answer to a task is correct or not. - You will be given the task description and the correct, gold-standard answer, along with the answer to be graded. - In general, an answer is correct if it is equivalent to the correct answer. diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py index ecc73d227afe..a8e4e7c68165 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py @@ -2,7 +2,7 @@ import json import os import shutil -from typing import Any, Dict, List, Optional, Sequence, Union +from typing import Any, Dict, List, Optional, Sequence from autogen_agentchat.base import TaskResult from autogen_agentchat.messages import AgentEvent, ChatMessage @@ -208,7 +208,7 @@ def _format_message_content(self, message_content: MessageContent) -> str: Formats the message content for logging. """ # Start by converting the message content to a list of strings. 
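[Editorial note] The grader hunk above swaps a list annotated with the broad UserContent alias for a concretely typed list that is widened only at the call site, which is what lets the .append calls type-check. A sketch of the idea, with one assumption: the alias shape below is a local stand-in for the project's own definition in _utils.py.

from typing import List, Union

from autogen_core import Image

UserContent = Union[str, List[Union[str, Image]]]  # assumed shape of the project alias

parts: List[Union[str, Image]] = []  # concrete element type, so .append checks
parts.append("\n# Task description")
parts.append("Compute 2 + 2 and give only the answer on the last line.")
user_content_arg: UserContent = parts  # widen once, when handing the list off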
- content_list: List[Union[MessageContent, None]] = [] + content_list: List[str] = [] content = message_content if isinstance(content, str): content_list.append(content) @@ -475,7 +475,7 @@ class PageStack: write_stack_to_page: Logs a properly indented string displaying the current call stack """ - def __init__(self): + def __init__(self) -> None: self.stack: List[Page] = [] def push(self, page: Page) -> None: From 18ae4dcaef9fa0ded04ae0f99d98a8b452ee4f09 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Wed, 5 Feb 2025 15:27:36 -0800 Subject: [PATCH 91/93] uv --- python/uv.lock | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/python/uv.lock b/python/uv.lock index 87c5320f0343..b6c8dda7f020 100644 --- a/python/uv.lock +++ b/python/uv.lock @@ -4306,7 +4306,6 @@ name = "nvidia-cublas-cu12" version = "12.4.5.8" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/7f/7f/7fbae15a3982dc9595e49ce0f19332423b260045d0a6afe93cdbe2f1f624/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0f8aa1706812e00b9f19dfe0cdb3999b092ccb8ca168c0db5b8ea712456fd9b3", size = 363333771 }, { url = "https://files.pythonhosted.org/packages/ae/71/1c91302526c45ab494c23f61c7a84aa568b8c1f9d196efa5993957faf906/nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl", hash = "sha256:2fc8da60df463fdefa81e323eef2e36489e1c94335b5358bcb38360adf75ac9b", size = 363438805 }, ] @@ -4315,7 +4314,6 @@ name = "nvidia-cuda-cupti-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/93/b5/9fb3d00386d3361b03874246190dfec7b206fd74e6e287b26a8fcb359d95/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:79279b35cf6f91da114182a5ce1864997fd52294a87a16179ce275773799458a", size = 12354556 }, { url = "https://files.pythonhosted.org/packages/67/42/f4f60238e8194a3106d06a058d494b18e006c10bb2b915655bd9f6ea4cb1/nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:9dec60f5ac126f7bb551c055072b69d85392b13311fcc1bcda2202d172df30fb", size = 13813957 }, ] @@ -4324,7 +4322,6 @@ name = "nvidia-cuda-nvrtc-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/77/aa/083b01c427e963ad0b314040565ea396f914349914c298556484f799e61b/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:0eedf14185e04b76aa05b1fea04133e59f465b6f960c0cbf4e37c3cb6b0ea198", size = 24133372 }, { url = "https://files.pythonhosted.org/packages/2c/14/91ae57cd4db3f9ef7aa99f4019cfa8d54cb4caa7e00975df6467e9725a9f/nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a178759ebb095827bd30ef56598ec182b85547f1508941a3d560eb7ea1fbf338", size = 24640306 }, ] @@ -4333,7 +4330,6 @@ name = "nvidia-cuda-runtime-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/a1/aa/b656d755f474e2084971e9a297def515938d56b466ab39624012070cb773/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:961fe0e2e716a2a1d967aab7caee97512f71767f852f67432d572e36cb3a11f3", size = 894177 }, { url = "https://files.pythonhosted.org/packages/ea/27/1795d86fe88ef397885f2e580ac37628ed058a92ed2c39dc8eac3adf0619/nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = 
"sha256:64403288fa2136ee8e467cdc9c9427e0434110899d07c779f25b5c068934faa5", size = 883737 }, ] @@ -4356,7 +4352,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/7a/8a/0e728f749baca3fbeffad762738276e5df60851958be7783af121a7221e7/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_aarch64.whl", hash = "sha256:5dad8008fc7f92f5ddfa2101430917ce2ffacd86824914c82e28990ad7f00399", size = 211422548 }, { url = "https://files.pythonhosted.org/packages/27/94/3266821f65b92b3138631e9c8e7fe1fb513804ac934485a8d05776e1dd43/nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl", hash = "sha256:f083fc24912aa410be21fa16d157fed2055dab1cc4b6934a0e03cba69eb242b9", size = 211459117 }, ] @@ -4365,7 +4360,6 @@ name = "nvidia-curand-cu12" version = "10.3.5.147" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/80/9c/a79180e4d70995fdf030c6946991d0171555c6edf95c265c6b2bf7011112/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_aarch64.whl", hash = "sha256:1f173f09e3e3c76ab084aba0de819c49e56614feae5c12f69883f4ae9bb5fad9", size = 56314811 }, { url = "https://files.pythonhosted.org/packages/8a/6d/44ad094874c6f1b9c654f8ed939590bdc408349f137f9b98a3a23ccec411/nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl", hash = "sha256:a88f583d4e0bb643c49743469964103aa59f7f708d862c3ddb0fc07f851e3b8b", size = 56305206 }, ] @@ -4379,7 +4373,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/46/6b/a5c33cf16af09166845345275c34ad2190944bcc6026797a39f8e0a282e0/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_aarch64.whl", hash = "sha256:d338f155f174f90724bbde3758b7ac375a70ce8e706d70b018dd3375545fc84e", size = 127634111 }, { url = "https://files.pythonhosted.org/packages/3a/e1/5b9089a4b2a4790dfdea8b3a006052cfecff58139d5a4e34cb1a51df8d6f/nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl", hash = "sha256:19e33fa442bcfd085b3086c4ebf7e8debc07cfe01e11513cc6d332fd918ac260", size = 127936057 }, ] @@ -4391,7 +4384,6 @@ dependencies = [ { name = "nvidia-nvjitlink-cu12", marker = "(platform_machine != 'aarch64' and sys_platform == 'linux') or (sys_platform != 'darwin' and sys_platform != 'linux')" }, ] wheels = [ - { url = "https://files.pythonhosted.org/packages/96/a9/c0d2f83a53d40a4a41be14cea6a0bf9e668ffcf8b004bd65633f433050c0/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_aarch64.whl", hash = "sha256:9d32f62896231ebe0480efd8a7f702e143c98cfaa0e8a76df3386c1ba2b54df3", size = 207381987 }, { url = "https://files.pythonhosted.org/packages/db/f7/97a9ea26ed4bbbfc2d470994b8b4f338ef663be97b8f677519ac195e113d/nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl", hash = "sha256:ea4f11a2904e2a8dc4b1833cc1b5181cde564edd0d5cd33e3c168eff2d1863f1", size = 207454763 }, ] @@ -4408,7 +4400,6 @@ name = "nvidia-nvjitlink-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/02/45/239d52c05074898a80a900f49b1615d81c07fceadd5ad6c4f86a987c0bc4/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = 
"sha256:4abe7fef64914ccfa909bc2ba39739670ecc9e820c83ccc7a6ed414122599b83", size = 20552510 }, { url = "https://files.pythonhosted.org/packages/ff/ff/847841bacfbefc97a00036e0fce5a0f086b640756dc38caea5e1bb002655/nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:06b3b9b25bf3f8af351d664978ca26a16d2c5127dbd53c0497e28d1fb9611d57", size = 21066810 }, ] @@ -4417,7 +4408,6 @@ name = "nvidia-nvtx-cu12" version = "12.4.127" source = { registry = "https://pypi.org/simple" } wheels = [ - { url = "https://files.pythonhosted.org/packages/06/39/471f581edbb7804b39e8063d92fc8305bdc7a80ae5c07dbe6ea5c50d14a5/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7959ad635db13edf4fc65c06a6e9f9e55fc2f92596db928d169c0bb031e88ef3", size = 100417 }, { url = "https://files.pythonhosted.org/packages/87/20/199b8713428322a2f22b722c62b8cc278cc53dffa9705d744484b5035ee9/nvidia_nvtx_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl", hash = "sha256:781e950d9b9f60d8241ccea575b32f5105a5baf4c2351cab5256a24869f12a1a", size = 99144 }, ] From 0e01720c320086c404d02b5585931b0defaedf58 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 6 Feb 2025 12:08:39 -0800 Subject: [PATCH 92/93] hash output for detecting log changes --- .../agentic_memory/_agentic_memory_bank.py | 2 +- .../src/autogen_ext/agentic_memory/_utils.py | 32 ++++++++++++++++++- .../autogen_ext/agentic_memory/page_logger.py | 13 +++++++- 3 files changed, 44 insertions(+), 3 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index 37802edcb47c..0f41e6908d3a 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -34,7 +34,7 @@ class AgenticMemoryBank: - contains_insights: Returns True if the memory bank contains any insights. - add_insight: Adds an insight to the memory bank, given topics related to the insight, and optionally the task. - add_task_with_solution: Adds a task-insight pair to the memory bank, to be retrieved together later. - - get_relevant_insights: Returns any insights from the memory bank that appear sufficiently relevant to the given + - get_relevant_insights: Returns any insights from the memory bank that appear sufficiently relevant to the given task topics. 
""" def __init__(self, settings: Dict[str, Any], reset: bool, logger: PageLogger) -> None: diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py index 606e434c8f6d..f1c9aed6f41c 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_utils.py @@ -1,4 +1,6 @@ -from typing import List, Union +import hashlib +import os +from typing import List, Tuple, Union from autogen_core import FunctionCall, Image from autogen_core.models import FunctionExecutionResult @@ -64,3 +66,31 @@ def single_image_from_user_content(user_content: UserContent) -> Union[Image, No else: raise AssertionError("Unexpected response type.") return image_to_return + + +def hash_directory(directory: str, hash_algo: str = "sha256") -> Tuple[str, int, int]: + """Computes a hash representing the state of a directory, including its structure and file contents.""" + hash_func = hashlib.new(hash_algo) + + # Also count the number of files and sub-directories + num_files = 0 + num_subdirs = 0 + + for root, dirs, files in sorted(os.walk(directory)): # Ensure order for consistent hashing + num_files += len(files) + num_subdirs += len(dirs) + for dir_name in sorted(dirs): + hash_func.update(dir_name.encode()) # Hash directory names + + for file_name in sorted(files): + file_path = os.path.join(root, file_name) + hash_func.update(file_name.encode()) # Hash file names + + try: + with open(file_path, "rb") as f: + while chunk := f.read(4096): # Read in chunks + hash_func.update(chunk) + except Exception: + pass + + return hash_func.hexdigest(), num_files, num_subdirs diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py index a8e4e7c68165..5203f4f71312 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py @@ -17,7 +17,7 @@ UserMessage, ) -from ._utils import MessageContent +from ._utils import MessageContent, hash_directory def html_opening(file_title: str, finished: bool = False) -> str: @@ -111,6 +111,17 @@ def __init__(self, settings: Dict[str, Any]) -> None: self._create_run_dir() self.flush() + def __del__(self) -> None: + # Writes a hash of the log directory to a file for change detection. 
+ hash_str, num_files, num_subdirs = hash_directory(self.log_dir) + filename = "00 hash-{}.txt".format(hash_str[-5:]) + hash_path = os.path.join(self.log_dir, filename) + with open(hash_path, "w") as f: + f.write(hash_str) + f.write("\n") + f.write("{} files\n".format(num_files)) + f.write("{} subdirectories\n".format(num_subdirs)) + def _get_next_page_id(self) -> int: """Returns the next page id and increments the counter.""" self.last_page_id += 1 From 0bc0500ac73558955f9ded4688d657cff7c0c831 Mon Sep 17 00:00:00 2001 From: Ricky Loynd Date: Thu, 6 Feb 2025 18:03:23 -0800 Subject: [PATCH 93/93] Make logger and config args optional --- .../agentic_memory/_agentic_memory_bank.py | 41 ++++++++++++--- .../autogen_ext/agentic_memory/_prompter.py | 9 ++-- .../agentic_memory/_string_similarity_map.py | 6 ++- .../agentic_memory_controller.py | 52 +++++++++++++------ .../autogen_ext/agentic_memory/apprentice.py | 39 +++++++++++--- .../src/autogen_ext/agentic_memory/grader.py | 13 ++--- .../autogen_ext/agentic_memory/page_logger.py | 25 +++++++-- .../eval_learning_from_demonstration.py | 4 +- .../agentic_memory/eval_self_teaching.py | 4 +- .../agentic_memory/eval_teachability.py | 2 +- 10 files changed, 140 insertions(+), 55 deletions(-) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py index 0f41e6908d3a..9c4c511ba57b 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_agentic_memory_bank.py @@ -24,9 +24,13 @@ class AgenticMemoryBank: Stores task-completion insights in a vector DB for later retrieval. Args: - - settings: Settings for the memory bank. - reset: True to clear the DB before starting. - - logger: The PageLogger object to use for logging. + - config: An optional dict that can be used to override the following values: + - path: The path to the directory where the memory bank files are stored. + - relevance_conversion_threshold: The threshold used to normalize relevance. + - n_results: The maximum number of most relevant results to return for any given topic. + - distance_threshold: The maximum topic-insight distance for an insight to be retrieved. + - logger: An optional logger. If None, no logging will be performed. Methods: - reset: Forces immediate deletion of all contents, in memory and on disk. @@ -37,15 +41,36 @@ class AgenticMemoryBank: - get_relevant_insights: Returns any insights from the memory bank that appear sufficiently relevant to the given task topics. """ - def __init__(self, settings: Dict[str, Any], reset: bool, logger: PageLogger) -> None: - self.settings = settings + def __init__( + self, + reset: bool, + config: Dict[str, Any] | None = None, + logger: PageLogger | None = None, + ) -> None: + if logger is None: + logger = PageLogger() # Nothing will be logged by this object. self.logger = logger self.logger.enter_function() - memory_dir_path = os.path.expanduser(self.settings["path"]) - self.relevance_conversion_threshold = self.settings["relevance_conversion_threshold"] - self.n_results = self.settings["n_results"] - self.distance_threshold = self.settings["distance_threshold"] + # Assign default values that can be overridden by config. 
+ memory_dir_path = os.path.expanduser("~/agentic_memory/temp") + self.relevance_conversion_threshold = 1.7 + self.n_results = 25 + self.distance_threshold = 100 + + if config is not None: + # Apply any overrides from the config. + for key in config: + if key == "path": + memory_dir_path = os.path.expanduser(config[key]) + elif key == "relevance_conversion_threshold": + self.relevance_conversion_threshold = config[key] + elif key == "n_results": + self.n_results = config[key] + elif key == "distance_threshold": + self.distance_threshold = config[key] + else: + self.logger.error('Unexpected item in config: ["{}"] = {}'.format(key, config[key])) path_to_db_dir = os.path.join(memory_dir_path, "string_map") self.path_to_dict = os.path.join(memory_dir_path, "uid_insight_dict.pkl") diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py index 556a3b2e6fc9..033f8b318956 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_prompter.py @@ -21,7 +21,7 @@ class Prompter: Args: client: The client to call the model. - logger: The logger to log the model calls. + logger: An optional logger. If None, no logging will be performed. Methods: call_model: Calls the model client with the given input and returns the response. @@ -33,9 +33,12 @@ class Prompter: extract_advice: Returns advice from the given text, or None if not found. """ - def __init__(self, client: ChatCompletionClient, logger: PageLogger): - self.client = client + def __init__(self, client: ChatCompletionClient, logger: PageLogger | None = None) -> None: + if logger is None: + logger = PageLogger() # Nothing will be logged by this object. self.logger = logger + + self.client = client self.default_system_message_content = "You are a helpful assistant." self.time_spent_in_model_calls = 0.0 self.num_model_calls = 0 diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py index 1444a043039a..7562e586d28f 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/_string_similarity_map.py @@ -22,7 +22,7 @@ class StringSimilarityMap: Args: - reset: True to clear the DB immediately after creation. - path_to_db_dir: Path to the directory where the DB is stored. - - logger: The PageLogger object to use for logging. + - logger: An optional logger. If None, no logging will be performed. Methods: - add_input_output_pair: Adds one input-output string pair to the DB. @@ -31,7 +31,9 @@ class StringSimilarityMap: - save_string_pairs: Saves the string-pair dict to disk. """ - def __init__(self, reset: bool, path_to_db_dir: str, logger: PageLogger) -> None: + def __init__(self, reset: bool, path_to_db_dir: str, logger: PageLogger | None = None) -> None: + if logger is None: + logger = PageLogger() # Nothing will be logged by this object. 
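[Editorial note] With the defaults above in place, AgenticMemoryBank can be constructed with no config at all, or with any subset of the four keys. A hypothetical call exercising every supported key, using the documented default values (import elided; the class lives in the package's private _agentic_memory_bank module):

bank = AgenticMemoryBank(
    reset=True,
    config={
        "path": "~/agentic_memory/temp",      # expanded with os.path.expanduser
        "relevance_conversion_threshold": 1.7,
        "n_results": 25,
        "distance_threshold": 100,
    },
)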
self.logger = logger self.path_to_db_dir = path_to_db_dir diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py index e11155596cdf..872199f2b6dd 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/agentic_memory_controller.py @@ -15,11 +15,14 @@ class AgenticMemoryController: Manages memory-based learning, testing, and the flow of information to and from the memory bank. Args: - settings: Settings for the memory controller. reset: True to clear the memory bank before starting. client: The client to call the model. task_assignment_callback: The callback to assign a task to the agent. - logger: The logger to log the model calls. + - config: An optional dict that can be used to override the following values: + - max_train_trials: The maximum number of trials to attempt when training on a task. + - max_test_trials: The maximum number of trials to attempt when testing on a task. + - AgenticMemoryBank: A config dict passed to AgenticMemoryBank. + logger: An optional logger. If None, a default logger will be created. Methods: reset_memory: Resets the memory bank. @@ -34,19 +37,38 @@ class AgenticMemoryController: def __init__( self, - settings: Dict[str, Any], reset: bool, client: ChatCompletionClient, task_assignment_callback: Callable[[str], Awaitable[Tuple[str, str]]], - logger: PageLogger, + config: Dict[str, Any] | None = None, + logger: PageLogger | None = None, ) -> None: + if logger is None: + logger = PageLogger({"level": "INFO"}) self.logger = logger self.logger.enter_function() - self.settings = settings + + # Assign default values that can be overridden by config. + self.max_train_trials = 10 + self.max_test_trials = 3 + agentic_memory_bank_config = None + + if config is not None: + # Apply any overrides from the config. 
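[Editorial note] Prompter, StringSimilarityMap, and (below) the controller, apprentice, and grader all adopt the same convention: logger is optional, and a missing logger is replaced by a PageLogger whose default level logs nothing, so call sites never need to guard their logging. A stand-in class showing the shape of the convention; Component is illustrative, and the import path is assumed from the sample scripts:

from __future__ import annotations  # lets the X | None syntax run on older Pythons

from autogen_ext.agentic_memory import PageLogger  # assumed public import path


class Component:
    def __init__(self, logger: PageLogger | None = None) -> None:
        if logger is None:
            logger = PageLogger()  # default config: level NONE, logs nothing
        self.logger = logger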
+ for key in config: + if key == "max_train_trials": + self.max_train_trials = config[key] + elif key == "max_test_trials": + self.max_test_trials = config[key] + elif key == "AgenticMemoryBank": + agentic_memory_bank_config = config[key] + else: + self.logger.error('Unexpected item in config: ["{}"] = {}'.format(key, config[key])) + self.client = client self.task_assignment_callback = task_assignment_callback self.prompter = Prompter(client, logger) - self.memory_bank = AgenticMemoryBank(self.settings["AgenticMemoryBank"], reset=reset, logger=logger) + self.memory_bank = AgenticMemoryBank(reset=reset, config=agentic_memory_bank_config, logger=logger) self.grader = Grader(client, logger) self.logger.leave_function() @@ -62,9 +84,7 @@ async def train_on_task(self, task: str, expected_answer: str) -> None: """ self.logger.enter_function() self.logger.info("Iterate on the task, possibly discovering a useful new insight.\n") - _, insight = await self._iterate_on_task( - task, expected_answer, self.settings["max_train_trials"], self.settings["max_test_trials"] - ) + _, insight = await self._iterate_on_task(task, expected_answer) if insight is None: self.logger.info("No useful insight was discovered.\n") else: @@ -219,7 +239,7 @@ def _format_memory_section(self, memories: List[str]) -> str: return memory_section async def _test_for_failure( - self, task: str, task_plus_insights: str, expected_answer: str, num_trials: int + self, task: str, task_plus_insights: str, expected_answer: str ) -> Tuple[bool, str, str]: """ Attempts to solve the given task multiple times to find a failure case to learn from. @@ -231,7 +251,7 @@ async def _test_for_failure( failure_found = False response, work_history = "", "" - for trial in range(num_trials): + for trial in range(self.max_test_trials): self.logger.info("\n----- TRIAL {} -----\n".format(trial + 1)) # Attempt to solve the task. @@ -252,9 +272,7 @@ async def _test_for_failure( self.logger.leave_function() return failure_found, response, work_history - async def _iterate_on_task( - self, task: str, expected_answer: str, max_train_trials: int, max_test_trials: int - ) -> Tuple[str, None | str]: + async def _iterate_on_task(self, task: str, expected_answer: str) -> Tuple[str, None | str]: """ Repeatedly assigns a task to the agent, and tries to learn from failures by creating useful insights as memories. """ @@ -270,7 +288,7 @@ async def _iterate_on_task( successful_insight = None # Loop until success (or timeout) while learning from failures. - for trial in range(1, max_train_trials + 1): + for trial in range(1, self.max_train_trials + 1): self.logger.info("\n----- TRAIN TRIAL {} -----\n".format(trial)) task_plus_insights = task @@ -284,7 +302,7 @@ async def _iterate_on_task( # Can we find a failure case to learn from? failure_found, response, work_history = await self._test_for_failure( - task, task_plus_insights, expected_answer, max_test_trials + task, task_plus_insights, expected_answer ) if not failure_found: # No. Time to exit the loop. @@ -299,7 +317,7 @@ async def _iterate_on_task( break # Will we try again? - if trial == max_train_trials: + if trial == self.max_train_trials: # No. We're out of training trials. 
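[Editorial note] The controller's accepted keys and defaults are now explicit in the code above. A hypothetical config dict matching them; the nested dict is forwarded verbatim to AgenticMemoryBank:

controller_config = {
    "max_train_trials": 10,  # documented default
    "max_test_trials": 3,    # documented default
    "AgenticMemoryBank": {"path": "~/agentic_memory/temp"},
}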
self.logger.info("\nNo more trials will be attempted.\n") break diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py index d62078e88811..b23d1750fbfd 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/apprentice.py @@ -26,9 +26,12 @@ class Apprentice: and call the Agentic Memory Controller using this class as an example. Args: - settings: The settings for the apprentice. client: The client to call the model. - logger: The logger to log the model calls. + - config: An optional dict that can be used to override the following values: + - name_of_agent_or_team: The name of the target agent or team for assigning tasks to. + - disable_prefix_caching: True to disable prefix caching by prepending random ints to the first message. + - AgenticMemoryController: A config dict passed to AgenticMemoryController. + logger: An optional logger. If None, a default logger will be created. Methods: reset_memory: Resets the memory bank. @@ -38,22 +41,44 @@ class Apprentice: train_on_task: Repeatedly assigns a task to the completion agent, and tries to learn from failures by creating useful insights as memories. """ - def __init__(self, settings: Dict[str, Any], client: ChatCompletionClient, logger: PageLogger) -> None: - self.client = client + def __init__( + self, + client: ChatCompletionClient, + config: Dict[str, Any] | None = None, + logger: PageLogger | None = None, + ) -> None: + if logger is None: + logger = PageLogger({"level": "INFO"}) self.logger = logger - self.name_of_agent_or_team = settings["name_of_agent_or_team"] - self.disable_prefix_caching = settings["disable_prefix_caching"] + # Assign default values that can be overridden by config. + self.name_of_agent_or_team = "SimpleAgent" + self.disable_prefix_caching = False + agentic_memory_controller_config = None + + if config is not None: + # Apply any overrides from the config. + for key in config: + if key == "name_of_agent_or_team": + self.name_of_agent_or_team = config[key] + elif key == "disable_prefix_caching": + self.disable_prefix_caching = config[key] + elif key == "AgenticMemoryController": + agentic_memory_controller_config = config[key] + else: + self.logger.error('Unexpected item in config: ["{}"] = {}'.format(key, config[key])) + + self.client = client if self.disable_prefix_caching: self.rand = random.Random() self.rand.seed(int(time.time() * 1000)) # Create the AgenticMemoryController, which creates the AgenticMemoryBank. self.memory_controller = AgenticMemoryController( - settings=settings["AgenticMemoryController"], reset=True, client=self.client, task_assignment_callback=self.assign_task_to_agent_or_team, + config=agentic_memory_controller_config, logger=self.logger, ) diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py index a045ee9afd98..569c82b9a8a0 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/grader.py @@ -25,7 +25,7 @@ class Grader: Args: client: The client to call the model. - logger: The logger to log the model calls. + logger: An optional logger. If None, no logging will be performed. Methods: test_apprentice: Tests the apprentice on the given task. 
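[Editorial note] Apprentice follows suit: the client moves to the first positional argument and the optional config carries the two documented keys plus a nested controller config. A sketch, assuming client is any ChatCompletionClient, logger is a PageLogger, and controller_config is the dict sketched above (imports elided):

apprentice_config = {
    "name_of_agent_or_team": "SimpleAgent",  # the documented default
    "disable_prefix_caching": True,          # prepends random ints to defeat prefix caching
    "AgenticMemoryController": controller_config,
}
apprentice = Apprentice(client, apprentice_config, logger)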
@@ -33,9 +33,11 @@ class Grader: is_response_correct: Determines whether the response is equivalent to the task's correct answer. """ - def __init__(self, client: ChatCompletionClient, logger: PageLogger): - self.client = client + def __init__(self, client: ChatCompletionClient, logger: PageLogger | None = None) -> None: + if logger is None: + logger = PageLogger() # Nothing will be logged by this object. self.logger = logger + self.client = client # Check whether to report results to the client. self.report_results = hasattr(self.client, "report_result") @@ -51,9 +53,8 @@ async def test_apprentice( num_trials: int, use_memory: bool, client: ChatCompletionClient, - logger: PageLogger, ) -> Tuple[int, int]: - logger.enter_function() + self.logger.enter_function() self.logger.info("Testing the apprentice on the given task.\n") @@ -74,7 +75,7 @@ async def test_apprentice( self.logger.info("Answer is INCORRECT.\n") self.logger.info("\nSuccess rate: {}%\n".format(round((num_successes / num_trials) * 100))) - logger.leave_function() + self.logger.leave_function() return num_successes, num_trials async def call_model( diff --git a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py index 5203f4f71312..269512009c51 100644 --- a/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py +++ b/python/packages/autogen-ext/src/autogen_ext/agentic_memory/page_logger.py @@ -71,9 +71,9 @@ class PageLogger: Logs text and images to a set of HTML pages, one per function/method, linked to each other in a call tree. Args: - settings: A dictionary containing the following keys: + - config: An optional dict that can be used to override the following values: - level: The logging level, one of DEBUG, INFO, WARNING, ERROR, CRITICAL, or NONE. - - path: The path to the directory where the log files will be saved. + - path: The path to the directory where the log files will be written. Methods: debug: Adds DEBUG text to the current page if debugging level <= DEBUG. @@ -91,7 +91,7 @@ class PageLogger: leave_function: Finishes the page corresponding to the current function call. """ - def __init__(self, settings: Dict[str, Any]) -> None: + def __init__(self, config: Dict[str, Any] | None = None) -> None: self.levels = { "DEBUG": 10, "INFO": 20, @@ -100,10 +100,25 @@ def __init__(self, settings: Dict[str, Any]) -> None: "CRITICAL": 50, "NONE": 100, } - self.level = self.levels[settings["level"]] + + # Assign default values that can be overridden by config. + self.level = self.levels["NONE"] # Default to no logging at all. + self.log_dir = os.path.expanduser("~/pagelogs/temp") + + if config is not None: + # Apply any overrides from the config. + for key in config: + if key == "level": + self.level = self.levels[config[key]] + elif key == "path": + self.log_dir = os.path.expanduser(config[key]) + else: + raise ValueError(f"Unknown key in PageLogger config: {key}") + + # If the log level is set to NONE or higher, don't log anything. 
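[Editorial note] Under the new PageLogger defaults shown above, constructing it with no arguments yields a logger that writes nothing, and unknown config keys fail fast. Two hypothetical constructions:

quiet = PageLogger()  # level defaults to NONE, so nothing is logged
verbose = PageLogger({"level": "DEBUG", "path": "~/pagelogs/run1"})
# PageLogger({"depth": 3}) would raise ValueError: Unknown key in PageLogger config: depth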
if self.level >= self.levels["NONE"]: return - self.log_dir = os.path.expanduser(settings["path"]) + self.page_stack = PageStack() self.pages: List[Page] = [] self.last_page_id = 0 diff --git a/python/samples/agentic_memory/eval_learning_from_demonstration.py b/python/samples/agentic_memory/eval_learning_from_demonstration.py index f5d85e0ff76b..afe9ad6eb18b 100644 --- a/python/samples/agentic_memory/eval_learning_from_demonstration.py +++ b/python/samples/agentic_memory/eval_learning_from_demonstration.py @@ -37,7 +37,6 @@ async def eval_learning_from_demonstration( num_trials=num_trials, use_memory=True, client=client, - logger=logger, ) success_rate = round((num_successes / num_trials) * 100) results_str_1 = "Success rate before demonstration: {}%".format(success_rate) @@ -56,7 +55,6 @@ async def eval_learning_from_demonstration( num_trials=num_trials, use_memory=True, client=client, - logger=logger, ) success_rate = round((num_successes / num_trials) * 100) results_str_2 = "Success rate after demonstration: {}%".format(success_rate) @@ -75,7 +73,7 @@ async def run_example(settings_filepath: str) -> None: # Create the necessary components. logger = PageLogger(settings["PageLogger"]) client = create_oai_client(settings["client"]) - apprentice = Apprentice(settings["Apprentice"], client, logger) + apprentice = Apprentice(client, settings["Apprentice"], logger) # Call the example function. results = await eval_learning_from_demonstration(apprentice, client, logger, settings["test"]) diff --git a/python/samples/agentic_memory/eval_self_teaching.py b/python/samples/agentic_memory/eval_self_teaching.py index a3e76fd81d5c..639b1491bd67 100644 --- a/python/samples/agentic_memory/eval_self_teaching.py +++ b/python/samples/agentic_memory/eval_self_teaching.py @@ -50,7 +50,6 @@ async def eval_self_teaching( num_trials=num_final_test_trials, use_memory=True, client=client, - logger=logger, ) logger.info("Task 1 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_1 += num_successes @@ -63,7 +62,6 @@ async def eval_self_teaching( num_trials=num_final_test_trials, use_memory=True, client=client, - logger=logger, ) logger.info("Task 2 success rate: {}%".format(round((num_successes / num_trials) * 100))) total_num_successes_2 += num_successes @@ -92,7 +90,7 @@ async def run_example(settings_filepath: str) -> None: # Create the necessary components. logger = PageLogger(settings["PageLogger"]) client = create_oai_client(settings["client"]) - apprentice = Apprentice(settings["Apprentice"], client, logger) + apprentice = Apprentice(client, settings["Apprentice"], logger) # Call the example function. results = await eval_self_teaching(apprentice, client, logger, settings["test"]) diff --git a/python/samples/agentic_memory/eval_teachability.py b/python/samples/agentic_memory/eval_teachability.py index 6973f5d54db0..13bda50892f8 100644 --- a/python/samples/agentic_memory/eval_teachability.py +++ b/python/samples/agentic_memory/eval_teachability.py @@ -75,7 +75,7 @@ async def run_example(settings_filepath: str) -> None: # Create the necessary components. logger = PageLogger(settings["PageLogger"]) client = create_oai_client(settings["client"]) - apprentice = Apprentice(settings["Apprentice"], client, logger) + apprentice = Apprentice(client, settings["Apprentice"], logger) # Call the example function. results = await eval_teachability(apprentice, client, logger, settings["test"])
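[Editorial note] Taken together, the sample scripts above now wire everything in one direction: a settings dict drives the logger, the client, and the apprentice, each through its own sub-dict. A hypothetical settings fragment consistent with those call sites; the client sub-dict is whatever create_oai_client expects, which this patch does not show:

settings = {
    "PageLogger": {"level": "INFO", "path": "~/pagelogs/teachability"},
    "client": {},  # shape defined by create_oai_client, not shown in this patch
    "Apprentice": {
        "name_of_agent_or_team": "SimpleAgent",
        "disable_prefix_caching": True,
        "AgenticMemoryController": {
            "max_train_trials": 10,
            "max_test_trials": 3,
            "AgenticMemoryBank": {"path": "~/agentic_memory/temp"},
        },
    },
}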