From 403844ef2b5c0b09ad0cebf4d60c94d896f458fa Mon Sep 17 00:00:00 2001 From: Eric Zhu Date: Wed, 29 Jan 2025 16:37:18 -0800 Subject: [PATCH] feat: add Semantic Kernel Adapter documentation and usage examples in user guides (#5256) Partially address #5205 and #5226 --- .../tutorial/models.ipynb | 95 +++++++ .../components/model-clients.ipynb | 95 +++++++ .../_sk_chat_completion_adapter.py | 247 ++++++++++-------- 3 files changed, 330 insertions(+), 107 deletions(-) diff --git a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/tutorial/models.ipynb b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/tutorial/models.ipynb index 94a31db3cee..0f9778d144f 100644 --- a/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/tutorial/models.ipynb +++ b/python/packages/autogen-core/docs/src/user-guide/agentchat-user-guide/tutorial/models.ipynb @@ -327,6 +327,101 @@ "response = await model_client.create([UserMessage(content=\"What is the capital of France?\", source=\"user\")])\n", "print(response)" ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Kernel Adapter\n", + "\n", + "The {py:class}`~autogen_ext.models.semantic_kernel.SKChatCompletionAdapter`\n", + "allows you to use Semantic Kernel model clients as a\n", + "{py:class}`~autogen_core.models.ChatCompletionClient` by adapting them to the required interface.\n", + "\n", + "You need to install the relevant provider extras to use this adapter. 
\n", + "\n", + "The list of extras that can be installed:\n", + "\n", + "- `semantic-kernel-anthropic`: Install this extra to use Anthropic models.\n", + "- `semantic-kernel-google`: Install this extra to use Google Gemini models.\n", + "- `semantic-kernel-ollama`: Install this extra to use Ollama models.\n", + "- `semantic-kernel-mistralai`: Install this extra to use MistralAI models.\n", + "- `semantic-kernel-aws`: Install this extra to use AWS models.\n", + "- `semantic-kernel-hugging-face`: Install this extra to use Hugging Face models.\n", + "\n", + "For example, to use Anthropic models, you need to install `semantic-kernel-anthropic`." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# pip install \"autogen-ext[semantic-kernel-anthropic]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use this adapter, you need to create a Semantic Kernel model client and pass it to the adapter.\n", + "\n", + "For example, to use the Anthropic model:" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "finish_reason='stop' content='The capital of France is Paris. It is also the largest city in France and one of the most populous metropolitan areas in Europe.' 
usage=RequestUsage(prompt_tokens=0, completion_tokens=0) cached=False logprobs=None\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from autogen_core.models import UserMessage\n", + "from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter\n", + "from semantic_kernel import Kernel\n", + "from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings\n", + "from semantic_kernel.memory.null_memory import NullMemory\n", + "\n", + "sk_client = AnthropicChatCompletion(\n", + " ai_model_id=\"claude-3-5-sonnet-20241022\",\n", + " api_key=os.environ[\"ANTHROPIC_API_KEY\"],\n", + " service_id=\"my-service-id\", # Optional; for targeting specific services within Semantic Kernel\n", + ")\n", + "settings = AnthropicChatPromptExecutionSettings(\n", + " temperature=0.2,\n", + ")\n", + "\n", + "anthropic_model_client = SKChatCompletionAdapter(\n", + " sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=settings\n", + ")\n", + "\n", + "# Call the model directly.\n", + "model_result = await anthropic_model_client.create(\n", + " messages=[UserMessage(content=\"What is the capital of France?\", source=\"User\")]\n", + ")\n", + "print(model_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read more about the [Semantic Kernel Adapter](../../../reference/python/autogen_ext.models.semantic_kernel.rst)." 
+ ] } ], "metadata": { diff --git a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb index bb275d2b33c..be7129712df 100644 --- a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb +++ b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb @@ -336,6 +336,101 @@ "print(response)" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Semantic Kernel Adapter\n", + "\n", + "The {py:class}`~autogen_ext.models.semantic_kernel.SKChatCompletionAdapter`\n", + "allows you to use Semantic Kernel model clients as a\n", + "{py:class}`~autogen_core.models.ChatCompletionClient` by adapting them to the required interface.\n", + "\n", + "You need to install the relevant provider extras to use this adapter. \n", + "\n", + "The list of extras that can be installed:\n", + "\n", + "- `semantic-kernel-anthropic`: Install this extra to use Anthropic models.\n", + "- `semantic-kernel-google`: Install this extra to use Google Gemini models.\n", + "- `semantic-kernel-ollama`: Install this extra to use Ollama models.\n", + "- `semantic-kernel-mistralai`: Install this extra to use MistralAI models.\n", + "- `semantic-kernel-aws`: Install this extra to use AWS models.\n", + "- `semantic-kernel-hugging-face`: Install this extra to use Hugging Face models.\n", + "\n", + "For example, to use Anthropic models, you need to install `semantic-kernel-anthropic`." 
+ ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "vscode": { + "languageId": "shellscript" + } + }, + "outputs": [], + "source": [ + "# pip install \"autogen-ext[semantic-kernel-anthropic]\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "To use this adapter, you need to create a Semantic Kernel model client and pass it to the adapter.\n", + "\n", + "For example, to use the Anthropic model:" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "finish_reason='stop' content='The capital of France is Paris. It is also the largest city in France and one of the most populous metropolitan areas in Europe.' usage=RequestUsage(prompt_tokens=0, completion_tokens=0) cached=False logprobs=None\n" + ] + } + ], + "source": [ + "import os\n", + "\n", + "from autogen_core.models import UserMessage\n", + "from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter\n", + "from semantic_kernel import Kernel\n", + "from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings\n", + "from semantic_kernel.memory.null_memory import NullMemory\n", + "\n", + "sk_client = AnthropicChatCompletion(\n", + " ai_model_id=\"claude-3-5-sonnet-20241022\",\n", + " api_key=os.environ[\"ANTHROPIC_API_KEY\"],\n", + " service_id=\"my-service-id\", # Optional; for targeting specific services within Semantic Kernel\n", + ")\n", + "settings = AnthropicChatPromptExecutionSettings(\n", + " temperature=0.2,\n", + ")\n", + "\n", + "anthropic_model_client = SKChatCompletionAdapter(\n", + " sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=settings\n", + ")\n", + "\n", + "# Call the model directly.\n", + "model_result = await anthropic_model_client.create(\n", + " messages=[UserMessage(content=\"What is the capital of France?\", source=\"User\")]\n", + ")\n", + 
"print(model_result)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Read more about the [Semantic Kernel Adapter](../../../reference/python/autogen_ext.models.semantic_kernel.rst)." + ] + }, { "cell_type": "markdown", "metadata": {}, diff --git a/python/packages/autogen-ext/src/autogen_ext/models/semantic_kernel/_sk_chat_completion_adapter.py b/python/packages/autogen-ext/src/autogen_ext/models/semantic_kernel/_sk_chat_completion_adapter.py index 9f501fbbffc..fbc76b83627 100644 --- a/python/packages/autogen-ext/src/autogen_ext/models/semantic_kernel/_sk_chat_completion_adapter.py +++ b/python/packages/autogen-ext/src/autogen_ext/models/semantic_kernel/_sk_chat_completion_adapter.py @@ -35,11 +35,20 @@ class SKChatCompletionAdapter(ChatCompletionClient): By leveraging this adapter, you can: - - Pass in a `Kernel` and any supported Semantic Kernel `ChatCompletionClientBase` connector. - - Provide tools (via Autogen `Tool` or `ToolSchema`) for function calls during chat completion. - - Stream responses or retrieve them in a single request. - - Provide prompt settings to control the chat completion behavior either globally through the constructor - or on a per-request basis through the `extra_create_args` dictionary. + - Pass in a `Kernel` and any supported Semantic Kernel `ChatCompletionClientBase` connector. + - Provide tools (via Autogen `Tool` or `ToolSchema`) for function calls during chat completion. + - Stream responses or retrieve them in a single request. + - Provide prompt settings to control the chat completion behavior either globally through the constructor + or on a per-request basis through the `extra_create_args` dictionary. + + The list of extras that can be installed: + + - `semantic-kernel-anthropic`: Install this extra to use Anthropic models. + - `semantic-kernel-google`: Install this extra to use Google Gemini models. + - `semantic-kernel-ollama`: Install this extra to use Ollama models. 
+ - `semantic-kernel-mistralai`: Install this extra to use MistralAI models. + - `semantic-kernel-aws`: Install this extra to use AWS models. + - `semantic-kernel-hugging-face`: Install this extra to use Hugging Face models. Args: sk_client (ChatCompletionClientBase): @@ -54,124 +63,148 @@ class SKChatCompletionAdapter(ChatCompletionClient): service_id (Optional[str]): Optional service identifier. - Example usage: + Examples: - .. code-block:: python + Anthropic models: - import asyncio - from semantic_kernel import Kernel - from semantic_kernel.memory.null_memory import NullMemory - from semantic_kernel.connectors.ai.open_ai.services.azure_chat_completion import AzureChatCompletion - from semantic_kernel.connectors.ai.open_ai.prompt_execution_settings.azure_chat_prompt_execution_settings import ( - AzureChatPromptExecutionSettings, - ) - from semantic_kernel.connectors.ai.google.google_ai import GoogleAIChatCompletion - from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings - from autogen_core.models import SystemMessage, UserMessage, LLMMessage - from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter - from autogen_core import CancellationToken - from autogen_core.tools import BaseTool - from pydantic import BaseModel + .. code-block:: bash + pip install "autogen-ext[semantic-kernel-anthropic]" - # 1) Basic tool definition (for demonstration) - class CalculatorArgs(BaseModel): - a: float - b: float + .. 
code-block:: python + import asyncio + import os - class CalculatorResult(BaseModel): - result: float + from autogen_agentchat.agents import AssistantAgent + from autogen_core.models import UserMessage + from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter + from semantic_kernel import Kernel + from semantic_kernel.connectors.ai.anthropic import AnthropicChatCompletion, AnthropicChatPromptExecutionSettings + from semantic_kernel.memory.null_memory import NullMemory - class CalculatorTool(BaseTool[CalculatorArgs, CalculatorResult]): - def __init__(self) -> None: - super().__init__( - args_type=CalculatorArgs, - return_type=CalculatorResult, - name="calculator", - description="Add two numbers together", + async def main() -> None: + sk_client = AnthropicChatCompletion( + ai_model_id="claude-3-5-sonnet-20241022", + api_key=os.environ["ANTHROPIC_API_KEY"], + service_id="my-service-id", # Optional; for targeting specific services within Semantic Kernel + ) + settings = AnthropicChatPromptExecutionSettings( + temperature=0.2, ) - async def run(self, args: CalculatorArgs, cancellation_token: CancellationToken) -> CalculatorResult: - return CalculatorResult(result=args.a + args.b) - - - async def main(): - # 2) Create a Semantic Kernel instance (with null memory for simplicity) - kernel = Kernel(memory=NullMemory()) - - # ---------------------------------------------------------------- - # Example A: Azure OpenAI - # ---------------------------------------------------------------- - deployment_name = "" - endpoint = "" - api_key = "" - - azure_client = AzureChatCompletion(deployment_name=deployment_name, endpoint=endpoint, api_key=api_key) - azure_settings = AzureChatPromptExecutionSettings(temperature=0.8) - azure_adapter = SKChatCompletionAdapter(sk_client=azure_client, kernel=kernel, prompt_settings=azure_settings) - - # ---------------------------------------------------------------- - # Example B: Google Gemini - # 
---------------------------------------------------------------- - google_api_key = "" - google_model = "gemini-1.5-flash" - google_client = GoogleAIChatCompletion(gemini_model_id=google_model, api_key=google_api_key) - google_adapter = SKChatCompletionAdapter(sk_client=google_client) - - # ---------------------------------------------------------------- - # Example C: Ollama (local Llama-based model) - # ---------------------------------------------------------------- - ollama_client = OllamaChatCompletion( - service_id="ollama", # custom ID - host="http://localhost:11434", - ai_model_id="llama3.1", - ) - request_settings = OllamaChatPromptExecutionSettings( - # For model specific settings, specify them in the options dictionary. - # For more information on the available options, refer to the Ollama API documentation: - # https://github.com/ollama/ollama/blob/main/docs/modelfile.md#valid-parameters-and-values - options={ - "temperature": 0.8, - }, - ) - ollama_adapter = SKChatCompletionAdapter(sk_client=ollama_client, prompt_settings=request_settings) - - # 3) Create a tool and register it with the kernel - calc_tool = CalculatorTool() - - # 4) Prepare messages for a chat completion - messages: list[LLMMessage] = [ - SystemMessage(content="You are a helpful assistant."), - UserMessage(content="What is 2 + 2?", source="user"), - ] - - # 5) Invoke chat completion with different adapters - # Azure example - azure_result = await azure_adapter.create( - messages=messages, - tools=[calc_tool], - ) - print("Azure result:", azure_result.content) + model_client = SKChatCompletionAdapter(sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=settings) - # Google example - google_result = await google_adapter.create( - messages=messages, - tools=[calc_tool], - ) - print("Google result:", google_result.content) + # Call the model directly. 
+ model_result = await model_client.create( + messages=[UserMessage(content="What is the capital of France?", source="User")] + ) + print(model_result) - # Ollama example - ollama_result = await ollama_adapter.create( - messages=messages, - tools=[calc_tool], + # Create an assistant agent with the model client. + assistant = AssistantAgent("assistant", model_client=model_client) + # Call the assistant with a task. + result = await assistant.run(task="What is the capital of France?") + print(result) + + + asyncio.run(main()) + + Google Gemini models: + + .. code-block:: bash + + pip install "autogen-ext[semantic-kernel-google]" + + .. code-block:: python + + import asyncio + import os + + from autogen_agentchat.agents import AssistantAgent + from autogen_core.models import UserMessage + from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter + from semantic_kernel import Kernel + from semantic_kernel.connectors.ai.google.google_ai import ( + GoogleAIChatCompletion, + GoogleAIChatPromptExecutionSettings, ) - print("Ollama result:", ollama_result.content) + from semantic_kernel.memory.null_memory import NullMemory + + + async def main() -> None: + sk_client = GoogleAIChatCompletion( + gemini_model_id="gemini-1.5-flash", + api_key=os.environ["GEMINI_API_KEY"], + ) + settings = GoogleAIChatPromptExecutionSettings( + temperature=0.2, + ) + + model_client = SKChatCompletionAdapter(sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=settings) + + # Call the model directly. + model_result = await model_client.create( + messages=[UserMessage(content="What is the capital of France?", source="User")] + ) + print(model_result) + + # Create an assistant agent with the model client. + assistant = AssistantAgent("assistant", model_client=model_client) + # Call the assistant with a task. + result = await assistant.run(task="What is the capital of France?") + print(result) + + + asyncio.run(main()) + + Ollama models: + + .. 
code-block:: bash + + pip install "autogen-ext[semantic-kernel-ollama]" + + .. code-block:: python + + import asyncio + + from autogen_agentchat.agents import AssistantAgent + from autogen_core.models import UserMessage + from autogen_ext.models.semantic_kernel import SKChatCompletionAdapter + from semantic_kernel import Kernel + from semantic_kernel.connectors.ai.ollama import OllamaChatCompletion, OllamaChatPromptExecutionSettings + from semantic_kernel.memory.null_memory import NullMemory + + + async def main() -> None: + sk_client = OllamaChatCompletion( + host="http://localhost:11434", + ai_model_id="llama3.2:latest", + ) + ollama_settings = OllamaChatPromptExecutionSettings( + options={"temperature": 0.5}, + ) + + model_client = SKChatCompletionAdapter( + sk_client, kernel=Kernel(memory=NullMemory()), prompt_settings=ollama_settings + ) + + # Call the model directly. + model_result = await model_client.create( + messages=[UserMessage(content="What is the capital of France?", source="User")] + ) + print(model_result) + + # Create an assistant agent with the model client. + assistant = AssistantAgent("assistant", model_client=model_client) + # Call the assistant with a task. + result = await assistant.run(task="What is the capital of France?") + print(result) - if __name__ == "__main__": asyncio.run(main()) + """ def __init__(