diff --git a/python/packages/autogen-core/docs/src/reference/index.md b/python/packages/autogen-core/docs/src/reference/index.md
index f9dc5c1a19b1..ffa2581ba96d 100644
--- a/python/packages/autogen-core/docs/src/reference/index.md
+++ b/python/packages/autogen-core/docs/src/reference/index.md
@@ -52,6 +52,7 @@ python/autogen_ext.models.openai
 python/autogen_ext.models.replay
 python/autogen_ext.models.azure
 python/autogen_ext.models.semantic_kernel
+python/autogen_ext.models.ollama
 python/autogen_ext.tools.code_execution
 python/autogen_ext.tools.graphrag
 python/autogen_ext.tools.http
diff --git a/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst
new file mode 100644
index 000000000000..67076e797251
--- /dev/null
+++ b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst
@@ -0,0 +1,8 @@
+autogen\_ext.models.ollama
+==========================
+
+
+.. automodule:: autogen_ext.models.ollama
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py b/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
index a3df82b3b0c6..4ab1af045dd7 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
@@ -402,6 +402,7 @@ async def create(
                 response_format_value = value.model_json_schema()
             else:
                 # response_format_value is not a Pydantic model class
+                # TODO: Should this be a warning/error?
                 response_format_value = None
 
         # Remove 'response_format' from create_args to prevent passing it twice
@@ -842,15 +843,16 @@ def model_info(self) -> ModelInfo:
 class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
     """Chat completion client for Ollama hosted models.
 
-    You can also use this client for Ollama-compatible ChatCompletion endpoints.
-
     Ollama must be installed and the appropriate model pulled.
 
     Args:
         model (str): Which Ollama model to use.
-        host (str): Model host url.
-        response_format (optional, pydantic.BaseModel)
+        host (optional, str): Model host URL.
+        response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as JSON.
+        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the Ollama model info.**
 
+    Note:
+        Only models with 200k+ downloads (as of Jan 21, 2025), plus phi4 and deepseek-r1, have pre-defined model infos. See `this file `__ for the full list. An entry for one model encompasses all parameter variants of that model.
 
     To use this client, you must install the `ollama` extension:
 
@@ -886,7 +888,11 @@ class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseO
             client = ChatCompletionClient.load_component(config)
 
     To output structured data, you can use the `response_format` argument:
+
     .. code-block:: python
+
+        from autogen_ext.models.ollama import OllamaChatCompletionClient
+        from autogen_core.models import UserMessage
         from pydantic import BaseModel
 
@@ -902,7 +908,8 @@ class StructuredOutput(BaseModel):
         result = await ollama_client.create([UserMessage(content="Who was the first man on the moon?", source="user")])  # type: ignore
         print(result)
 
-    Note: Tool usage in ollama is stricter than in its OpenAI counterparts. While OpenAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing ``type`` and ``description`` fields. Therefore, only the keys ``type`` and ``description`` will be converted from the properties blob in the tool schema.
+    Note:
+        Tool usage in Ollama is stricter than in its OpenAI counterpart. While OpenAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing ``type`` and ``description`` fields. Therefore, only the keys ``type`` and ``description`` will be converted from the properties blob in the tool schema.
 
     To view the full list of available configuration options, see the :py:class:`OllamaClientConfigurationConfigModel` class.
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
index 7bc43b395b37..d35dc601c049 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
@@ -5,9 +5,12 @@
 from typing_extensions import TypedDict
 
 
+# response_format MUST be a pydantic.BaseModel type or None
+# TODO: check if we can extend response_format to support json and/or dict
 class CreateArguments(TypedDict, total=False):
     model: str
     host: Optional[str]
+    response_format: Any
 
 
 class BaseOllamaClientConfiguration(CreateArguments, total=False):
@@ -20,9 +23,11 @@
 
 
 # Pydantic equivalents of the above TypedDicts
+# response_format MUST be a pydantic.BaseModel type or None
class CreateArgumentsConfigModel(BaseModel):
     model: str
     host: str | None = None
+    response_format: Any = None
 
 
 class BaseOllamaClientConfigurationConfigModel(CreateArgumentsConfigModel):
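To sanity-check the documented `response_format` flow end to end, here is a minimal runnable sketch based on the docstring above. It assumes a local Ollama server with `llama3` pulled; the `asyncio` wrapper and the final `model_validate_json` call are additions for illustration and are not part of the docstring example.

```python
import asyncio

from autogen_core.models import UserMessage
from autogen_ext.models.ollama import OllamaChatCompletionClient
from pydantic import BaseModel


# Mirrors the docstring example: the response is parsed against this schema.
class StructuredOutput(BaseModel):
    first_name: str
    last_name: str


async def main() -> None:
    client = OllamaChatCompletionClient(
        model="llama3",
        response_format=StructuredOutput,  # must be a pydantic.BaseModel type or None
    )
    result = await client.create(
        [UserMessage(content="Who was the first man on the moon?", source="user")]
    )
    # result.content is a JSON string conforming to StructuredOutput.
    print(StructuredOutput.model_validate_json(str(result.content)))


asyncio.run(main())
```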
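The new `model_info` argument is documented as required when the model is not in the pre-defined list. A hedged sketch of what that call might look like: the model name and capability flags below are assumptions for illustration, passed as a plain dict since `ModelInfo` is a TypedDict in `autogen_core.models`.

```python
from autogen_ext.models.ollama import OllamaChatCompletionClient

# Hypothetical model name; capability values are assumptions, not library defaults.
client = OllamaChatCompletionClient(
    model="my-custom-model",
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": True,
        "family": "unknown",
    },
)
```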
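The Note about tool schemas is easiest to see with a toy conversion. The helper below is not the library's actual converter, just a hypothetical illustration of the stated rule: only the ``type`` and ``description`` keys of each property blob survive the trip into Ollama's typed Property map.

```python
from typing import Any, Dict


# Hypothetical helper (not the library's code) showing the docstring's rule:
# keep only "type" and "description" from each OpenAI-style property blob.
def keep_typed_property_fields(
    properties: Dict[str, Dict[str, Any]],
) -> Dict[str, Dict[str, str]]:
    converted: Dict[str, Dict[str, str]] = {}
    for name, blob in properties.items():
        converted[name] = {k: blob[k] for k in ("type", "description") if k in blob}
    return converted


openai_style = {
    "city": {"type": "string", "description": "City name", "enum": ["Paris", "Lyon"]},
}
print(keep_typed_property_fields(openai_style))
# {'city': {'type': 'string', 'description': 'City name'}}  (the "enum" key is dropped)
```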