diff --git a/python/packages/autogen-core/docs/src/reference/index.md b/python/packages/autogen-core/docs/src/reference/index.md
index f9dc5c1a19b1..ffa2581ba96d 100644
--- a/python/packages/autogen-core/docs/src/reference/index.md
+++ b/python/packages/autogen-core/docs/src/reference/index.md
@@ -52,6 +52,7 @@ python/autogen_ext.models.openai
 python/autogen_ext.models.replay
 python/autogen_ext.models.azure
 python/autogen_ext.models.semantic_kernel
+python/autogen_ext.models.ollama
 python/autogen_ext.tools.code_execution
 python/autogen_ext.tools.graphrag
 python/autogen_ext.tools.http
diff --git a/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst
new file mode 100644
index 000000000000..67076e797251
--- /dev/null
+++ b/python/packages/autogen-core/docs/src/reference/python/autogen_ext.models.ollama.rst
@@ -0,0 +1,8 @@
+autogen\_ext.models.ollama
+==========================
+
+
+.. automodule:: autogen_ext.models.ollama
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py b/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
index a3df82b3b0c6..4ab1af045dd7 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/ollama/_ollama_client.py
@@ -402,6 +402,7 @@ async def create(
                 response_format_value = value.model_json_schema()
             else:
                 # response_format_value is not a Pydantic model class
+                # TODO: Should this be a warning/error?
                 response_format_value = None
 
         # Remove 'response_format' from create_args to prevent passing it twice
@@ -842,15 +843,16 @@ def model_info(self) -> ModelInfo:
 class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseOllamaClientConfigurationConfigModel]):
     """Chat completion client for Ollama hosted models.
 
-    You can also use this client for Ollama-compatible ChatCompletion endpoints.
-
     Ollama must be installed and the appropriate model pulled.
 
     Args:
         model (str): Which Ollama model to use.
-        host (str): Model host url.
-        response_format (optional, pydantic.BaseModel)
+        host (optional, str): Model host URL.
+        response_format (optional, pydantic.BaseModel): The format of the response. If provided, the response will be parsed into this format as JSON.
+        model_info (optional, ModelInfo): The capabilities of the model. **Required if the model is not listed in the Ollama model info.**
 
+    Note:
+        Only models with 200k+ downloads (as of Jan 21, 2025), plus phi4 and deepseek-r1, have pre-defined model infos. See `this file `__ for the full list. An entry for one model encompasses all parameter variants of that model.
 
     To use this client, you must install the `ollama` extension:
 
@@ -886,7 +888,11 @@ class OllamaChatCompletionClient(BaseOllamaChatCompletionClient, Component[BaseO
             client = ChatCompletionClient.load_component(config)
 
     To output structured data, you can use the `response_format` argument:
+
     .. code-block:: python
+
+        from autogen_ext.models.ollama import OllamaChatCompletionClient
+        from autogen_core.models import UserMessage
         from pydantic import BaseModel
 
@@ -902,7 +908,8 @@ class StructuredOutput(BaseModel):
         result = await ollama_client.create([UserMessage(content="Who was the first man on the moon?", source="user")])  # type: ignore
         print(result)
 
-    Note: Tool usage in ollama is stricter than in its OpenAI counterparts. While OpenAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing ``type`` and ``description`` fields. Therefore, only the keys ``type`` and ``description`` will be converted from the properties blob in the tool schema.
+    Note:
+        Tool usage in Ollama is stricter than in its OpenAI counterpart. While OpenAI accepts a map of [str, Any], Ollama requires a map of [str, Property] where Property is a typed object containing ``type`` and ``description`` fields. Therefore, only the keys ``type`` and ``description`` will be converted from the properties blob in the tool schema.
 
     To view the full list of available configuration options, see the :py:class:`OllamaClientConfigurationConfigModel` class.
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py b/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
index 7bc43b395b37..d35dc601c049 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/ollama/config/__init__.py
@@ -5,9 +5,12 @@
 from typing_extensions import TypedDict
 
 
+# response_format MUST be a pydantic.BaseModel type or None
+# TODO: check if we can extend response_format to support json and/or dict
 class CreateArguments(TypedDict, total=False):
     model: str
     host: Optional[str]
+    response_format: Any
 
 
 class BaseOllamaClientConfiguration(CreateArguments, total=False):
@@ -20,9 +23,11 @@
 
 
 # Pydantic equivalents of the above TypedDicts
+# response_format MUST be a pydantic.BaseModel type or None
class CreateArgumentsConfigModel(BaseModel):
     model: str
     host: str | None = None
+    response_format: Any = None
 
 
 class BaseOllamaClientConfigurationConfigModel(CreateArgumentsConfigModel):
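To sanity-check the documented `response_format` flow end to end, here is a minimal runnable sketch based on the docstring above. It assumes a local Ollama server with `llama3` pulled; the `asyncio` wrapper and the final `model_validate_json` call are additions for illustration and are not part of the docstring example.

```python
import asyncio

from autogen_core.models import UserMessage
from autogen_ext.models.ollama import OllamaChatCompletionClient
from pydantic import BaseModel


# Mirrors the docstring example: the response is parsed against this schema.
class StructuredOutput(BaseModel):
    first_name: str
    last_name: str


async def main() -> None:
    client = OllamaChatCompletionClient(
        model="llama3",
        response_format=StructuredOutput,  # must be a pydantic.BaseModel type or None
    )
    result = await client.create(
        [UserMessage(content="Who was the first man on the moon?", source="user")]
    )
    # result.content is a JSON string conforming to StructuredOutput.
    print(StructuredOutput.model_validate_json(str(result.content)))


asyncio.run(main())
```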
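The new `model_info` argument is documented as required when the model is not in the pre-defined list. A hedged sketch of what that call might look like: the model name and capability flags below are assumptions for illustration, passed as a plain dict since `ModelInfo` is a TypedDict in `autogen_core.models`.

```python
from autogen_ext.models.ollama import OllamaChatCompletionClient

# Hypothetical model name; capability values are assumptions, not library defaults.
client = OllamaChatCompletionClient(
    model="my-custom-model",
    model_info={
        "vision": False,
        "function_calling": True,
        "json_output": True,
        "family": "unknown",
    },
)
```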
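The Note about tool schemas is easiest to see with a toy conversion. The helper below is not the library's actual converter, just a hypothetical illustration of the stated rule: only the ``type`` and ``description`` keys of each property blob survive the trip into Ollama's typed Property map.

```python
from typing import Any, Dict


# Hypothetical helper (not the library's code) showing the docstring's rule:
# keep only "type" and "description" from each OpenAI-style property blob.
def keep_typed_property_fields(
    properties: Dict[str, Dict[str, Any]],
) -> Dict[str, Dict[str, str]]:
    converted: Dict[str, Dict[str, str]] = {}
    for name, blob in properties.items():
        converted[name] = {k: blob[k] for k in ("type", "description") if k in blob}
    return converted


openai_style = {
    "city": {"type": "string", "description": "City name", "enum": ["Paris", "Lyon"]},
}
print(keep_typed_property_fields(openai_style))
# {'city': {'type': 'string', 'description': 'City name'}}  (the "enum" key is dropped)
```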