Add ChatCompletionCache along with AbstractStore for caching completions #4924

Merged · 6 commits · Jan 16, 2025
3 changes: 3 additions & 0 deletions python/packages/autogen-core/docs/src/reference/index.md
Original file line number Diff line number Diff line change
@@ -48,6 +48,7 @@ python/autogen_ext.agents.video_surfer
python/autogen_ext.agents.video_surfer.tools
python/autogen_ext.auth.azure
python/autogen_ext.teams.magentic_one
python/autogen_ext.models.cache
python/autogen_ext.models.openai
python/autogen_ext.models.replay
python/autogen_ext.tools.langchain
@@ -56,5 +57,7 @@ python/autogen_ext.tools.code_execution
python/autogen_ext.code_executors.local
python/autogen_ext.code_executors.docker
python/autogen_ext.code_executors.azure
python/autogen_ext.cache_store.diskcache
python/autogen_ext.cache_store.redis
python/autogen_ext.runtimes.grpc
```
@@ -0,0 +1,8 @@
autogen\_ext.cache_store.diskcache
==================================


.. automodule:: autogen_ext.cache_store.diskcache
:members:
:undoc-members:
:show-inheritance:
@@ -0,0 +1,8 @@
autogen\_ext.cache_store.redis
==============================


.. automodule:: autogen_ext.cache_store.redis
:members:
:undoc-members:
:show-inheritance:
@@ -0,0 +1,8 @@
autogen\_ext.models.cache
=========================


.. automodule:: autogen_ext.models.cache
:members:
:undoc-members:
:show-inheritance:
@@ -1,8 +1,8 @@
autogen\_ext.models.replay
==========================
.. automodule:: autogen_ext.models.replay
:members:
:undoc-members:
:show-inheritance:
autogen\_ext.models.replay
==========================


.. automodule:: autogen_ext.models.replay
:members:
:undoc-members:
:show-inheritance:
@@ -6,7 +6,11 @@
"source": [
"# Models\n",
"\n",
"In many cases, agents need access to LLM model services such as OpenAI, Azure OpenAI, or local models. Since there are many different providers with different APIs, `autogen-core` implements a protocol for [model clients](../../core-user-guide/framework/model-clients.ipynb) and `autogen-ext` implements a set of model clients for popular model services. AgentChat can use these model clients to interact with model services. "
"In many cases, agents need access to LLM model services such as OpenAI, Azure OpenAI, or local models. Since there are many different providers with different APIs, `autogen-core` implements a protocol for [model clients](../../core-user-guide/framework/model-clients.ipynb) and `autogen-ext` implements a set of model clients for popular model services. AgentChat can use these model clients to interact with model services. \n",
"\n",
"```{note}\n",
"See {py:class}`~autogen_ext.models.cache.ChatCompletionCache` for a caching wrapper to use with the following clients.\n",
"```"
]
},
{
@@ -96,7 +96,13 @@
"cell_type": "markdown",
"metadata": {},
"source": [
"Default [Model Capabilities](../faqs.md#what-are-model-capabilities-and-how-do-i-specify-them) may be overridden should the need arise.\n",
"Default [Model Capabilities](../faqs.md#what-are-model-capabilities-and-how-do-i-specify-them) may be overridden should the need arise.\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"\n",
"\n",
"### Streaming Response\n",
@@ -315,6 +321,84 @@
"```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Caching Wrapper\n",
"\n",
"`autogen_ext` implements {py:class}`~autogen_ext.models.cache.ChatCompletionCache` that can wrap any {py:class}`~autogen_core.models.ChatCompletionClient`. Using this wrapper avoids incurring token usage when querying the underlying client with the same prompt multiple times.\n",
"\n",
    "{py:class}`~autogen_ext.models.cache.ChatCompletionCache` uses a {py:class}`~autogen_core.CacheStore` protocol. We have implemented some useful variants of {py:class}`~autogen_core.CacheStore` including {py:class}`~autogen_ext.cache_store.diskcache.DiskCacheStore` and {py:class}`~autogen_ext.cache_store.redis.RedisStore`.\n",
"\n",
"Here's an example of using `diskcache` for local caching:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# pip install -U \"autogen-ext[openai, diskcache]\""
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"True\n"
]
}
],
"source": [
"import asyncio\n",
"import tempfile\n",
"\n",
"from autogen_core.models import UserMessage\n",
"from autogen_ext.cache_store.diskcache import DiskCacheStore\n",
"from autogen_ext.models.cache import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache\n",
"from autogen_ext.models.openai import OpenAIChatCompletionClient\n",
"from diskcache import Cache\n",
"\n",
"\n",
"async def main() -> None:\n",
" with tempfile.TemporaryDirectory() as tmpdirname:\n",
" # Initialize the original client\n",
" openai_model_client = OpenAIChatCompletionClient(model=\"gpt-4o\")\n",
"\n",
" # Then initialize the CacheStore, in this case with diskcache.Cache.\n",
" # You can also use redis like:\n",
" # from autogen_ext.cache_store.redis import RedisStore\n",
" # import redis\n",
" # redis_instance = redis.Redis()\n",
    " # cache_store = RedisStore[CHAT_CACHE_VALUE_TYPE](redis_instance)\n",
" cache_store = DiskCacheStore[CHAT_CACHE_VALUE_TYPE](Cache(tmpdirname))\n",
" cache_client = ChatCompletionCache(openai_model_client, cache_store)\n",
"\n",
" response = await cache_client.create([UserMessage(content=\"Hello, how are you?\", source=\"user\")])\n",
" print(response) # Should print response from OpenAI\n",
" response = await cache_client.create([UserMessage(content=\"Hello, how are you?\", source=\"user\")])\n",
" print(response) # Should print cached response\n",
"\n",
"\n",
"asyncio.run(main())"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
    "Inspecting `cache_client.total_usage()` (or `openai_model_client.total_usage()`) before and after a cached response should yield identical counts.\n",
    "\n",
    "Note that the caching is sensitive to the exact arguments provided to `cache_client.create` or `cache_client.create_stream`, so changing the `tools` or `json_output` arguments might lead to a cache miss."
]
},
{
"cell_type": "markdown",
"metadata": {},
@@ -615,7 +699,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.7"
"version": "3.12.1"
}
},
"nbformat": 4,
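The cache-miss note in the notebook above can be illustrated with a small sketch. This is a hypothetical key scheme, not `ChatCompletionCache`'s actual implementation: it only shows why a key derived from all `create` arguments makes `tools` or `json_output` changes produce a different key.

```python
# Illustrative sketch (an assumed key scheme, not the library's actual one):
# derive a cache key from every argument to create(), so any change in
# tools or json_output yields a different key and therefore a cache miss.
import hashlib
import json


def cache_key(messages, tools=(), json_output=None):
    # Serialize all arguments deterministically, then hash.
    payload = json.dumps(
        {"messages": messages, "tools": list(tools), "json_output": json_output},
        sort_keys=True,
    )
    return hashlib.sha256(payload.encode()).hexdigest()


k1 = cache_key([{"role": "user", "content": "hi"}])
k2 = cache_key([{"role": "user", "content": "hi"}], json_output=True)
print(k1 == k2)  # False: different arguments give a different key
```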
2 changes: 2 additions & 0 deletions python/packages/autogen-core/pyproject.toml
@@ -72,6 +72,8 @@ dev = [
"autogen_ext==0.4.3",

# Documentation tooling
"diskcache",
"redis",
"sphinx-autobuild",
]

3 changes: 3 additions & 0 deletions python/packages/autogen-core/src/autogen_core/__init__.py
@@ -10,6 +10,7 @@
from ._agent_runtime import AgentRuntime
from ._agent_type import AgentType
from ._base_agent import BaseAgent
from ._cache_store import CacheStore, InMemoryStore
from ._cancellation_token import CancellationToken
from ._closure_agent import ClosureAgent, ClosureContext
from ._component_config import (
@@ -85,6 +86,8 @@
"AgentMetadata",
"AgentRuntime",
"BaseAgent",
"CacheStore",
"InMemoryStore",
"CancellationToken",
"AgentInstantiationContext",
"TopicId",
46 changes: 46 additions & 0 deletions python/packages/autogen-core/src/autogen_core/_cache_store.py
@@ -0,0 +1,46 @@
from typing import Dict, Generic, Optional, Protocol, TypeVar

T = TypeVar("T")


class CacheStore(Protocol, Generic[T]):
"""
This protocol defines the basic interface for store/cache operations.

Sub-classes should handle the lifecycle of underlying storage.
"""

def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
"""
Retrieve an item from the store.

Args:
key: The key identifying the item in the store.
default (optional): The default value to return if the key is not found.
Defaults to None.

Returns:
The value associated with the key if found, else the default value.
"""
...


def set(self, key: str, value: T) -> None:
"""
Set an item in the store.

Args:
key: The key under which the item is to be stored.
value: The value to be stored in the store.
"""
...



class InMemoryStore(CacheStore[T]):
def __init__(self) -> None:
self.store: Dict[str, T] = {}

def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
return self.store.get(key, default)

def set(self, key: str, value: T) -> None:
self.store[key] = value
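The `InMemoryStore` added above is small enough to exercise directly. A minimal sketch, with the class restated verbatim so the snippet runs standalone without `autogen_core` installed:

```python
from typing import Dict, Generic, Optional, TypeVar

T = TypeVar("T")


class InMemoryStore(Generic[T]):
    """Restatement of autogen_core.InMemoryStore for a standalone run."""

    def __init__(self) -> None:
        self.store: Dict[str, T] = {}

    def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
        return self.store.get(key, default)

    def set(self, key: str, value: T) -> None:
        self.store[key] = value


store = InMemoryStore[str]()
store.set("session", "abc123")
print(store.get("session"))              # abc123
print(store.get("missing", "fallback"))  # fallback
```

The type parameter only constrains static checking; at runtime the store is a plain dict.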
48 changes: 48 additions & 0 deletions python/packages/autogen-core/tests/test_cache_store.py
@@ -0,0 +1,48 @@
from unittest.mock import Mock

from autogen_core import CacheStore, InMemoryStore


def test_set_and_get_object_key_value() -> None:
mock_store = Mock(spec=CacheStore)
test_key = "test_key"
test_value = object()
mock_store.set(test_key, test_value)
mock_store.get.return_value = test_value
mock_store.set.assert_called_with(test_key, test_value)
assert mock_store.get(test_key) == test_value


def test_get_non_existent_key() -> None:
mock_store = Mock(spec=CacheStore)
key = "non_existent_key"
mock_store.get.return_value = None
assert mock_store.get(key) is None


def test_set_overwrite_existing_key() -> None:
mock_store = Mock(spec=CacheStore)
key = "test_key"
initial_value = "initial_value"
new_value = "new_value"
mock_store.set(key, initial_value)
mock_store.set(key, new_value)
mock_store.get.return_value = new_value
mock_store.set.assert_called_with(key, new_value)
assert mock_store.get(key) == new_value


def test_inmemory_store() -> None:
store = InMemoryStore[int]()
test_key = "test_key"
test_value = 42
store.set(test_key, test_value)
assert store.get(test_key) == test_value

new_value = 2
store.set(test_key, new_value)
assert store.get(test_key) == new_value

key = "non_existent_key"
default_value = 99
assert store.get(key, default_value) == default_value
6 changes: 6 additions & 0 deletions python/packages/autogen-ext/pyproject.toml
@@ -46,6 +46,12 @@ video-surfer = [
"ffmpeg-python",
"openai-whisper",
]
diskcache = [
"diskcache>=5.6.3"
]
redis = [
"redis>=5.2.1"
]

grpc = [
"grpcio~=1.62.0", # TODO: update this once we have a stable version.
Empty file.
@@ -0,0 +1,26 @@
from typing import Any, Optional, TypeVar, cast

import diskcache
from autogen_core import CacheStore

T = TypeVar("T")


class DiskCacheStore(CacheStore[T]):
"""
A typed CacheStore implementation that uses diskcache as the underlying storage.
See :class:`~autogen_ext.models.cache.ChatCompletionCache` for an example of usage.

Args:
cache_instance: An instance of diskcache.Cache.
The user is responsible for managing the DiskCache instance's lifetime.
"""

def __init__(self, cache_instance: diskcache.Cache): # type: ignore[no-any-unimported]
self.cache = cache_instance

def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
return cast(Optional[T], self.cache.get(key, default)) # type: ignore[reportUnknownMemberType]

def set(self, key: str, value: T) -> None:
self.cache.set(key, cast(Any, value)) # type: ignore[reportUnknownMemberType]
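The typed-wrapper pattern `DiskCacheStore` uses generalizes to any dict-like disk backend. A sketch of the same shape over the standard library's `shelve`, an assumption made only so the snippet runs without `diskcache` installed; `ShelveStore` is a hypothetical name, not part of the PR:

```python
# Same wrapper shape as DiskCacheStore, but over stdlib shelve instead of
# diskcache (an assumption: diskcache may not be available here).
import os
import shelve
import tempfile
from typing import Generic, Optional, TypeVar

T = TypeVar("T")


class ShelveStore(Generic[T]):
    """Hypothetical typed store persisting values to disk via shelve."""

    def __init__(self, path: str) -> None:
        self._db = shelve.open(path)

    def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
        return self._db.get(key, default)

    def set(self, key: str, value: T) -> None:
        self._db[key] = value
        self._db.sync()  # flush to disk immediately


with tempfile.TemporaryDirectory() as tmpdir:
    store = ShelveStore[int](os.path.join(tmpdir, "cache"))
    store.set("tokens_used", 42)
    print(store.get("tokens_used"))  # 42
```

As with `DiskCacheStore`, the caller owns the backing store's lifetime; nothing here closes the database for you.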
29 changes: 29 additions & 0 deletions python/packages/autogen-ext/src/autogen_ext/cache_store/redis.py
@@ -0,0 +1,29 @@
from typing import Any, Optional, TypeVar, cast

import redis
from autogen_core import CacheStore

T = TypeVar("T")


class RedisStore(CacheStore[T]):
"""
A typed CacheStore implementation that uses redis as the underlying storage.
See :class:`~autogen_ext.models.cache.ChatCompletionCache` for an example of usage.

Args:
cache_instance: An instance of `redis.Redis`.
The user is responsible for managing the Redis instance's lifetime.
"""

def __init__(self, redis_instance: redis.Redis):
self.cache = redis_instance

def get(self, key: str, default: Optional[T] = None) -> Optional[T]:
value = cast(Optional[T], self.cache.get(key))
if value is None:
return default
return value

def set(self, key: str, value: T) -> None:
self.cache.set(key, cast(Any, value))
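The one behavioral subtlety in `RedisStore.get` is mapping Redis's `None`-on-miss to the caller-supplied default. A sketch of that logic with a fake client standing in for `redis.Redis` (an assumption: no live server is available here; `FakeRedis` and `RedisStoreSketch` are illustrative names only):

```python
from typing import Any, Dict, Optional


class FakeRedis:
    """Stand-in for redis.Redis: get() returns None for a missing key."""

    def __init__(self) -> None:
        self._data: Dict[str, Any] = {}

    def get(self, key: str) -> Optional[Any]:
        return self._data.get(key)

    def set(self, key: str, value: Any) -> None:
        self._data[key] = value


class RedisStoreSketch:
    """Mirrors RedisStore's miss handling over the fake client."""

    def __init__(self, redis_instance: FakeRedis) -> None:
        self.cache = redis_instance

    def get(self, key: str, default: Optional[Any] = None) -> Optional[Any]:
        # Redis signals a miss with None; translate that into the default.
        value = self.cache.get(key)
        return default if value is None else value

    def set(self, key: str, value: Any) -> None:
        self.cache.set(key, value)


store = RedisStoreSketch(FakeRedis())
store.set("greeting", b"hello")
print(store.get("greeting"))          # b'hello'
print(store.get("missing", b"none"))  # b'none'
```

A consequence of this mapping: a value genuinely stored as `None` is indistinguishable from a miss, which is acceptable for a cache.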
@@ -0,0 +1,6 @@
from ._chat_completion_cache import CHAT_CACHE_VALUE_TYPE, ChatCompletionCache

__all__ = [
"CHAT_CACHE_VALUE_TYPE",
"ChatCompletionCache",
]
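The `ChatCompletionCache` this module exports follows the classic read-through caching pattern: check the store, delegate to the wrapped client on a miss, and record the result. A minimal sketch of that pattern in plain Python, not the library's implementation (`CachedClient` and its single-string `create` are illustrative simplifications):

```python
from typing import Callable, Dict


class CachedClient:
    """Sketch of a read-through caching wrapper around a create() callable."""

    def __init__(self, create: Callable[[str], str], store: Dict[str, str]) -> None:
        self._create = create
        self._store = store
        self.calls = 0  # how often the underlying client was actually invoked

    def create(self, prompt: str) -> str:
        cached = self._store.get(prompt)
        if cached is not None:
            return cached  # cache hit: no cost incurred
        self.calls += 1
        result = self._create(prompt)
        self._store[prompt] = result  # populate the cache for next time
        return result


client = CachedClient(lambda p: p.upper(), {})
print(client.create("hello"))  # HELLO (underlying client invoked)
print(client.create("hello"))  # HELLO (served from cache)
print(client.calls)            # 1
```

Because the store is injected, swapping the dict for a disk- or Redis-backed store changes durability without touching the wrapper, which is exactly the role the `CacheStore` protocol plays for `ChatCompletionCache`.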