diff --git a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb
index 4447838b6fa..28d3a0a057d 100644
--- a/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb
+++ b/python/packages/autogen-core/docs/src/user-guide/core-user-guide/components/model-clients.ipynb
@@ -184,15 +184,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "Comparing usage returns in the above Non Streaming `model_client.create(messages=messages)` vs streaming `model_client.create_stream(messages=messages)` we see differences.\n",
-    "The non streaming response by default returns valid prompt and completion token usage counts. \n",
-    "The streamed response by default returns zero values.\n",
+    "Comparing usage returns in the above non-streaming `model_client.create(messages=messages)` to streaming `model_client.create_stream(messages=messages)`, we see differences. The non-streaming response by default returns valid prompt and completion token usage counts. The streamed response by default returns zero values.\n",
     "\n",
-    "as documented in the OPENAI API Reference an additional parameter `stream_options` can be specified to return valid usage counts. see [stream_options](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options)\n",
+    "As documented in the OpenAI API Reference, an additional parameter `stream_options` can be specified to return valid usage counts. See [stream_options](https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options). Only set this when using streaming, i.e. when using `create_stream`. To enable this, set `extra_create_args={\"stream_options\": {\"include_usage\": True}}` when calling `create_stream`. Depending on which completion client is being used, the maximum number of consecutive empty chunks allowed may need to be adjusted, e.g. `max_consecutive_empty_chunk_tolerance=2`, to account for the trailing empty message containing usage information.\n",
     "\n",
-    "Only set this when you using streaming ie , using `create_stream` \n",
-    "\n",
-    "to enable this in `create_stream` set `extra_create_args={\"stream_options\": {\"include_usage\": True}},`\n",
     "\n",
     "```{note}\n",
     "Note whilst other API's like LiteLLM also support this, it is not always guarenteed that it is fully supported or correct.\n",
diff --git a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
index 79c13442c7d..95b2b1bc421 100644
--- a/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
+++ b/python/packages/autogen-ext/src/autogen_ext/models/openai/_openai_client.py
@@ -1008,6 +1008,8 @@ class OpenAIChatCompletionClient(BaseOpenAIChatCompletionClient, Component[OpenA
 
         client = ChatCompletionClient.load_component(config)
 
+    Note: When usage information is requested (see the `stream_options documentation <https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options>`_) with the `create_stream` method, `max_consecutive_empty_chunk_tolerance` should be increased to permit the trailing empty chunk carrying the usage information. For example, `completion_client.create_stream(..., max_consecutive_empty_chunk_tolerance=2, extra_create_args={"stream_options": {"include_usage": True}})`.
+
     To view the full list of available configuration options, see the
     :py:class:`OpenAIClientConfigurationConfigModel` class.
     """
@@ -1117,7 +1119,7 @@ class AzureOpenAIChatCompletionClient(
             # api_key="sk-...", # For key-based authentication. `AZURE_OPENAI_API_KEY` environment variable can also be used instead.
         )
 
-    To load the client that uses identity based aith from a configuration, you can use the `load_component` method:
+    To load the client that uses identity based auth from a configuration, you can use the `load_component` method:
 
     .. code-block:: python
 
@@ -1142,7 +1144,8 @@ class AzureOpenAIChatCompletionClient(
 
         client = ChatCompletionClient.load_component(config)
 
-
+    Note: When usage information is requested (see the `stream_options documentation <https://platform.openai.com/docs/api-reference/chat/create#chat-create-stream_options>`_) with the `create_stream` method, `max_consecutive_empty_chunk_tolerance` should be increased to permit the trailing empty chunk carrying the usage information. For example, `completion_client.create_stream(..., max_consecutive_empty_chunk_tolerance=2, extra_create_args={"stream_options": {"include_usage": True}})`.
+
     To view the full list of available configuration options, see the
     :py:class:`AzureOpenAIClientConfigurationConfigModel` class.