
Commit 6586af3 (parent: bb41987)

Use .iter() API to fully replace existing streaming implementation

File tree: 5 files changed (+351, -6 lines changed)


docs/agents.md (+175)

@@ -220,6 +220,181 @@ Once the run finishes, `agent_run.final_result` becomes a [`AgentRunResult`][pyd
 
 ---
 
+### Streaming
+
+Here is an example of streaming an agent run node by node, in combination with `async for`:
+
+```python {title="streaming.py"}
+import asyncio
+from dataclasses import dataclass
+from datetime import date
+
+from pydantic_ai import (
+    Agent,
+    capture_run_messages,
+)
+from pydantic_ai.agent import is_handle_response_node, is_model_request_node
+from pydantic_ai.messages import (
+    PartStartEvent,
+    PartDeltaEvent,
+    FunctionToolCallEvent,
+    FunctionToolResultEvent,
+    FinalResultEvent,
+    TextPartDelta,
+    ToolCallPartDelta,
+)
+from pydantic_ai.tools import RunContext
+from pydantic_graph import End
+
+
+@dataclass
+class WeatherService:
+    async def get_forecast(self, location: str, forecast_date: date) -> str:
+        # In real code: call a weather API, DB queries, etc.
+        return f"The forecast in {location} on {forecast_date} is 24°C and sunny."
+
+    async def get_historic_weather(self, location: str, forecast_date: date) -> str:
+        # In real code: call a historical weather API or DB
+        return f"The weather in {location} on {forecast_date} was 18°C and partly cloudy."
+
+
+weather_agent = Agent[WeatherService, str](
+    "openai:gpt-4o",
+    deps_type=WeatherService,
+    result_type=str,  # We'll produce a final answer as plain text
+    system_prompt="Providing a weather forecast at the locations the user provides.",
+)
+
+
+@weather_agent.tool
+async def weather_forecast(
+    ctx: RunContext[WeatherService],
+    location: str,
+    forecast_date: date,
+) -> str:
+    if forecast_date >= date.today():
+        return await ctx.deps.get_forecast(location, forecast_date)
+    else:
+        return await ctx.deps.get_historic_weather(location, forecast_date)
+
+
+async def main():
+    # The user asks for tomorrow's weather in Paris
+    user_prompt = "What will the weather be like in Paris tomorrow?"
+
+    # We'll capture raw messages for debugging
+    with capture_run_messages() as messages:
+        # Provide a WeatherService instance as the agent's dependencies
+        deps = WeatherService()
+
+        # Begin a node-by-node, streaming iteration
+        with weather_agent.iter(user_prompt, deps=deps) as run:
+            node = run.next_node  # The first node to run
+            while not isinstance(node, End):
+                if is_model_request_node(node):
+                    # A model request node => we can stream tokens from the model's response as they arrive
+                    print("=== ModelRequestNode: streaming partial request tokens ===")
+                    async with node.stream(run.ctx) as request_stream:
+                        async for event in request_stream:
+                            if isinstance(event, PartStartEvent):
+                                print(f"[Request] Starting part {event.index}: {event.part!r}")
+                            elif isinstance(event, PartDeltaEvent):
+                                if isinstance(event.delta, TextPartDelta):
+                                    print(f"[Request] Part {event.index} text delta: {event.delta.content_delta!r}")
+                                elif isinstance(event.delta, ToolCallPartDelta):
+                                    print(f"[Request] Part {event.index} args_delta={event.delta.args_delta}")
+                            elif isinstance(event, FinalResultEvent):
+                                print(f"[Result] The model produced a final result (tool_name={event.tool_name})")
+
+                elif is_handle_response_node(node):
+                    # A handle-response node => the model returned some data, and may call a tool
+                    print("=== HandleResponseNode: streaming partial response & tool usage ===")
+                    async with node.stream(run.ctx) as handle_stream:
+                        async for event in handle_stream:
+                            if isinstance(event, FunctionToolCallEvent):
+                                print(f"[Tools] The LLM calls tool={event.part.tool_name!r} with args={event.part.args} (tool_call_id={event.part.tool_call_id!r})")
+                            elif isinstance(event, FunctionToolResultEvent):
+                                print(f"[Tools] Tool call {event.tool_call_id!r} returned => {event.result.content}")
+
+                node = await run.next(node)
+
+            # Once an End node is reached, the agent run is complete
+            assert run.result is not None
+            print("\n=== Final Agent Output ===")
+            print("Forecast:", run.result.data)
+
+        # Show the raw messages exchanged
+        print("\n=== Raw Messages Captured ===")
+        for m in messages:
+            print(" -", m)
+
+
+if __name__ == "__main__":
+    asyncio.run(main())
+
+"""
+=== ModelRequestNode: streaming partial request tokens ===
+[Request] Starting part 0: ToolCallPart(tool_name='weather_forecast', args='', tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R', part_kind='tool-call')
+[Request] Part 0 args_delta={"
+[Request] Part 0 args_delta=location
+[Request] Part 0 args_delta=":"
+[Request] Part 0 args_delta=Paris
+[Request] Part 0 args_delta=","
+[Request] Part 0 args_delta=forecast
+[Request] Part 0 args_delta=_date
+[Request] Part 0 args_delta=":"
+[Request] Part 0 args_delta=202
+[Request] Part 0 args_delta=3
+[Request] Part 0 args_delta=-
+[Request] Part 0 args_delta=11
+[Request] Part 0 args_delta=-
+[Request] Part 0 args_delta=02
+[Request] Part 0 args_delta="}
+=== HandleResponseNode: streaming partial response & tool usage ===
+[Tools] The LLM calls tool='weather_forecast' with args={"location":"Paris","forecast_date":"2023-11-02"} (tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R')
+[Tools] Tool call 'call_Q0QqiZfIhHyNViiLG7jT0G9R' returned => The weather in Paris on 2023-11-02 was 18°C and partly cloudy.
+=== ModelRequestNode: streaming partial request tokens ===
+[Request] Starting part 0: TextPart(content='', part_kind='text')
+[Result] The model produced a final result (tool_name=None)
+[Request] Part 0 text delta: 'The'
+[Request] Part 0 text delta: ' weather'
+[Request] Part 0 text delta: ' forecast'
+[Request] Part 0 text delta: ' for'
+[Request] Part 0 text delta: ' Paris'
+[Request] Part 0 text delta: ' tomorrow'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' November'
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '2'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '202'
+[Request] Part 0 text delta: '3'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' is'
+[Request] Part 0 text delta: ' expected'
+[Request] Part 0 text delta: ' to'
+[Request] Part 0 text delta: ' be'
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '18'
+[Request] Part 0 text delta: '°C'
+[Request] Part 0 text delta: ' and'
+[Request] Part 0 text delta: ' partly'
+[Request] Part 0 text delta: ' cloudy'
+[Request] Part 0 text delta: '.'
+=== HandleResponseNode: streaming partial response & tool usage ===
+
+=== Final Agent Output ===
+Forecast: The weather forecast for Paris tomorrow, November 2, 2023, is expected to be 18°C and partly cloudy.
+
+=== Raw Messages Captured ===
+ - ModelRequest(parts=[SystemPromptPart(content='Providing a weather forecast at the locations the user provides.', dynamic_ref=None, part_kind='system-prompt'), UserPromptPart(content='What will the weather be like in Paris tomorrow?', timestamp=datetime.datetime(2025, 2, 25, 7, 16, 4, 867863, tzinfo=datetime.timezone.utc), part_kind='user-prompt')], kind='request')
+ - ModelResponse(parts=[ToolCallPart(tool_name='weather_forecast', args='{"location":"Paris","forecast_date":"2023-11-02"}', tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R', part_kind='tool-call')], model_name='gpt-4o', timestamp=datetime.datetime(2025, 2, 25, 7, 16, 8, tzinfo=datetime.timezone.utc), kind='response')
+ - ModelRequest(parts=[ToolReturnPart(tool_name='weather_forecast', content='The weather in Paris on 2023-11-02 was 18°C and partly cloudy.', tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R', timestamp=datetime.datetime(2025, 2, 25, 7, 16, 9, 150432, tzinfo=datetime.timezone.utc), part_kind='tool-return')], kind='request')
+ - ModelResponse(parts=[TextPart(content='The weather forecast for Paris tomorrow, November 2, 2023, is expected to be 18°C and partly cloudy.', part_kind='text')], model_name='gpt-4o', timestamp=datetime.datetime(2025, 2, 25, 7, 16, 9, tzinfo=datetime.timezone.utc), kind='response')
+"""
+```
+
+---
+
 ### Additional Configuration
 
 #### Usage Limits
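
If you only want the streamed text and not the full event-by-event trace, the same `.iter()` loop collapses to a few lines. A minimal sketch, assuming the `agent.iter()` / `node.stream()` API from the example above (`stream_text_only` is a hypothetical helper, not part of the library):

```python
import asyncio

from pydantic_ai.agent import is_model_request_node
from pydantic_ai.messages import PartDeltaEvent, TextPartDelta
from pydantic_graph import End


async def stream_text_only(agent, user_prompt: str, deps) -> None:
    # Hypothetical helper: walk the run node by node, printing only text deltas
    with agent.iter(user_prompt, deps=deps) as run:
        node = run.next_node
        while not isinstance(node, End):
            if is_model_request_node(node):
                async with node.stream(run.ctx) as request_stream:
                    async for event in request_stream:
                        # Ignore tool-call deltas; emit plain text as it arrives
                        if isinstance(event, PartDeltaEvent) and isinstance(event.delta, TextPartDelta):
                            print(event.delta.content_delta, end="", flush=True)
            node = await run.next(node)
```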

pydantic_ai_slim/pydantic_ai/_agent_graph.py (+35, -3)

@@ -10,7 +10,7 @@
 from typing import Any, Generic, Literal, Union, cast
 
 import logfire_api
-from typing_extensions import TypeVar, assert_never
+from typing_extensions import TypeGuard, TypeVar, assert_never
 
 from pydantic_graph import BaseNode, Graph, GraphRunContext
 from pydantic_graph.nodes import End, NodeRunEndT
@@ -40,6 +40,8 @@
     'HandleResponseNode',
     'build_run_context',
     'capture_run_messages',
+    'is_model_request_node',
+    'is_handle_response_node',
 )
 
 _logfire = logfire_api.Logfire(otel_scope='pydantic-ai')
@@ -236,12 +238,30 @@ async def run(
 
         return await self._make_request(ctx)
 
+    @asynccontextmanager
+    async def stream(
+        self,
+        ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, T]],
+    ) -> AsyncIterator[result.AgentStream[DepsT, T]]:
+        async with self._stream(ctx) as streamed_response:
+            agent_stream = result.AgentStream[DepsT, T](
+                streamed_response,
+                ctx.deps.result_schema,
+                ctx.deps.result_validators,
+                build_run_context(ctx),
+                ctx.deps.usage_limits,
+            )
+            yield agent_stream
+            # In case the user didn't manually consume the full stream, ensure it is fully consumed here,
+            # otherwise usage won't be properly counted:
+            async for _ in agent_stream:
+                pass
+
     @asynccontextmanager
     async def _stream(
         self,
         ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, T]],
     ) -> AsyncIterator[models.StreamedResponse]:
-        # TODO: Consider changing this to return something more similar to a `StreamedRunResult`, then make it public
         assert not self._did_stream, 'stream() should only be called once per node'
 
         model_settings, model_request_parameters = await self._prepare_request(ctx)
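
The trailing `async for` in the new `stream()` method is what makes early exits safe: even if a caller stops consuming events partway through, the context manager drains the remainder on exit, so token usage is still counted. A sketch of such a caller, under the node API added in this commit (`first_event` is a hypothetical function, not part of the change):

```python
from pydantic_graph import GraphRunContext


async def first_event(node, ctx: GraphRunContext):
    # Hypothetical caller that abandons the stream after one event.
    # `stream()` drains the remaining events when the context exits,
    # so usage for the rest of the response is still recorded.
    async with node.stream(ctx) as request_stream:
        async for event in request_stream:
            return event
```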
@@ -575,7 +595,7 @@ async def process_function_tools(
             for task in done:
                 index = tasks.index(task)
                 result = task.result()
-                yield _messages.FunctionToolResultEvent(result, call_id=call_index_to_event_id[index])
+                yield _messages.FunctionToolResultEvent(result, tool_call_id=call_index_to_event_id[index])
                 if isinstance(result, (_messages.ToolReturnPart, _messages.RetryPromptPart)):
                     results_by_index[index] = result
                 else:
@@ -685,3 +705,15 @@ def build_agent_graph(
         auto_instrument=False,
     )
     return graph
+
+
+def is_model_request_node(
+    node: BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]],
+) -> TypeGuard[ModelRequestNode[DepsT, NodeRunEndT]]:
+    return isinstance(node, ModelRequestNode)
+
+
+def is_handle_response_node(
+    node: BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]],
+) -> TypeGuard[HandleResponseNode[DepsT, NodeRunEndT]]:
+    return isinstance(node, HandleResponseNode)
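
These helpers return `TypeGuard`s rather than plain `bool`s so that static type checkers narrow the node union at the call site, which is what lets the docs example call `node.stream(...)` without a cast. A self-contained sketch of the same pattern with toy classes (not the library's real node types):

```python
from typing_extensions import TypeGuard


class Node:
    """Toy base class, standing in for pydantic_graph's BaseNode."""


class RequestNode(Node):
    def stream(self) -> str:
        return "streaming..."


def is_request_node(node: Node) -> TypeGuard[RequestNode]:
    # Returning True tells the type checker that `node` is a RequestNode
    return isinstance(node, RequestNode)


def handle(node: Node) -> None:
    if is_request_node(node):
        node.stream()  # narrowed: no cast or `type: ignore` needed
```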

pydantic_ai_slim/pydantic_ai/agent.py (+4, -1)

@@ -45,7 +45,8 @@
 HandleResponseNode = _agent_graph.HandleResponseNode
 ModelRequestNode = _agent_graph.ModelRequestNode
 UserPromptNode = _agent_graph.UserPromptNode
-
+is_handle_response_node = _agent_graph.is_handle_response_node
+is_model_request_node = _agent_graph.is_model_request_node
 
 __all__ = (
     'Agent',
@@ -56,6 +57,8 @@
     'HandleResponseNode',
     'ModelRequestNode',
     'UserPromptNode',
+    'is_handle_response_node',
+    'is_model_request_node',
 )
 
 _logfire = logfire_api.Logfire(otel_scope='pydantic-ai')

pydantic_ai_slim/pydantic_ai/messages.py (+16, -1)

@@ -533,9 +533,24 @@ class PartDeltaEvent:
     """Event type identifier, used as a discriminator."""
 
 
+@dataclass
+class FinalResultEvent:
+    """An event indicating the response to the current model request matches the result schema."""
+
+    tool_name: str | None
+    """The name of the result tool that was called. `None` if the result is from text content and not from a tool."""
+    event_kind: Literal['final_result'] = 'final_result'
+    """Event type identifier, used as a discriminator."""
+
+
 ModelResponseStreamEvent = Annotated[Union[PartStartEvent, PartDeltaEvent], pydantic.Discriminator('event_kind')]
 """An event in the model response stream, either starting a new part or applying a delta to an existing one."""
 
+AgentStreamEvent = Annotated[
+    Union[PartStartEvent, PartDeltaEvent, FinalResultEvent], pydantic.Discriminator('event_kind')
+]
+"""An event in the agent stream."""
+
 
 @dataclass
 class FunctionToolCallEvent:
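
Because every event carries an `event_kind` literal, the `AgentStreamEvent` union can be validated with a pydantic `TypeAdapter`, the discriminator selecting the concrete event class. A quick sketch, assuming these event dataclasses validate like ordinary pydantic dataclasses:

```python
import pydantic

from pydantic_ai.messages import AgentStreamEvent, FinalResultEvent

adapter = pydantic.TypeAdapter(AgentStreamEvent)

# 'final_result' routes validation to FinalResultEvent via the discriminator
event = adapter.validate_python({'event_kind': 'final_result', 'tool_name': None})
assert isinstance(event, FinalResultEvent)
```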
@@ -558,7 +573,7 @@ class FunctionToolResultEvent:
 
     result: ToolReturnPart | RetryPromptPart
     """The result of the call to the function tool."""
-    call_id: str
+    tool_call_id: str
     """An ID used to match the result to its original call."""
     event_kind: Literal['function_tool_result'] = 'function_tool_result'
     """Event type identifier, used as a discriminator."""
