pydantic · dmontagu · Feb 27, 2025 · Feb 25, 2025 · Feb 25, 2025 · Feb 26, 2025
diff --git a/docs/agents.md b/docs/agents.md
@@ -220,6 +220,178 @@ Once the run finishes, `agent_run.final_result` becomes a [`AgentRunResult`][pyd
 
 ---
 
+### Streaming
+
+Here is an example of streaming an agent run in combination with `async for` iteration:
+
+```python {title="streaming.py"} {test="skip"}
+import asyncio
+from dataclasses import dataclass
+from datetime import date
+
+from pydantic_ai import Agent
+from pydantic_ai.messages import (
+    FinalResultEvent,
+    FunctionToolCallEvent,
+    FunctionToolResultEvent,
+    PartDeltaEvent,
+    PartStartEvent,
+    TextPartDelta,
+    ToolCallPartDelta,
+)
+from pydantic_ai.tools import RunContext
+
+
+@dataclass
+class WeatherService:
+    async def get_forecast(self, location: str, forecast_date: date) -> str:
+        # In real code: call weather API, DB queries, etc.
+        return f'The forecast in {location} on {forecast_date} is 24°C and sunny.'
+
+    async def get_historic_weather(self, location: str, forecast_date: date) -> str:
+        # In real code: call a historical weather API or DB
+        return (
+            f'The weather in {location} on {forecast_date} was 18°C and partly cloudy.'
+        )
+
+
+weather_agent = Agent[WeatherService, str](
+    'openai:gpt-4o',
+    deps_type=WeatherService,
+    result_type=str,  # We'll produce a final answer as plain text
+    system_prompt='Providing a weather forecast at the locations the user provides.',
+)
+
+
+@weather_agent.tool
+async def weather_forecast(
+    ctx: RunContext[WeatherService],
+    location: str,
+    forecast_date: date,
+) -> str:
+    if forecast_date >= date.today():
+        return await ctx.deps.get_forecast(location, forecast_date)
+    else:
+        return await ctx.deps.get_historic_weather(location, forecast_date)
+
+
+async def main():
+    # The user asks for tomorrow's weather in Paris
+    user_prompt = 'What will the weather be like in Paris tomorrow?'
+
+    # Provide a WeatherService instance as the agent's dependencies
+    deps = WeatherService()
+
+    # Begin a node-by-node, streaming iteration
+    with weather_agent.iter(user_prompt, deps=deps) as run:
+        async for node in run:
+            if Agent.is_model_request_node(node):
+                # A model request node => We can stream tokens from the model's request
+                print('=== ModelRequestNode: streaming partial request tokens ===')
+                async with node.stream(run.ctx) as request_stream:
+                    async for event in request_stream:
+                        if isinstance(event, PartStartEvent):
+                            print(
+                                f'[Request] Starting part {event.index}: {event.part!r}'
+                            )
+                        elif isinstance(event, PartDeltaEvent):
+                            if isinstance(event.delta, TextPartDelta):
+                                print(
+                                    f'[Request] Part {event.index} text delta: {event.delta.content_delta!r}'
+                                )
+                            elif isinstance(event.delta, ToolCallPartDelta):
+                                print(
+                                    f'[Request] Part {event.index} args_delta={event.delta.args_delta}'
+                                )
+                        elif isinstance(event, FinalResultEvent):
+                            print(
+                                f'[Result] The model produced a final result (tool_name={event.tool_name})'
+                            )
+
+            elif Agent.is_handle_response_node(node):
+                # A handle-response node => The model returned some data, potentially calling a tool
+                print(
+                    '=== HandleResponseNode: streaming partial response & tool usage ==='
+                )
+                async with node.stream(run.ctx) as handle_stream:
+                    async for event in handle_stream:
+                        if isinstance(event, FunctionToolCallEvent):
+                            print(
+                                f'[Tools] The LLM calls tool={event.part.tool_name!r} with args={event.part.args} (tool_call_id={event.part.tool_call_id!r})'
+                            )
+                        elif isinstance(event, FunctionToolResultEvent):
+                            print(
+                                f'[Tools] Tool call {event.tool_call_id!r} returned => {event.result.content}'
+                            )
+
+        # Once an End node is reached, the agent run is complete
+        assert run.result is not None
+        print('\n=== Final Agent Output ===')
+        print('Forecast:', run.result.data)
+
+
+if __name__ == '__main__':
+    asyncio.run(main())
+
+"""
+=== ModelRequestNode: streaming partial request tokens ===
+[Request] Starting part 0: ToolCallPart(tool_name='weather_forecast', args='', tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R', part_kind='tool-call')
+[Request] Part 0 args_delta={"
+[Request] Part 0 args_delta=location
+[Request] Part 0 args_delta=":"
+[Request] Part 0 args_delta=Paris
+[Request] Part 0 args_delta=","
+[Request] Part 0 args_delta=forecast
+[Request] Part 0 args_delta=_date
+[Request] Part 0 args_delta=":"
+[Request] Part 0 args_delta=202
+[Request] Part 0 args_delta=3
+[Request] Part 0 args_delta=-
+[Request] Part 0 args_delta=11
+[Request] Part 0 args_delta=-
+[Request] Part 0 args_delta=02
+[Request] Part 0 args_delta="}
+=== HandleResponseNode: streaming partial response & tool usage ===
+[Tools] The LLM calls tool='weather_forecast' with args={"location":"Paris","forecast_date":"2023-11-02"} (tool_call_id='call_Q0QqiZfIhHyNViiLG7jT0G9R')
+[Tools] Tool call 'call_Q0QqiZfIhHyNViiLG7jT0G9R' returned => The weather in Paris on 2023-11-02 was 18°C and partly cloudy.
+=== ModelRequestNode: streaming partial request tokens ===
+[Request] Starting part 0: TextPart(content='', part_kind='text')
+[Result] The model produced a final result (tool_name=None)
+[Request] Part 0 text delta: 'The'
+[Request] Part 0 text delta: ' weather'
+[Request] Part 0 text delta: ' forecast'
+[Request] Part 0 text delta: ' for'
+[Request] Part 0 text delta: ' Paris'
+[Request] Part 0 text delta: ' tomorrow'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' November'
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '2'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '202'
+[Request] Part 0 text delta: '3'
+[Request] Part 0 text delta: ','
+[Request] Part 0 text delta: ' is'
+[Request] Part 0 text delta: ' expected'
+[Request] Part 0 text delta: ' to'
+[Request] Part 0 text delta: ' be'
+[Request] Part 0 text delta: ' '
+[Request] Part 0 text delta: '18'
+[Request] Part 0 text delta: '°C'
+[Request] Part 0 text delta: ' and'
+[Request] Part 0 text delta: ' partly'
+[Request] Part 0 text delta: ' cloudy'
+[Request] Part 0 text delta: '.'
+=== HandleResponseNode: streaming partial response & tool usage ===
+
+=== Final Agent Output ===
+Forecast: The weather forecast for Paris tomorrow, November 2, 2023, is expected to be 18°C and partly cloudy.
+"""
+```
+
+---
+
 ### Additional Configuration
 
 #### Usage Limits

diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py
@@ -2,15 +2,14 @@
 
 import asyncio
 import dataclasses
-from abc import ABC
 from collections.abc import AsyncIterator, Iterator, Sequence
 from contextlib import asynccontextmanager, contextmanager
 from contextvars import ContextVar
 from dataclasses import field
 from typing import Any, Generic, Literal, Union, cast
 
 import logfire_api
-from typing_extensions import TypeVar, assert_never
+from typing_extensions import TypeGuard, TypeVar, assert_never
 
 from pydantic_graph import BaseNode, Graph, GraphRunContext
 from pydantic_graph.nodes import End, NodeRunEndT
@@ -55,6 +54,7 @@
     logfire._internal.stack_info.NON_USER_CODE_PREFIXES += (str(Path(__file__).parent.absolute()),)
 
 T = TypeVar('T')
+S = TypeVar('S')
 NoneType = type(None)
 EndStrategy = Literal['early', 'exhaustive']
 """The strategy for handling multiple tool calls when a final result is found.
@@ -107,8 +107,31 @@ class GraphAgentDeps(Generic[DepsT, ResultDataT]):
     run_span: logfire_api.LogfireSpan
 
 
+class AgentNode(BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]]):
+    """The base class for all agent nodes.
+
+    Using a single subclass of `BaseNode` for all agent nodes reduces the amount of generics boilerplate everywhere.
+    """
+
+
+def is_agent_node(
+    node: BaseNode[GraphAgentState, GraphAgentDeps[T, Any], result.FinalResult[S]] | End[result.FinalResult[S]],
+) -> TypeGuard[AgentNode[T, S]]:
+    """Check if the provided node is an instance of `AgentNode`.
+
+    Usage:
+
+        if is_agent_node(node):
+            # `node` is an AgentNode
+            ...
+
+    This function preserves the generic parameters on the narrowed type, unlike `isinstance(node, AgentNode)`.
+    """
+    return isinstance(node, AgentNode)
+
+
 @dataclasses.dataclass
-class UserPromptNode(BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]], ABC):
+class UserPromptNode(AgentNode[DepsT, NodeRunEndT]):
     user_prompt: str | Sequence[_messages.UserContent]
 
     system_prompts: tuple[str, ...]
@@ -215,7 +238,7 @@ async def add_tool(tool: Tool[DepsT]) -> None:
 
 
 @dataclasses.dataclass
-class ModelRequestNode(BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]]):
+class ModelRequestNode(AgentNode[DepsT, NodeRunEndT]):
     """Make a request to the model using the last message in state.message_history."""
 
     request: _messages.ModelRequest
@@ -236,12 +259,30 @@ async def run(
 
         return await self._make_request(ctx)
 
+    @asynccontextmanager
+    async def stream(
+        self,
+        ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, T]],
+    ) -> AsyncIterator[result.AgentStream[DepsT, T]]:
+        async with self._stream(ctx) as streamed_response:
+            agent_stream = result.AgentStream[DepsT, T](
+                streamed_response,
+                ctx.deps.result_schema,
+                ctx.deps.result_validators,
+                build_run_context(ctx),
+                ctx.deps.usage_limits,
+            )
+            yield agent_stream
+            # In case the user didn't manually consume the full stream, ensure it is fully consumed here,
+            # otherwise usage won't be properly counted:
+            async for _ in agent_stream:
+                pass
+
     @asynccontextmanager
     async def _stream(
         self,
         ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT, T]],
     ) -> AsyncIterator[models.StreamedResponse]:
-        # TODO: Consider changing this to return something more similar to a `StreamedRunResult`, then make it public
         assert not self._did_stream, 'stream() should only be called once per node'
 
         model_settings, model_request_parameters = await self._prepare_request(ctx)
@@ -319,7 +360,7 @@ def _finish_handling(
 
 
 @dataclasses.dataclass
-class HandleResponseNode(BaseNode[GraphAgentState, GraphAgentDeps[DepsT, Any], result.FinalResult[NodeRunEndT]]):
+class HandleResponseNode(AgentNode[DepsT, NodeRunEndT]):
     """Process a model response, and decide whether to end the run or make a new request."""
 
     model_response: _messages.ModelResponse
@@ -575,7 +616,7 @@ async def process_function_tools(
             for task in done:
                 index = tasks.index(task)
                 result = task.result()
-                yield _messages.FunctionToolResultEvent(result, call_id=call_index_to_event_id[index])
+                yield _messages.FunctionToolResultEvent(result, tool_call_id=call_index_to_event_id[index])
                 if isinstance(result, (_messages.ToolReturnPart, _messages.RetryPromptPart)):
                     results_by_index[index] = result
                 else: