Skip to content

Commit 2b4bc75

Browse files
feat(api): new models for TTS, STT, + new audio features for Realtime (#2232)
1 parent e9f971a commit 2b4bc75

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

42 files changed

+2333
-325
lines changed

.stats.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,2 @@
1-
configured_endpoints: 81
2-
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-b26121d5df6eb5d3032a45a267473798b15fcfec76dd44a3256cf1238be05fa4.yml
1+
configured_endpoints: 82
2+
openapi_spec_url: https://storage.googleapis.com/stainless-sdk-openapi-specs/openai%2Fopenai-c22f59c66aec7914b6ee653d3098d1c1c8c16c180d2a158e819c8ddbf476f74b.yml

api.md

+20
Original file line numberDiff line numberDiff line change
@@ -151,7 +151,11 @@ Types:
151151
```python
152152
from openai.types.audio import (
153153
Transcription,
154+
TranscriptionInclude,
154155
TranscriptionSegment,
156+
TranscriptionStreamEvent,
157+
TranscriptionTextDeltaEvent,
158+
TranscriptionTextDoneEvent,
155159
TranscriptionVerbose,
156160
TranscriptionWord,
157161
TranscriptionCreateResponse,
@@ -338,7 +342,9 @@ from openai.types.beta.realtime import (
338342
ConversationItemDeleteEvent,
339343
ConversationItemDeletedEvent,
340344
ConversationItemInputAudioTranscriptionCompletedEvent,
345+
ConversationItemInputAudioTranscriptionDeltaEvent,
341346
ConversationItemInputAudioTranscriptionFailedEvent,
347+
ConversationItemRetrieveEvent,
342348
ConversationItemTruncateEvent,
343349
ConversationItemTruncatedEvent,
344350
ConversationItemWithReference,
@@ -375,6 +381,8 @@ from openai.types.beta.realtime import (
375381
SessionCreatedEvent,
376382
SessionUpdateEvent,
377383
SessionUpdatedEvent,
384+
TranscriptionSessionUpdate,
385+
TranscriptionSessionUpdatedEvent,
378386
)
379387
```
380388

@@ -390,6 +398,18 @@ Methods:
390398

391399
- <code title="post /realtime/sessions">client.beta.realtime.sessions.<a href="./src/openai/resources/beta/realtime/sessions.py">create</a>(\*\*<a href="src/openai/types/beta/realtime/session_create_params.py">params</a>) -> <a href="./src/openai/types/beta/realtime/session_create_response.py">SessionCreateResponse</a></code>
392400

401+
### TranscriptionSessions
402+
403+
Types:
404+
405+
```python
406+
from openai.types.beta.realtime import TranscriptionSession
407+
```
408+
409+
Methods:
410+
411+
- <code title="post /realtime/transcription_sessions">client.beta.realtime.transcription_sessions.<a href="./src/openai/resources/beta/realtime/transcription_sessions.py">create</a>(\*\*<a href="src/openai/types/beta/realtime/transcription_session_create_params.py">params</a>) -> <a href="./src/openai/types/beta/realtime/transcription_session.py">TranscriptionSession</a></code>
412+
393413
## Assistants
394414

395415
Types:

src/openai/resources/audio/speech.py

+12-2
Original file line numberDiff line numberDiff line change
@@ -54,6 +54,7 @@ def create(
5454
input: str,
5555
model: Union[str, SpeechModel],
5656
voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
57+
instructions: str | NotGiven = NOT_GIVEN,
5758
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
5859
speed: float | NotGiven = NOT_GIVEN,
5960
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -71,13 +72,16 @@ def create(
7172
7273
model:
7374
One of the available [TTS models](https://platform.openai.com/docs/models#tts):
74-
`tts-1` or `tts-1-hd`
75+
`tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
7576
7677
voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
7778
`coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
7879
voices are available in the
7980
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
8081
82+
instructions: Control the voice of your generated audio with additional instructions. Does not
83+
work with `tts-1` or `tts-1-hd`.
84+
8185
response_format: The format to return the audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
8286
`wav`, and `pcm`.
8387
@@ -100,6 +104,7 @@ def create(
100104
"input": input,
101105
"model": model,
102106
"voice": voice,
107+
"instructions": instructions,
103108
"response_format": response_format,
104109
"speed": speed,
105110
},
@@ -138,6 +143,7 @@ async def create(
138143
input: str,
139144
model: Union[str, SpeechModel],
140145
voice: Literal["alloy", "ash", "coral", "echo", "fable", "onyx", "nova", "sage", "shimmer"],
146+
instructions: str | NotGiven = NOT_GIVEN,
141147
response_format: Literal["mp3", "opus", "aac", "flac", "wav", "pcm"] | NotGiven = NOT_GIVEN,
142148
speed: float | NotGiven = NOT_GIVEN,
143149
# Use the following arguments if you need to pass additional parameters to the API that aren't available via kwargs.
@@ -155,13 +161,16 @@ async def create(
155161
156162
model:
157163
One of the available [TTS models](https://platform.openai.com/docs/models#tts):
158-
`tts-1` or `tts-1-hd`
164+
`tts-1`, `tts-1-hd` or `gpt-4o-mini-tts`.
159165
160166
voice: The voice to use when generating the audio. Supported voices are `alloy`, `ash`,
161167
`coral`, `echo`, `fable`, `onyx`, `nova`, `sage` and `shimmer`. Previews of the
162168
voices are available in the
163169
[Text to speech guide](https://platform.openai.com/docs/guides/text-to-speech#voice-options).
164170
171+
instructions: Control the voice of your generated audio with additional instructions. Does not
172+
work with `tts-1` or `tts-1-hd`.
173+
165174
response_format: The format to return the audio in. Supported formats are `mp3`, `opus`, `aac`, `flac`,
166175
`wav`, and `pcm`.
167176
@@ -184,6 +193,7 @@ async def create(
184193
"input": input,
185194
"model": model,
186195
"voice": voice,
196+
"instructions": instructions,
187197
"response_format": response_format,
188198
"speed": speed,
189199
},

0 commit comments

Comments
 (0)