/* audio_recorder.h */
/*
* ESPRESSIF MIT License
*
* Copyright (c) 2022 <ESPRESSIF SYSTEMS (SHANGHAI) CO., LTD>
*
* Permission is hereby granted for use on all ESPRESSIF SYSTEMS products, in which case,
* it is free of charge, to any person obtaining a copy of this software and associated
* documentation files (the "Software"), to deal in the Software without restriction, including
* without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the Software is furnished
* to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in all copies or
* substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
* FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
* COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
* IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
* CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
#ifndef _AUDIO_RECORDER_H_
#define _AUDIO_RECORDER_H_
#include "recorder_encoder_iface.h"
#include "recorder_sr_iface.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Recorder task defaults (used by AUDIO_RECORDER_DEFAULT_CFG) */
#define AUDIO_REC_DEF_TASK_SZ (4 * 1024) /*!< Default stack size of the recorder task */
#define AUDIO_REC_DEF_TASK_PRIO (10) /*!< Default priority of the recorder task */
#define AUDIO_REC_DEF_TASK_CORE (1) /*!< Default core the recorder task is pinned to */
/* Timing defaults in milliseconds, listed in the order of the matching `audio_rec_cfg_t` fields */
#define AUDIO_REC_DEF_WAKEUP_TM (10000) /*!< Default `wakeup_time`: how long the wakeup state remains when VAD is not triggered (ms) */
#define AUDIO_REC_VAD_START_SPEECH_MS (160) /*!< Default `vad_start`: length of consecutive speech frames judged as VAD start (ms) */
#define AUDIO_REC_DEF_VAD_OFF_TM (300) /*!< Default `vad_off`: silence time after which VAD end is reported (ms) */
#define AUDIO_REC_DEF_WAKEEND_TM (900) /*!< Default `wakeup_end`: silence time after VAD end before wakeup end is reported (ms) */
/**
 * @brief Recorder event delivered to the user's event callback
 *
 * `type` is either one of the negative state-change codes below or a value
 * greater than or equal to `AUDIO_REC_COMMAND_DECT`, in which case it is the
 * ID of a detected voice command.
 */
typedef struct {
/**
 * @brief Audio recorder event type
 *
 * The state-change events occupy consecutive negative values starting at -100
 * so that they never collide with command IDs, which start at 0.
 */
enum {
AUDIO_REC_WAKEUP_START = -100, /*!< Wakeup start (-100) */
AUDIO_REC_WAKEUP_END, /*!< Wakeup stop (-99) */
AUDIO_REC_VAD_START, /*!< Vad start (-98) */
AUDIO_REC_VAD_END, /*!< Vad stop (-97) */
AUDIO_REC_COMMAND_DECT = 0 /*!< From 0 upward, the value is the ID of the voice command detected by Multinet */
/* DO NOT add items below this line */
} type; /*!< Event type */
void *event_data; /*!< Event data:
                       For `AUDIO_REC_WAKEUP_START`, event data is `recorder_sr_wakeup_result_t`
                       For `AUDIO_REC_COMMAND_DECT` or higher, event data is `recorder_sr_mn_result_t`
                       For other events, event data is NULL
                   */
size_t data_len; /*!< Length of event data (presumably in bytes; confirm against the implementation) */
} audio_rec_evt_t;
/**
 * @brief Event notification callback
 *
 * @param event      Event being reported, see `audio_rec_evt_t`
 * @param user_data  User pointer; presumably the `user_data` value set in
 *                   `audio_rec_cfg_t` (confirm against the implementation)
 *
 * @return `esp_err_t` status code. How the recorder reacts to the returned
 *         value is not visible in this header.
 */
typedef esp_err_t (*rec_event_cb_t)(audio_rec_evt_t *event, void *user_data);
/**
 * @brief Audio recorder configuration
 *
 * Passed to `audio_recorder_create`. `AUDIO_RECORDER_DEFAULT_CFG()` provides
 * default values for the task and timing fields and sets every callback,
 * handle and interface pointer to NULL.
 */
typedef struct {
int pinned_core; /*!< Core the audio recorder task is pinned to */
int task_prio; /*!< Audio recorder task priority */
int task_size; /*!< Audio recorder task stack size */
rec_event_cb_t event_cb; /*!< Event callback function, events are described by `audio_rec_evt_t` */
void *user_data; /*!< Pointer to user data (optional) */
recorder_data_read_t read; /*!< Data callback function used to feed data to the audio recorder */
void *sr_handle; /*!< SR (speech recognition) handle */
recorder_sr_iface_t *sr_iface; /*!< SR (speech recognition) interface */
int wakeup_time; /*!< Unit: ms. The duration that the wakeup state remains when VAD is not triggered */
int vad_start; /*!< Unit: ms. Length of consecutive speech frames that is judged as VAD start */
int vad_off; /*!< Unit: ms. When the silence time exceeds this value, the state is determined as AUDIO_REC_VAD_END */
int wakeup_end; /*!< Unit: ms. When the silence time after the AUDIO_REC_VAD_END state exceeds this value, the state is determined as AUDIO_REC_WAKEUP_END */
void *encoder_handle; /*!< Encoder handle */
recorder_encoder_iface_t *encoder_iface; /*!< Encoder interface */
} audio_rec_cfg_t;
/**
 * @brief Audio recorder handle
 *
 * Opaque pointer: `struct __audio_recorder` is only declared here, not
 * defined, so users of this header cannot access its members. A handle is
 * returned by `audio_recorder_create` and taken by every other function
 * declared in this header.
 */
typedef struct __audio_recorder *audio_rec_handle_t;
/**
 * @brief Default initializer for `audio_rec_cfg_t`
 *
 * Expands to a brace-enclosed designated initializer, so it is meant to be
 * used in a declaration such as
 * `audio_rec_cfg_t cfg = AUDIO_RECORDER_DEFAULT_CFG();`.
 *
 * Task and timing fields take the `AUDIO_REC_DEF_*` /
 * `AUDIO_REC_VAD_START_SPEECH_MS` defaults. `event_cb`, `user_data`, `read`,
 * the SR handle/interface and the encoder handle/interface are all NULL and
 * are left for the caller to fill in (which of them are mandatory is not
 * visible in this header; confirm against the implementation).
 *
 * The initializers are listed in the same order as the fields of
 * `audio_rec_cfg_t`; keep it that way when adding fields.
 */
#define AUDIO_RECORDER_DEFAULT_CFG() \
{ \
.pinned_core = AUDIO_REC_DEF_TASK_CORE, \
.task_prio = AUDIO_REC_DEF_TASK_PRIO, \
.task_size = AUDIO_REC_DEF_TASK_SZ, \
.event_cb = NULL, \
.user_data = NULL, \
.read = NULL, \
.sr_handle = NULL, \
.sr_iface = NULL, \
.wakeup_time = AUDIO_REC_DEF_WAKEUP_TM, \
.vad_start = AUDIO_REC_VAD_START_SPEECH_MS, \
.vad_off = AUDIO_REC_DEF_VAD_OFF_TM, \
.wakeup_end = AUDIO_REC_DEF_WAKEEND_TM, \
.encoder_handle = NULL, \
.encoder_iface = NULL, \
}
/**
 * @brief Initialize and start up the audio recorder
 *
 * @param cfg  Configuration of the audio recorder, see `audio_rec_cfg_t`
 *
 * @return
 *     - NULL    Failed
 *     - Others  Audio recorder handle
 */
audio_rec_handle_t audio_recorder_create(audio_rec_cfg_t *cfg);
/**
 * @brief Start recording by force
 *
 * @note If data needs to be read from the recorder without a wake word having
 *       been detected, or while wake word detection is disabled, this
 *       interface can be used to force the recorder process to start.
 *
 * @param handle  Audio recorder handle
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_trigger_start(audio_rec_handle_t handle);
/**
 * @brief Stop recording by force
 *
 * @note Whether the recorder process was triggered by a detected wake word or
 *       by `audio_recorder_trigger_start`, this function can be used to force
 *       the recorder to stop.
 *       If VAD detection is disabled, this must be invoked to stop recording
 *       after `audio_recorder_trigger_start`.
 *
 * @param handle  Audio recorder handle
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_trigger_stop(audio_rec_handle_t handle);
/**
 * @brief Enable or suspend wake word detection
 *
 * @param handle  Audio recorder handle
 * @param enable  true: enable wake word detection
 *                false: disable wake word detection
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_wakenet_enable(audio_rec_handle_t handle, bool enable);
/**
 * @brief Enable or suspend speech command recognition
 *
 * @param handle  Audio recorder handle
 * @param enable  true: enable speech command recognition
 *                false: disable speech command recognition
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_multinet_enable(audio_rec_handle_t handle, bool enable);
/**
 * @brief Enable or suspend the voice duration check
 *
 * @param handle  Audio recorder handle
 * @param enable  true: enable voice duration check
 *                false: disable voice duration check
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_vad_check_enable(audio_rec_handle_t handle, bool enable);
/**
 * @brief Read data from the audio recorder
 *
 * @param handle  Audio recorder handle
 * @param buffer  Buffer to save data
 * @param length  Size of buffer
 * @param ticks   Timeout for reading, given as a `TickType_t` tick count
 *
 * @return
 *     - Length of data actually read
 *     - ESP_ERR_INVALID_ARG
 *
 * NOTE(review): the length and the error code share one `int` return value.
 * If ESP_ERR_INVALID_ARG is a positive constant, as it appears to be in
 * ESP-IDF's esp_err.h, it cannot be told apart from a valid read length.
 * Confirm how callers are expected to detect the error.
 */
int audio_recorder_data_read(audio_rec_handle_t handle, void *buffer, int length, TickType_t ticks);
/**
 * @brief Destroy the audio recorder and recycle all resources
 *
 * @param handle  Audio recorder handle
 *
 * @return
 *     - ESP_OK
 *     - ESP_FAIL
 */
esp_err_t audio_recorder_destroy(audio_rec_handle_t handle);
/**
 * @brief Get the wake up state of the audio recorder
 *
 * @param handle  Audio recorder handle
 *
 * @return
 *     - true
 *     - false
 *
 * NOTE(review): presumably true means the recorder is currently in the wakeup
 * state and false means it is not; the header does not say, so confirm
 * against the implementation.
 */
bool audio_recorder_get_wakeup_state(audio_rec_handle_t handle);
#ifdef __cplusplus
}
#endif
#endif