|
1 |
| -#include <cstdint> |
| 1 | +#include "napi.h" |
| 2 | +#include "common.h" |
| 3 | + |
| 4 | +#include "whisper.h" |
| 5 | + |
2 | 6 | #include <string>
|
3 | 7 | #include <thread>
|
4 | 8 | #include <vector>
|
5 | 9 | #include <cmath>
|
6 |
| - |
7 |
| -#include "napi.h" |
8 |
| - |
9 |
| -#define DR_WAV_IMPLEMENTATION |
10 |
| -#include "dr_wav.h" |
11 |
| - |
12 |
| -#include "whisper.h" |
| 10 | +#include <cstdint> |
13 | 11 |
|
14 | 12 | struct whisper_params {
|
15 | 13 | int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
|
@@ -44,7 +42,7 @@ struct whisper_params {
|
44 | 42 | std::string model = "../../ggml-large.bin";
|
45 | 43 |
|
46 | 44 | std::vector<std::string> fname_inp = {};
|
47 |
| - std::vector<std::string> fname_outp = {}; |
| 45 | + std::vector<std::string> fname_out = {}; |
48 | 46 | };
|
49 | 47 |
|
50 | 48 | struct whisper_print_user_data {
|
@@ -143,7 +141,6 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi
|
143 | 141 | }
|
144 | 142 |
|
143 | 141 | int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
|
146 |
| - |
147 | 144 | if (params.fname_inp.empty()) {
|
148 | 145 | fprintf(stderr, "error: no input files specified\n");
|
149 | 146 | return 2;
|
@@ -181,91 +178,14 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
|
181 | 178 |
|
182 | 179 | for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
|
183 | 180 | const auto fname_inp = params.fname_inp[f];
|
184 |
| - const auto fname_outp = f < (int)params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f]; |
| 181 | + const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f]; |
185 | 182 |
|
186 | 183 | std::vector<float> pcmf32; // mono-channel F32 PCM
|
187 | 184 | std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
|
188 | 185 |
|
189 |
| - // WAV input |
190 |
| - { |
191 |
| - drwav wav; |
192 |
| - std::vector<uint8_t> wav_data; // used for pipe input from stdin |
193 |
| - |
194 |
| - if (fname_inp == "-") { |
195 |
| - { |
196 |
| - uint8_t buf[1024]; |
197 |
| - while (true) |
198 |
| - { |
199 |
| - const size_t n = fread(buf, 1, sizeof(buf), stdin); |
200 |
| - if (n == 0) { |
201 |
| - break; |
202 |
| - } |
203 |
| - wav_data.insert(wav_data.end(), buf, buf + n); |
204 |
| - } |
205 |
| - } |
206 |
| - |
207 |
| - if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) { |
208 |
| - fprintf(stderr, "error: failed to open WAV file from stdin\n"); |
209 |
| - return 4; |
210 |
| - } |
211 |
| - |
212 |
| - fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size()); |
213 |
| - } |
214 |
| - else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) { |
215 |
| - fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str()); |
216 |
| - return 5; |
217 |
| - } |
218 |
| - |
219 |
| - if (wav.channels != 1 && wav.channels != 2) { |
220 |
| - fprintf(stderr, "error: WAV file '%s' must be mono or stereo\n", fname_inp.c_str()); |
221 |
| - return 6; |
222 |
| - } |
223 |
| - |
224 |
| - if (params.diarize && wav.channels != 2 && params.no_timestamps == false) { |
225 |
| - fprintf(stderr, "error: WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str()); |
226 |
| - return 6; |
227 |
| - } |
228 |
| - |
229 |
| - if (wav.sampleRate != WHISPER_SAMPLE_RATE) { |
230 |
| - fprintf(stderr, "error: WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000); |
231 |
| - return 8; |
232 |
| - } |
233 |
| - |
234 |
| - if (wav.bitsPerSample != 16) { |
235 |
| - fprintf(stderr, "error: WAV file '%s' must be 16-bit\n", fname_inp.c_str()); |
236 |
| - return 9; |
237 |
| - } |
238 |
| - |
239 |
| - const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8); |
240 |
| - |
241 |
| - std::vector<int16_t> pcm16; |
242 |
| - pcm16.resize(n*wav.channels); |
243 |
| - drwav_read_pcm_frames_s16(&wav, n, pcm16.data()); |
244 |
| - drwav_uninit(&wav); |
245 |
| - |
246 |
| - // convert to mono, float |
247 |
| - pcmf32.resize(n); |
248 |
| - if (wav.channels == 1) { |
249 |
| - for (uint64_t i = 0; i < n; i++) { |
250 |
| - pcmf32[i] = float(pcm16[i])/32768.0f; |
251 |
| - } |
252 |
| - } else { |
253 |
| - for (uint64_t i = 0; i < n; i++) { |
254 |
| - pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f; |
255 |
| - } |
256 |
| - } |
257 |
| - |
258 |
| - if (params.diarize) { |
259 |
| - // convert to stereo, float |
260 |
| - pcmf32s.resize(2); |
261 |
| - |
262 |
| - pcmf32s[0].resize(n); |
263 |
| - pcmf32s[1].resize(n); |
264 |
| - for (uint64_t i = 0; i < n; i++) { |
265 |
| - pcmf32s[0][i] = float(pcm16[2*i])/32768.0f; |
266 |
| - pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f; |
267 |
| - } |
268 |
| - } |
| 186 | + if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) { |
| 187 | + fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str()); |
| 188 | + continue; |
269 | 189 | }
|
270 | 190 |
|
271 | 191 | // print system information
|
|
0 commit comments