Skip to content

Commit 09d7d2b

Browse files
authored
examples : refactor in order to reuse code and reduce duplication (ggml-org#482)
* examples : refactor common code into a library
* examples : refactor common SDL code into a library
* make : update Makefile to use common libs
* common : fix MSVC M_PI ..
* addon.node : link common lib
1 parent 0336161 commit 09d7d2b

19 files changed: 580 lines added (+580), 1254 lines removed (-1254)

Makefile

+11-8
Original file line number | Diff line number | Diff line change
@@ -197,18 +197,21 @@ clean:
197197

198198
CC_SDL=`sdl2-config --cflags --libs`
199199

200-
main: examples/main/main.cpp ggml.o whisper.o
201-
$(CXX) $(CXXFLAGS) examples/main/main.cpp ggml.o whisper.o -o main $(LDFLAGS)
200+
SRC_COMMON = examples/common.cpp
201+
SRC_COMMON_SDL = examples/common-sdl.cpp
202+
203+
main: examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o
204+
$(CXX) $(CXXFLAGS) examples/main/main.cpp $(SRC_COMMON) ggml.o whisper.o -o main $(LDFLAGS)
202205
./main -h
203206

204-
stream: examples/stream/stream.cpp ggml.o whisper.o
205-
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
207+
stream: examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
208+
$(CXX) $(CXXFLAGS) examples/stream/stream.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o stream $(CC_SDL) $(LDFLAGS)
206209

207-
command: examples/command/command.cpp ggml.o whisper.o
208-
$(CXX) $(CXXFLAGS) examples/command/command.cpp ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
210+
command: examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
211+
$(CXX) $(CXXFLAGS) examples/command/command.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o command $(CC_SDL) $(LDFLAGS)
209212

210-
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o
211-
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
213+
talk: examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o
214+
$(CXX) $(CXXFLAGS) examples/talk/talk.cpp examples/talk/gpt-2.cpp $(SRC_COMMON) $(SRC_COMMON_SDL) ggml.o whisper.o -o talk $(CC_SDL) $(LDFLAGS)
212215

213216
bench: examples/bench/bench.cpp ggml.o whisper.o
214217
$(CXX) $(CXXFLAGS) examples/bench/bench.cpp ggml.o whisper.o -o bench $(LDFLAGS)

bindings/javascript/whisper.js

+1-1
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

examples/CMakeLists.txt

+31
Original file line number | Diff line number | Diff line change
@@ -14,6 +14,37 @@ if (WHISPER_SUPPORT_SDL2)
1414
message(STATUS "SDL2_LIBRARIES = ${SDL2_LIBRARIES}")
1515
endif()
1616

17+
# common
18+
19+
set(TARGET common)
20+
21+
add_library(${TARGET} STATIC
22+
common.h
23+
common.cpp
24+
)
25+
26+
include(DefaultTargetOptions)
27+
28+
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
29+
30+
if (WHISPER_SUPPORT_SDL2)
31+
# common-sdl
32+
33+
set(TARGET common-sdl)
34+
35+
add_library(${TARGET} STATIC
36+
common-sdl.h
37+
common-sdl.cpp
38+
)
39+
40+
include(DefaultTargetOptions)
41+
42+
target_include_directories(${TARGET} PUBLIC ${SDL2_INCLUDE_DIRS})
43+
target_link_libraries(${TARGET} PRIVATE ${SDL2_LIBRARIES})
44+
45+
set_target_properties(${TARGET} PROPERTIES POSITION_INDEPENDENT_CODE ON)
46+
endif()
47+
1748
# examples
1849

1950
include_directories(${CMAKE_CURRENT_SOURCE_DIR})

examples/addon.node/CMakeLists.txt

+1-1
Original file line number | Diff line number | Diff line change
@@ -23,7 +23,7 @@ string(REPLACE "\"" "" NODE_ADDON_API_DIR ${NODE_ADDON_API_DIR})
2323
target_include_directories(${TARGET} PRIVATE ${NODE_ADDON_API_DIR})
2424
#==================================================================
2525

26-
target_link_libraries(${TARGET} ${CMAKE_JS_LIB} whisper ${CMAKE_THREAD_LIBS_INIT})
26+
target_link_libraries(${TARGET} ${CMAKE_JS_LIB} common whisper ${CMAKE_THREAD_LIBS_INIT})
2727

2828
if(MSVC AND CMAKE_JS_NODELIB_DEF AND CMAKE_JS_NODELIB_TARGET)
2929
# Generate node.lib

examples/addon.node/addon.cpp

+11-91
Original file line number | Diff line number | Diff line change
@@ -1,15 +1,13 @@
1-
#include <cstdint>
1+
#include "napi.h"
2+
#include "common.h"
3+
4+
#include "whisper.h"
5+
26
#include <string>
37
#include <thread>
48
#include <vector>
59
#include <cmath>
6-
7-
#include "napi.h"
8-
9-
#define DR_WAV_IMPLEMENTATION
10-
#include "dr_wav.h"
11-
12-
#include "whisper.h"
10+
#include <cstdint>
1311

1412
struct whisper_params {
1513
int32_t n_threads = std::min(4, (int32_t) std::thread::hardware_concurrency());
@@ -44,7 +42,7 @@ struct whisper_params {
4442
std::string model = "../../ggml-large.bin";
4543

4644
std::vector<std::string> fname_inp = {};
47-
std::vector<std::string> fname_outp = {};
45+
std::vector<std::string> fname_out = {};
4846
};
4947

5048
struct whisper_print_user_data {
@@ -143,7 +141,6 @@ void whisper_print_segment_callback(struct whisper_context * ctx, int n_new, voi
143141
}
144142

145143
int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
146-
147144
if (params.fname_inp.empty()) {
148145
fprintf(stderr, "error: no input files specified\n");
149146
return 2;
@@ -181,91 +178,14 @@ int run(whisper_params &params, std::vector<std::vector<std::string>> &result) {
181178

182179
for (int f = 0; f < (int) params.fname_inp.size(); ++f) {
183180
const auto fname_inp = params.fname_inp[f];
184-
const auto fname_outp = f < (int)params.fname_outp.size() && !params.fname_outp[f].empty() ? params.fname_outp[f] : params.fname_inp[f];
181+
const auto fname_out = f < (int)params.fname_out.size() && !params.fname_out[f].empty() ? params.fname_out[f] : params.fname_inp[f];
185182

186183
std::vector<float> pcmf32; // mono-channel F32 PCM
187184
std::vector<std::vector<float>> pcmf32s; // stereo-channel F32 PCM
188185

189-
// WAV input
190-
{
191-
drwav wav;
192-
std::vector<uint8_t> wav_data; // used for pipe input from stdin
193-
194-
if (fname_inp == "-") {
195-
{
196-
uint8_t buf[1024];
197-
while (true)
198-
{
199-
const size_t n = fread(buf, 1, sizeof(buf), stdin);
200-
if (n == 0) {
201-
break;
202-
}
203-
wav_data.insert(wav_data.end(), buf, buf + n);
204-
}
205-
}
206-
207-
if (drwav_init_memory(&wav, wav_data.data(), wav_data.size(), nullptr) == false) {
208-
fprintf(stderr, "error: failed to open WAV file from stdin\n");
209-
return 4;
210-
}
211-
212-
fprintf(stderr, "%s: read %zu bytes from stdin\n", __func__, wav_data.size());
213-
}
214-
else if (drwav_init_file(&wav, fname_inp.c_str(), nullptr) == false) {
215-
fprintf(stderr, "error: failed to open '%s' as WAV file\n", fname_inp.c_str());
216-
return 5;
217-
}
218-
219-
if (wav.channels != 1 && wav.channels != 2) {
220-
fprintf(stderr, "error: WAV file '%s' must be mono or stereo\n", fname_inp.c_str());
221-
return 6;
222-
}
223-
224-
if (params.diarize && wav.channels != 2 && params.no_timestamps == false) {
225-
fprintf(stderr, "error: WAV file '%s' must be stereo for diarization and timestamps have to be enabled\n", fname_inp.c_str());
226-
return 6;
227-
}
228-
229-
if (wav.sampleRate != WHISPER_SAMPLE_RATE) {
230-
fprintf(stderr, "error: WAV file '%s' must be %i kHz\n", fname_inp.c_str(), WHISPER_SAMPLE_RATE/1000);
231-
return 8;
232-
}
233-
234-
if (wav.bitsPerSample != 16) {
235-
fprintf(stderr, "error: WAV file '%s' must be 16-bit\n", fname_inp.c_str());
236-
return 9;
237-
}
238-
239-
const uint64_t n = wav_data.empty() ? wav.totalPCMFrameCount : wav_data.size()/(wav.channels*wav.bitsPerSample/8);
240-
241-
std::vector<int16_t> pcm16;
242-
pcm16.resize(n*wav.channels);
243-
drwav_read_pcm_frames_s16(&wav, n, pcm16.data());
244-
drwav_uninit(&wav);
245-
246-
// convert to mono, float
247-
pcmf32.resize(n);
248-
if (wav.channels == 1) {
249-
for (uint64_t i = 0; i < n; i++) {
250-
pcmf32[i] = float(pcm16[i])/32768.0f;
251-
}
252-
} else {
253-
for (uint64_t i = 0; i < n; i++) {
254-
pcmf32[i] = float(pcm16[2*i] + pcm16[2*i + 1])/65536.0f;
255-
}
256-
}
257-
258-
if (params.diarize) {
259-
// convert to stereo, float
260-
pcmf32s.resize(2);
261-
262-
pcmf32s[0].resize(n);
263-
pcmf32s[1].resize(n);
264-
for (uint64_t i = 0; i < n; i++) {
265-
pcmf32s[0][i] = float(pcm16[2*i])/32768.0f;
266-
pcmf32s[1][i] = float(pcm16[2*i + 1])/32768.0f;
267-
}
268-
}
186+
if (!::read_wav(fname_inp, pcmf32, pcmf32s, params.diarize)) {
187+
fprintf(stderr, "error: failed to read WAV file '%s'\n", fname_inp.c_str());
188+
continue;
269189
}
270190

271191
// print system information

examples/command.wasm/CMakeLists.txt

+1
Original file line number | Diff line number | Diff line change
@@ -11,6 +11,7 @@ add_executable(${TARGET}
1111
include(DefaultTargetOptions)
1212

1313
target_link_libraries(${TARGET} PRIVATE
14+
common
1415
whisper
1516
)
1617

examples/command.wasm/emscripten.cpp

+3-59
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,5 @@
11
#include "ggml.h"
2+
#include "common.h"
23
#include "whisper.h"
34

45
#include <emscripten.h>
@@ -27,24 +28,6 @@ std::string g_transcribed = "";
2728

2829
std::vector<float> g_pcmf32;
2930

30-
static std::string trim(const std::string & s) {
31-
std::regex e("^\\s+|\\s+$");
32-
return std::regex_replace(s, e, "");
33-
}
34-
35-
static void high_pass_filter(std::vector<float> & data, float cutoff, float sample_rate) {
36-
const float rc = 1.0f / (2.0f * M_PI * cutoff);
37-
const float dt = 1.0f / sample_rate;
38-
const float alpha = dt / (rc + dt);
39-
40-
float y = data[0];
41-
42-
for (size_t i = 1; i < data.size(); i++) {
43-
y = alpha * (y + data[i] - data[i - 1]);
44-
data[i] = y;
45-
}
46-
}
47-
4831
// compute similarity between two strings using Levenshtein distance
4932
static float similarity(const std::string & s0, const std::string & s1) {
5033
const size_t len0 = s0.size() + 1;
@@ -75,44 +58,6 @@ void command_set_status(const std::string & status) {
7558
g_status = status;
7659
}
7760

78-
bool command_vad_simple(std::vector<float> & pcmf32, int sample_rate, int last_ms, float vad_thold, float freq_thold, bool verbose) {
79-
const int n_samples = pcmf32.size();
80-
const int n_samples_last = (sample_rate * last_ms) / 1000;
81-
82-
if (n_samples_last >= n_samples) {
83-
// not enough samples - assume no speech
84-
return false;
85-
}
86-
87-
if (freq_thold > 0.0f) {
88-
high_pass_filter(pcmf32, freq_thold, sample_rate);
89-
}
90-
91-
float energy_all = 0.0f;
92-
float energy_last = 0.0f;
93-
94-
for (size_t i = 0; i < n_samples; i++) {
95-
energy_all += fabsf(pcmf32[i]);
96-
97-
if (i >= n_samples - n_samples_last) {
98-
energy_last += fabsf(pcmf32[i]);
99-
}
100-
}
101-
102-
energy_all /= n_samples;
103-
energy_last /= n_samples_last;
104-
105-
if (verbose) {
106-
fprintf(stderr, "%s: energy_all: %f, energy_last: %f, vad_thold: %f, freq_thold: %f\n", __func__, energy_all, energy_last, vad_thold, freq_thold);
107-
}
108-
109-
if (energy_last > vad_thold*energy_all) {
110-
return false;
111-
}
112-
113-
return true;
114-
}
115-
11661
std::string command_transcribe(whisper_context * ctx, const whisper_full_params & wparams, const std::vector<float> & pcmf32, float & prob, int64_t & t_ms) {
11762
const auto t_start = std::chrono::high_resolution_clock::now();
11863

@@ -155,7 +100,7 @@ void command_get_audio(int ms, int sample_rate, std::vector<float> & audio) {
155100
const int64_t n_samples = (ms * sample_rate) / 1000;
156101

157102
int64_t n_take = 0;
158-
if (g_pcmf32.size() < n_samples) {
103+
if (n_samples > (int) g_pcmf32.size()) {
159104
n_take = g_pcmf32.size();
160105
} else {
161106
n_take = n_samples;
@@ -187,7 +132,6 @@ void command_main(size_t index) {
187132

188133
printf("command: using %d threads\n", wparams.n_threads);
189134

190-
bool is_running = true;
191135
bool have_prompt = false;
192136
bool ask_prompt = true;
193137
bool print_energy = false;
@@ -233,7 +177,7 @@ void command_main(size_t index) {
233177
{
234178
command_get_audio(vad_ms, WHISPER_SAMPLE_RATE, pcmf32_cur);
235179

236-
if (command_vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
180+
if (::vad_simple(pcmf32_cur, WHISPER_SAMPLE_RATE, 1000, vad_thold, freq_thold, print_energy)) {
237181
fprintf(stdout, "%s: Speech detected! Processing ...\n", __func__);
238182
command_set_status("Speech detected! Processing ...");
239183

examples/command/CMakeLists.txt

+1-2
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,5 @@ if (WHISPER_SUPPORT_SDL2)
55

66
include(DefaultTargetOptions)
77

8-
target_include_directories(${TARGET} PRIVATE ${SDL2_INCLUDE_DIRS})
9-
target_link_libraries(${TARGET} PRIVATE whisper ${SDL2_LIBRARIES} ${CMAKE_THREAD_LIBS_INIT})
8+
target_link_libraries(${TARGET} PRIVATE common common-sdl whisper ${CMAKE_THREAD_LIBS_INIT})
109
endif ()

0 commit comments

Comments
 (0)