-
Notifications
You must be signed in to change notification settings - Fork 11
/
index.js
506 lines (474 loc) · 18 KB
/
index.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
import FFT from "fft.js";
/**
* @typedef {Float32Array | Float64Array | number[]} Buffer One of the supported
* buffer types. Other numeric array types may not work correctly.
*/
/**
* A class that can perform autocorrelation on input arrays of a given size.
*
* The class holds internal buffers so that no additional allocations are
* necessary while performing the operation.
*
* @template {Buffer} T the buffer type to use. While inputs to the
* autocorrelation process can be any array-like type, the output buffer
* (whether provided explicitly or using a fresh buffer) is always of this type.
*/
export class Autocorrelator {
/** @private @readonly @type {number} */
_inputLength;
/** @private @type {FFT} */
_fft;
/** @private @type {(size: number) => T} */
_bufferSupplier;
/** @private @type {T} */
_paddedInputBuffer;
/** @private @type {T} */
_transformBuffer;
/** @private @type {T} */
_inverseBuffer;
/**
* A helper method to create an {@link Autocorrelator} using
* {@link Float32Array} buffers.
*
* @param inputLength {number} the input array length to support
* @returns {Autocorrelator<Float32Array>}
*/
static forFloat32Array(inputLength) {
return new Autocorrelator(
inputLength,
(length) => new Float32Array(length),
);
}
/**
* A helper method to create an {@link Autocorrelator} using
* {@link Float64Array} buffers.
*
* @param inputLength {number} the input array length to support
* @returns {Autocorrelator<Float64Array>}
*/
static forFloat64Array(inputLength) {
return new Autocorrelator(
inputLength,
(length) => new Float64Array(length),
);
}
/**
* A helper method to create an {@link Autocorrelator} using `number[]`
* buffers.
*
* @param inputLength {number} the input array length to support
* @returns {Autocorrelator<number[]>}
*/
static forNumberArray(inputLength) {
return new Autocorrelator(inputLength, (length) => Array(length));
}
/**
* Constructs a new {@link Autocorrelator} able to handle input arrays of the
* given length.
*
* @param inputLength {number} the input array length to support. This
* `Autocorrelator` will only support operation on arrays of this length.
* @param bufferSupplier {(length: number) => T} the function to use for
* creating buffers, accepting the length of the buffer to create and
* returning a new buffer of that length. The values of the returned buffer
* need not be initialized in any particular way.
*/
constructor(inputLength, bufferSupplier) {
if (inputLength < 1) {
throw new Error(`Input length must be at least one`);
}
this._inputLength = inputLength;
// We need to double the input length to get correct results, and the FFT
// algorithm we use requires a length that's a power of 2
this._fft = new FFT(ceilPow2(2 * inputLength));
this._bufferSupplier = bufferSupplier;
this._paddedInputBuffer = this._bufferSupplier(this._fft.size);
this._transformBuffer = this._bufferSupplier(2 * this._fft.size);
this._inverseBuffer = this._bufferSupplier(2 * this._fft.size);
}
/**
* Returns the supported input length.
*
* @returns {number} the supported input length
*/
get inputLength() {
return this._inputLength;
}
/**
* Autocorrelates the given input data.
*
* @param input {ArrayLike<number>} the input data to autocorrelate
* @param output {T} the output buffer into which to write the autocorrelated
* data. If not provided, a new buffer will be created.
* @returns {T} `output`
*/
autocorrelate(input, output = this._bufferSupplier(input.length)) {
if (input.length !== this._inputLength) {
throw new Error(
`Input must have length ${this._inputLength} but had length ${input.length}`,
);
}
// Step 0: pad the input array with zeros
for (let i = 0; i < input.length; i++) {
this._paddedInputBuffer[i] = input[i];
}
for (let i = input.length; i < this._paddedInputBuffer.length; i++) {
this._paddedInputBuffer[i] = 0;
}
// Step 1: get the DFT of the input array
this._fft.realTransform(this._transformBuffer, this._paddedInputBuffer);
// We need to fill in the right half of the array too
this._fft.completeSpectrum(this._transformBuffer);
// Step 2: multiply each entry by its conjugate
const tb = this._transformBuffer;
for (let i = 0; i < tb.length; i += 2) {
tb[i] = tb[i] * tb[i] + tb[i + 1] * tb[i + 1];
tb[i + 1] = 0;
}
// Step 3: perform the inverse transform
this._fft.inverseTransform(this._inverseBuffer, this._transformBuffer);
// This last result (the inverse transform) contains the autocorrelation
// data, which is completely real
for (let i = 0; i < input.length; i++) {
output[i] = this._inverseBuffer[2 * i];
}
return output;
}
}
/**
* Returns an array of all the key maximum positions in the given input array.
*
* In McLeod's paper, a key maximum is the highest maximum between a positively
* sloped zero crossing and a negatively sloped one.
*
* TODO: it may be more efficient not to construct a new output array each time,
* but that would also make the code more complicated (more so than the changes
* that were needed to remove the other allocations).
*
* @param input {ArrayLike<number>}
* @returns {number[]}
*/
function getKeyMaximumIndices(input) {
// The indices of the key maxima
/** @type {number[]} */ const keyIndices = [];
// Whether the last zero crossing found was positively sloped; equivalently,
// whether we're looking for a key maximum
let lookingForMaximum = false;
// The largest local maximum found so far
let max = -Infinity;
// The index of the largest local maximum so far
let maxIndex = -1;
for (let i = 1; i < input.length - 1; i++) {
if (input[i - 1] <= 0 && input[i] > 0) {
// Positively sloped zero crossing
lookingForMaximum = true;
maxIndex = i;
max = input[i];
} else if (input[i - 1] > 0 && input[i] <= 0) {
// Negatively sloped zero crossing
lookingForMaximum = false;
if (maxIndex !== -1) {
keyIndices.push(maxIndex);
}
} else if (lookingForMaximum && input[i] > max) {
max = input[i];
maxIndex = i;
}
}
return keyIndices;
}
/**
* Refines the chosen key maximum index chosen from the given data by
* interpolating a parabola using the key maximum index and its two neighbors
* and finding the position of that parabola's maximum value.
*
* This is described in section 5 of the MPM paper as a way to refine the
* position of the maximum.
*
* @param index {number} the chosen key maximum index. This must be between `1`
* and `data.length - 2`, inclusive, since it and its two neighbors need to be
* valid indexes of `data`.
* @param data {ArrayLike<number>} the input array from which `index` was chosen
* @returns {[number, number]} a pair consisting of the refined key maximum index and the
* interpolated value of `data` at that index (the latter of which is used as a
* measure of clarity)
*/
function refineResultIndex(index, data) {
const [x0, x1, x2] = [index - 1, index, index + 1];
const [y0, y1, y2] = [data[x0], data[x1], data[x2]];
// The parabola going through the three data points can be written as
// y = y0(x - x1)(x - x2)/(x0 - x1)(x0 - x2)
// + y1(x - x0)(x - x2)/(x1 - x0)(x1 - x2)
// + y2(x - x0)(x - x1)/(x2 - x0)(x2 - x1)
// Given the definitions of x0, x1, and x2, we can simplify the denominators:
// y = y0(x - x1)(x - x2)/2
// - y1(x - x0)(x - x2)
// + y2(x - x0)(x - x1)/2
// We can expand this out and get the coefficients in standard form:
// a = y0/2 - y1 + y2/2
// b = -(y0/2)(x1 + x2) + y1(x0 + x2) - (y2/2)(x0 + x1)
// c = y0x1x2/2 - y1x0x2 + y2x0x1/2
// The index of the maximum is -b / 2a (by solving for x where the derivative
// is 0).
const a = y0 / 2 - y1 + y2 / 2;
const b = -(y0 / 2) * (x1 + x2) + y1 * (x0 + x2) - (y2 / 2) * (x0 + x1);
const c = (y0 * x1 * x2) / 2 - y1 * x0 * x2 + (y2 * x0 * x1) / 2;
const xMax = -b / (2 * a);
const yMax = a * xMax * xMax + b * xMax + c;
return [xMax, yMax];
}
/**
* A class that can detect the pitch of a note from a time-domain input array.
*
* This class uses the McLeod pitch method (MPM) to detect pitches. MPM is
* described in the paper 'A Smarter Way to Find Pitch' by Philip McLeod and
* Geoff Wyvill
* (http://miracle.otago.ac.nz/tartini/papers/A_Smarter_Way_to_Find_Pitch.pdf).
*
* The class holds internal buffers so that a minimal number of additional
* allocations are necessary while performing the operation.
*
* @template {Buffer} T the buffer type to use internally. Inputs to the
* pitch-detection process can be any numeric array type.
*/
export class PitchDetector {
/** @private @type {Autocorrelator<T>} */
_autocorrelator;
/** @private @type {T} */
_nsdfBuffer;
/** @private @type {number} */
_clarityThreshold = 0.9;
/** @private @type {number} */
_minVolumeAbsolute = 0.0;
/** @private @type {number} */
_maxInputAmplitude = 1.0;
/**
* A helper method to create an {@link PitchDetector} using {@link Float32Array} buffers.
*
* @param inputLength {number} the input array length to support
* @returns {PitchDetector<Float32Array>}
*/
static forFloat32Array(inputLength) {
return new PitchDetector(inputLength, (length) => new Float32Array(length));
}
/**
* A helper method to create an {@link PitchDetector} using {@link Float64Array} buffers.
*
* @param inputLength {number} the input array length to support
* @returns {PitchDetector<Float64Array>}
*/
static forFloat64Array(inputLength) {
return new PitchDetector(inputLength, (length) => new Float64Array(length));
}
/**
* A helper method to create an {@link PitchDetector} using `number[]` buffers.
*
* @param inputLength {number} the input array length to support
* @returns {PitchDetector<number[]>}
*/
static forNumberArray(inputLength) {
return new PitchDetector(inputLength, (length) => Array(length));
}
/**
* Constructs a new {@link PitchDetector} able to handle input arrays of the
* given length.
*
* @param inputLength {number} the input array length to support. This
* `PitchDetector` will only support operation on arrays of this length.
* @param bufferSupplier {(inputLength: number) => T} the function to use for
* creating buffers, accepting the length of the buffer to create and
* returning a new buffer of that length. The values of the returned buffer
* need not be initialized in any particular way.
*/
constructor(inputLength, bufferSupplier) {
this._autocorrelator = new Autocorrelator(inputLength, bufferSupplier);
this._nsdfBuffer = bufferSupplier(inputLength);
}
/**
* Returns the supported input length.
*
* @returns {number} the supported input length
*/
get inputLength() {
return this._autocorrelator.inputLength;
}
/**
* Sets the clarity threshold used when identifying the correct pitch (the constant
* `k` from the MPM paper). The value must be between 0 (exclusive) and 1
* (inclusive), with the most suitable range being between 0.8 and 1.
*
* @param threshold {number} the clarity threshold
*/
set clarityThreshold(threshold) {
if (!Number.isFinite(threshold) || threshold <= 0 || threshold > 1) {
throw new Error("clarityThreshold must be a number in the range (0, 1]");
}
this._clarityThreshold = threshold;
}
/**
* Sets the minimum detectable volume, as an absolute number between 0 and
* `maxInputAmplitude`, inclusive, to consider in a sample when detecting the
* pitch. If a sample fails to meet this minimum volume, `findPitch` will
* return a clarity of 0.
*
* Volume is calculated as the RMS (root mean square) of the input samples.
*
* @param volume {number} the minimum volume as an absolute amplitude value
*/
set minVolumeAbsolute(volume) {
if (
!Number.isFinite(volume) ||
volume < 0 ||
volume > this._maxInputAmplitude
) {
throw new Error(
`minVolumeAbsolute must be a number in the range [0, ${this._maxInputAmplitude}]`,
);
}
this._minVolumeAbsolute = volume;
}
/**
* Sets the minimum volume using a decibel measurement. Must be less than or
* equal to 0: 0 indicates the loudest possible sound (see
* `maxInputAmplitude`), -10 is a sound with a tenth of the volume of the
* loudest possible sound, etc.
*
* Volume is calculated as the RMS (root mean square) of the input samples.
*
* @param db {number} the minimum volume in decibels, with 0 being the loudest
* sound
*/
set minVolumeDecibels(db) {
if (!Number.isFinite(db) || db > 0) {
throw new Error("minVolumeDecibels must be a number <= 0");
}
this._minVolumeAbsolute = this._maxInputAmplitude * 10 ** (db / 10);
}
/**
* Sets the maximum amplitude of an input reading. Must be greater than 0.
*
* @param amplitude {number} the maximum amplitude (absolute value) of an input reading
*/
set maxInputAmplitude(amplitude) {
if (!Number.isFinite(amplitude) || amplitude <= 0) {
throw new Error("maxInputAmplitude must be a number > 0");
}
this._maxInputAmplitude = amplitude;
}
/**
* Returns the pitch detected using McLeod Pitch Method (MPM) along with a
* measure of its clarity.
*
* The clarity is a value between 0 and 1 (potentially inclusive) that
* represents how "clear" the pitch was. A clarity value of 1 indicates that
* the pitch was very distinct, while lower clarity values indicate less
* definite pitches.
*
* @param input {ArrayLike<number>} the time-domain input data
* @param sampleRate {number} the sample rate at which the input data was
* collected
* @returns {[number, number]} the detected pitch, in Hz, followed by the
* clarity. If a pitch cannot be determined from the input, such as if the
* volume is too low (see `minVolumeAbsolute` and `minVolumeDecibels`), this
* will be `[0, 0]`.
*/
findPitch(input, sampleRate) {
// If the highest key maximum is less than the minimum volume, we don't need
// to bother detecting the pitch, as the sample is too quiet.
if (this._belowMinimumVolume(input)) return [0, 0];
this._nsdf(input);
const keyMaximumIndices = getKeyMaximumIndices(this._nsdfBuffer);
if (keyMaximumIndices.length === 0) {
// No key maxima means that we either don't have enough data to analyze or
// that the data was flawed (such as an input array of zeroes)
return [0, 0];
}
// The highest key maximum
const nMax = Math.max(...keyMaximumIndices.map((i) => this._nsdfBuffer[i]));
// Following the paper, we return the pitch corresponding to the first key
// maximum higher than K * nMax. This is guaranteed not to be undefined, since
// we know of at least one key maximum satisfying this condition (whichever
// key maximum gave us nMax).
const resultIndex = keyMaximumIndices.find(
(i) => this._nsdfBuffer[i] >= this._clarityThreshold * nMax,
);
const [refinedResultIndex, clarity] = refineResultIndex(
// @ts-expect-error resultIndex is guaranteed to be defined
resultIndex,
this._nsdfBuffer,
);
// Due to floating point errors, the clarity may occasionally come out to be
// slightly over 1.0. We can avoid incorrect results by clamping the value.
return [sampleRate / refinedResultIndex, Math.min(clarity, 1.0)];
}
/**
* Returns whether the input audio data is below the minimum volume allowed by
* the pitch detector.
*
* @private
* @param input {ArrayLike<number>}
* @returns {boolean}
*/
_belowMinimumVolume(input) {
if (this._minVolumeAbsolute === 0) return false;
let squareSum = 0;
for (let i = 0; i < input.length; i++) {
squareSum += input[i] ** 2;
}
return Math.sqrt(squareSum / input.length) < this._minVolumeAbsolute;
}
/**
* Computes the NSDF of the input and stores it in the internal buffer. This
* is equation (9) in the McLeod pitch method paper.
*
* @private
* @param input {ArrayLike<number>}
*/
_nsdf(input) {
// The function r'(tau) is the autocorrelation
this._autocorrelator.autocorrelate(input, this._nsdfBuffer);
// The function m'(tau) (defined in equation (6)) can be computed starting
// with m'(0), which is equal to 2r'(0), and then iteratively modified to
// get m'(1), m'(2), etc. For example, to get m'(1), we take m'(0) and
// subtract x_0^2 and x_{W-1}^2. Then, to get m'(2), we take m'(1) and
// subtract x_1^2 and x_{W-2}^2, and further values are similar (see the
// note at the end of section 6 in the MPM paper).
//
// The resulting array values are 2 * r'(tau) / m'(tau). We use m below as
// the incremental value of m'.
let m = 2 * this._nsdfBuffer[0];
/** @type {number} */ let i;
// As pointed out by issuefiler on GitHub, we can take advantage of the fact
// that m will never increase to avoid division by zero by ending this loop
// once m === 0. The rest of the array values after m becomes 0 will just be
// set to 0 themselves. We actually check for m > 0 rather than m === 0
// because there may be small floating-point errors that cause m to become
// negative rather than exactly 0.
for (i = 0; i < this._nsdfBuffer.length && m > 0; i++) {
this._nsdfBuffer[i] = (2 * this._nsdfBuffer[i]) / m;
m -= input[i] ** 2 + input[input.length - i - 1] ** 2;
}
// If there are any array values remaining, it means m === 0 for those
// values of tau, so we can just set them to 0
for (; i < this._nsdfBuffer.length; i++) {
this._nsdfBuffer[i] = 0;
}
}
}
/**
* Rounds up the input to the next power of 2.
*
* @param {number} v
* @returns {number} the next power of 2 at least as large as `v`
*/
function ceilPow2(v) {
// https://graphics.stanford.edu/~seander/bithacks.html#RoundUpPowerOf2
v--;
v |= v >> 1;
v |= v >> 2;
v |= v >> 4;
v |= v >> 8;
v |= v >> 16;
v++;
return v;
}