Skip to content

Commit 4ddaae6

Browse files
committed
Imported RTP changes 2 from userconcept.
1 parent 9b984e0 commit 4ddaae6

File tree

8 files changed

+264
-68
lines changed

8 files changed

+264
-68
lines changed

Libraries/ecma-rtp/codec.mjs

+76
Original file line numberDiff line numberDiff line change
@@ -15,3 +15,79 @@
1515
* along with ecma-rtp. If not, see <https://www.gnu.org/licenses/>.
1616
*/
1717

18+
19+
import alawmulaw from 'alawmulaw';
20+
import waveResampler from 'wave-resampler';
21+
22+
/**
 * Superclass to represent an audio Codec.
 *
 * Assigning to `samples` transcodes the assigned chunk (via `transcode`) and
 * stores the result; reading `samples` removes and returns a stored chunk.
 * Subclasses are expected to override `transcode`.
 */
class Codec {
    constructor () {
        // Backing store for the `samples` accessors. It must NOT be called
        // `samples`: assigning to `this.samples` here would invoke the setter,
        // whose read of `this.samples` would re-enter the getter forever.
        this._samples = [];
    }

    /**
     * Convert a chunk of samples. The base implementation does nothing and
     * returns undefined; subclasses override it.
     */
    transcode () {

    }

    /**
     * We need to do some testing to see how the resampling options affect STT,
     * because the low pass filter makes resampling considerably slower:
     *
     * Cubic without Low Pass Filter:
     * generate: 0.283ms
     * transcode: 0.144ms
     * resample: 0.442ms
     *
     * Cubic with LPF:
     * generate: 0.375ms
     * transcode: 0.123ms
     * resample: 6.605ms
     *
     * That's on 20ms audio on an i5 9500. Interestingly it scales x2 per x10 duration,
     * so it's not terrible - but if we can shave time off, so much the better
     *
     * @param samples Integer samples; each value is divided by 0x8000 before resampling.
     * @returns {Int16Array} The resampled audio, scaled back to 16-bit integers.
     */
    static resampleOld (samples) {
        const floatSamples = Float32Array.from(samples, (x) => x / 0x8000);
        // resample() returns a Float64Array, NOT an Int16Array (see the
        // wave-resampler docs). The options {method: "cubic", LPF: false} are
        // not passed here, so the library defaults apply.
        const newSamples = waveResampler.resample(floatSamples, 16000, 8000);
        // Scale positive and negative values separately so both ends of the
        // 16-bit range (0x7FFF and -0x8000) are reachable.
        return Int16Array.from(newSamples, (x) => (x > 0 ? x * 0x7FFF : x * 0x8000));
    }

    /**
     * Decode mu-law samples and resample them with the cubic method and no
     * low pass filter.
     *
     * @param samples Mu-law encoded samples, passed to alawmulaw.mulaw.decode.
     * @returns {Int16Array} The decoded and resampled audio.
     */
    static resample (samples) {
        const decoded = alawmulaw.mulaw.decode(samples);
        const resampled = waveResampler.resample(decoded, 16000, 8000, {method: "cubic", LPF: false});
        return Int16Array.from(resampled);
    }

    /**
     * Transcode a chunk of samples and store the result.
     */
    set samples (samples) {
        this._samples.push(this.transcode(samples));
    }

    /**
     * Remove and return a stored chunk, or undefined when none are stored.
     *
     * NOTE(review): pop() hands back the most recently stored chunk first
     * (LIFO). Confirm whether FIFO (shift()) is what audio playback needs.
     */
    get samples () {
        return this._samples.pop();
    }
}
70+
71+
/**
 * Represents mulaw (as used by Combadges)
 */
class μLaw extends Codec {
    /**
     * @param {object} [options]
     * @param {number} [options.sourceRate=16000]
     * @param {number} [options.sourceDepth=16]
     * @param {number} [options.targetRate=8000]
     * @param {number} [options.targetDepth=8]
     */
    constructor ({sourceRate = 16000,
                  sourceDepth = 16,
                  targetRate = 8000,
                  targetDepth = 8} = {}) {
        super();
        // Keep the requested profile on the instance instead of discarding it.
        // transcode() does not consult these values yet.
        this.sourceRate = sourceRate;
        this.sourceDepth = sourceDepth;
        this.targetRate = targetRate;
        this.targetDepth = targetDepth;
    }

    /**
     * Convert between mu-law and 16-bit samples, choosing the direction from
     * the type of the input.
     *
     * @param samples A Uint8Array is decoded with alawmulaw.mulaw.decode;
     *                an Int16Array is encoded with alawmulaw.mulaw.encode.
     * @param callback Currently unused.
     * @returns The converted samples, or undefined for any other input type.
     */
    transcode (samples, callback) {
        if (samples instanceof Uint8Array) {
            return alawmulaw.mulaw.decode(samples);
        } else if (samples instanceof Int16Array) {
            return alawmulaw.mulaw.encode(samples);
        }
    }
}
92+
93+
export { μLaw, Codec }

Libraries/ecma-rtp/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"name": "ecma-rtp",
2+
"name": "@combadge/ecma-rtp",
33
"version": "0.0.5",
44
"type": "module",
55
"description": "An RTP Library for Spin Doctor, written in ECMAScript",

Libraries/ecma-rtp/rtp.mjs

+65-25
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,8 @@
2323

2424

2525
import alawmulaw from 'alawmulaw'; // For now, move to codec.mjs later.
26+
import dgram from 'dgram';
27+
import { Codec } from './codec.mjs';
2628

2729
const Payload = {
2830
Mulaw8K: 0,
@@ -275,14 +277,73 @@ class RTPHeader {
275277
}
276278

277279
/**
278-
* Temporary class - replace with RTPPacket instances when you work out how.
280+
* Should provide a source and sink for RTP Packets. For now, hardcode to the correct values for Combadge audio.
281+
*
282+
* Down the line, we should generalise this to allow the library to be reused elsewhere -
283+
* (since I wouldn't be writing it if there was another approachable JS RTP library)
279284
*/
280-
class RTPStream {
281-
constructor() {
285+
class RTPServer {
286+
/**
 * Open a UDP socket and hand every received datagram to receivePacket().
 *
 * @param listenAddress Local address passed to the socket's bind().
 * @param portNumber Local port to bind; must be even.
 * @param consumer Optional object that receives decoded audio; it can also be
 *                 assigned later through the `consumer` property.
 * @throws {RangeError} If portNumber is odd.
 */
constructor(listenAddress, portNumber, consumer = undefined) {
    // Validate before allocating anything, so a bad port leaves nothing behind.
    if (portNumber % 2 !== 0) {
        throw new RangeError(`RTP Ports must be even. Odd-numbered ports are reserved for RTCP. Invalid port ${portNumber} passed.`);
    }

    this.sampleCount = 160; // At 8 bits/sample, this can be used for both incrementing the timestamp AND counting bytes.
    this.header = new RTPHeader(this.sampleCount);
    this._consumer = consumer || undefined;

    this.udpServer = dgram.createSocket('udp4');

    // Attach the listeners before binding so no event can be missed.
    this.udpServer.on('listening', () => {
        const address = this.udpServer.address();
        console.log(`RTP Server spawned at ${address.address}:${address.port}`);
    });
    this.udpServer.on('message', (message) => {
        const packet = RTPPacket.from(message);
        this.receivePacket(packet);
    });

    this.udpServer.bind(portNumber, listenAddress);
}
320+
321+
/**
322+
* Function to pass in a function to receive audio from the server.
323+
*/
324+
set consumer (consumer) {
325+
this._consumer = consumer;
326+
}
327+
328+
get consumer () {
329+
return this._consumer;
330+
}
331+
332+
/** Do something with the packet, then forward it to the Consumer if set, or cache it if not. */
333+
receivePacket (packet) {
334+
var samples = Codec.resampleOld(packet.payload);
335+
this.consumer.receiveSamples(samples);
284336
}
285337

338+
/**
339+
* Send audio to a remote device. We'll be explicit about target, rather than just using send() from the responder
340+
* because otherwise I can already see an exploit where a snooper sends 0-length packets to ensure they're always the most recent address.
341+
* socket.send(msg[, offset, length][, port][, address][, callback])
342+
*/
343+
sendPacket (packet, address, port) {
344+
packetData = packet.toBuffer()
345+
this.udpServer.send(packetData, 0, packetData.length, port, address)
346+
}
286347

287348
/**
288349
* Return a completed media packet without transcoding.
@@ -308,25 +369,4 @@ class RTPStream {
308369
}
309370
}
310371

311-
class MediaQueue {
312-
constructor() {
313-
this.inQueue = new Array();
314-
this.outQueue = new Array();
315-
}
316-
317-
/**
318-
* Add a packet to the queue for RTP decoding
319-
*/
320-
addReceivedPacket (packet, callback) {
321-
return null;
322-
}
323-
324-
/**
325-
* Add a media stream for RTP encoding
326-
*/
327-
addTransmitMedia (samples, callback) {
328-
return null;
329-
}
330-
}
331-
332-
export { Payload, RTPHeader, RTPStream, RTPPacket, MediaQueue };
372+
export { Payload, RTPHeader, RTPPacket, RTPServer };

Libraries/robin-agent/non_verbal.mjs

+92
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,92 @@
1+
/**
2+
* Robin-agent - an interactive voice agent for Spin Doctor
3+
* Copyright (C) 2021-2022 The Combadge Project by mo-g
4+
*
5+
* Robin-agent is free software: you can redistribute it and/or modify
6+
* it under the terms of the GNU Affero General Public License as published by
7+
* the Free Software Foundation, version 3 of the License.
8+
*
9+
* Robin-agent is distributed in the hope that it will be useful,
10+
* but WITHOUT ANY WARRANTY; without even the implied warranty of
11+
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12+
* GNU General Public License for more details.
13+
*
14+
* You should have received a copy of the GNU General Public License
15+
* along with Robin-agent. If not, see <https://www.gnu.org/licenses/>.
16+
*/
17+
18+
/**
19+
* tone({
20+
freq: Tones["E♭6"],
21+
lengthInSecs: 0.2,
22+
volume: tone.MAX_16/2,
23+
rate: 16000,
24+
shape: 'sine'
25+
})
26+
*/
27+
28+
29+
import { Tones, FixedPitch, Silence} from '@combadge/ecma-tonegenerator'
30+
31+
32+
/**
 * Superclass for all nonverbal sound generators. By default, we should generate audio in the same codec and profile as Robin uses internally.
 *
 * `tones` holds one [tone, silence] pair per requested note, both generated
 * at 16 bit / 16000 Hz with a duration of 500 (presumably milliseconds - confirm
 * against ecma-tonegenerator).
 */
class Sound {
    /**
     * @param notes Array of note names, each used as a key into `Tones`.
     */
    constructor (notes) {
        const renderNote = (note) => {
            const pitch = new FixedPitch({
                frequency: Tones[note],
                bitDepth: 16,
                sampleRate: 16000
            });
            const gap = new Silence({
                bitDepth: 16,
                sampleRate: 16000
            });

            const audible = pitch.approximate({duration: 500});
            const quiet = gap.accurate({duration: 500});
            return [audible, quiet];
        };

        this.tones = notes.map(renderNote);
    }
}
51+
52+
/**
 * Click, B5, Click, B5, Click, D6
 *
 * Only the notes are passed on to Sound; this class adds no clicks itself.
 */
class ListeningChirp extends Sound {
    constructor () {
        // Passed directly: assigning to an undeclared `notes` throws a
        // ReferenceError in an ES module.
        super(["B5", "B5", "D6"]);
    }

    /**
     * All generated audio, concatenated in order.
     *
     * @returns {Int16Array}
     */
    get output () {
        const output = [];
        for (const segments of this.tones) {
            for (const segment of segments) {
                // Each entry of `this.tones` is a [tone, silence] pair. The
                // generators presumably return arrays of samples (confirm
                // against ecma-tonegenerator), so copy the samples themselves:
                // an array pushed whole would become a single 0 in the
                // Int16Array below. Anything else is pushed as before.
                if (Array.isArray(segment) || ArrayBuffer.isView(segment)) {
                    for (const sample of segment) {
                        output.push(sample);
                    }
                } else {
                    output.push(segment);
                }
            }
        }
        return new Int16Array(output);
    }
}
71+
72+
/**
 * B5, B5
 */
class OfflineChirp extends Sound {
    constructor () {
        // Passed directly: assigning to an undeclared `notes` throws a
        // ReferenceError in an ES module.
        super(["B5", "B5"]);
    }
}
81+
82+
/**
 * B5, D6, B5
 */
class BotswainWhistle extends Sound {
    constructor () {
        // Passed directly: assigning to an undeclared `notes` throws a
        // ReferenceError in an ES module.
        super(["B5", "D6", "B5"]);
    }
}
91+
92+
export { ListeningChirp, OfflineChirp, BotswainWhistle };

Libraries/robin-agent/package.json

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
{
2-
"name": "robin-agent",
2+
"name": "@combadge/robin-agent",
33
"version": "0.0.1",
44
"type": "module",
55
"description": "An interactive voice agent for Spin Doctor.",

Libraries/robin-agent/robin.mjs

+20-15
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,6 @@ const scorerFile = "./huge-vocabulary.scorer";
2525

2626
const vadTimeOut = 300;
2727
const vadAggressiveness = vad.Mode.VERY_AGGRESSIVE;
28-
29-
3028
const vadInstance = new vad(vadAggressiveness);
3129

3230

@@ -39,32 +37,27 @@ console.error('Loaded model.');
3937
/**
4038
* Agent provides an instance of an automated responder to take voice commands and send commands back to badges for action
4139
*
42-
* Currently stubbed.
40+
* The API for this and ecma-rtp should be made more-or-less independent, so you can feed this any source of numbers and it should work - from file, ecma-rtp, or something else.
41+
* We should also (for now) hard code a specific codec and profile, and transcode in the RTP library.
4342
*/
4443
class Agent {
45-
constructor (callback = undefined) {
44+
/**
 * @param callback Stored and exposed through the `callback` property.
 * @param audioResponder Stored and exposed through the `audioResponder` property.
 */
constructor (callback = undefined, audioResponder = undefined) {
    // Incoming samples accumulate in `samples`; a full batch is moved to
    // `recSamples` before recognition runs.
    this.samples = [];
    this.recSamples = [];
    this.transcoding = false;

    this._callback = callback;
    this._audioResponder = audioResponder;
}
5151

5252
recogniseBatch () {
53-
console.log("Transcoding, please wait.")
54-
this.transcoding = true;
55-
var floatSamples=Float32Array.from(Float32Array.from(this.recSamples).map(x=>x/0x8000));
56-
var newSamples = waveResampler.resample(floatSamples, 8000, 16000); // RETURNS A FLOAT64 NOT AN INT16 READ THE DOCS
57-
var samples1616=Int16Array.from(newSamples.map(x => (x>0 ? x*0x7FFF : x*0x8000)));
58-
var wav16buffer = new Buffer.from(samples1616.buffer);
59-
console.log("Result:", model.stt(wav16buffer));
53+
console.log(this.recSamples);
54+
console.log("Result:", model.stt(new Buffer.from(this.recSamples)));
6055
this.transcoding = false;
6156
}
6257

6358
receiveSamples (samples) {
64-
if (this.transcoding == false) {
65-
this.samples.push(...samples);
66-
}
67-
if (this.samples.length > 50000) {
59+
this.samples.push(...samples);
60+
if (this.samples.length > 60000) {
6861
this.recSamples = this.samples;
6962
this.samples = [];
7063
this.recogniseBatch();
@@ -80,6 +73,18 @@ class Agent {
8073
get callback () {
8174
return this._callback;
8275
}
76+
77+
/**
 * Expects a function(Buffer) to pass back audio to, which will be transcoded
 * and encapsulated as an RTP packet before sending to the client.
 */
set audioResponder (audioResponder = undefined) {
    const responder = audioResponder;
    this._audioResponder = responder;
}

/**
 * The currently registered audio responder, or undefined when none is set.
 */
get audioResponder () {
    const responder = this._audioResponder;
    return responder;
}
8388
}
8489

8590
export { Agent };

0 commit comments

Comments
 (0)