Skip to content

Commit

Permalink
Only use filtered audio data for VAD, not STT processing
Browse files (browse the repository at this point in the history)
  • Loading branch information
kercre123 committed Jun 27, 2024
1 parent 880c53a commit 6d4987b
Showing 1 changed file with 28 additions and 23 deletions.
51 changes: 28 additions & 23 deletions chipper/pkg/wirepod/speechrequest/speechrequest.go
Original file line number | Diff line number | Diff line change
Expand Up @@ -21,22 +21,23 @@ var debugWriteFile bool = false
var debugFile *os.File

type SpeechRequest struct {
Device string
Session string
FirstReq []byte
Stream interface{}
IsKG bool
IsIG bool
MicData []byte
DecodedMicData []byte
PrevLen int
PrevLenRaw int
InactiveFrames int
ActiveFrames int
VADInst *webrtcvad.VAD
LastAudioChunk []byte
IsOpus bool
OpusStream *opus.OggStream
Device string
Session string
FirstReq []byte
Stream interface{}
IsKG bool
IsIG bool
MicData []byte
DecodedMicData []byte
FilteredMicData []byte
PrevLen int
PrevLenRaw int
InactiveFrames int
ActiveFrames int
VADInst *webrtcvad.VAD
LastAudioChunk []byte
IsOpus bool
OpusStream *opus.OggStream
}

func BytesToSamples(buf []byte) []int16 {
Expand Down Expand Up @@ -67,9 +68,9 @@ func (req *SpeechRequest) OpusDecode(chunk []byte) []byte {
if err != nil {
logger.Println(err)
}
return highPassFilter(n)
return n
} else {
return highPassFilter(chunk)
return chunk
}
}

Expand Down Expand Up @@ -245,8 +246,9 @@ func ReqToSpeechRequest(req interface{}) SpeechRequest {
request.OpusStream = &opus.OggStream{}
decodedFirstReq, _ := request.OpusStream.Decode(request.FirstReq)
request.FirstReq = highPassFilter(decodedFirstReq)
request.DecodedMicData = append(request.DecodedMicData, request.FirstReq...)
request.LastAudioChunk = request.DecodedMicData[request.PrevLen:]
request.FilteredMicData = append(request.FilteredMicData, request.FirstReq...)
request.DecodedMicData = append(request.DecodedMicData, decodedFirstReq...)
request.LastAudioChunk = request.FilteredMicData[request.PrevLen:]
request.PrevLen = len(request.DecodedMicData)
request.IsOpus = true
}
Expand All @@ -265,8 +267,9 @@ func (req *SpeechRequest) GetNextStreamChunk() ([]byte, error) {
}
req.MicData = append(req.MicData, chunk.InputAudio...)
req.DecodedMicData = append(req.DecodedMicData, req.OpusDecode(chunk.InputAudio)...)
req.FilteredMicData = append(req.FilteredMicData, highPassFilter(req.OpusDecode(chunk.InputAudio))...)
dataReturn := req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.FilteredMicData[req.PrevLen:]
req.PrevLen = len(req.DecodedMicData)
return dataReturn, nil
} else if str, ok := req.Stream.(pb.ChipperGrpc_StreamingIntentGraphServer); ok {
Expand All @@ -278,8 +281,9 @@ func (req *SpeechRequest) GetNextStreamChunk() ([]byte, error) {
}
req.MicData = append(req.MicData, chunk.InputAudio...)
req.DecodedMicData = append(req.DecodedMicData, req.OpusDecode(chunk.InputAudio)...)
req.FilteredMicData = append(req.FilteredMicData, highPassFilter(req.OpusDecode(chunk.InputAudio))...)
dataReturn := req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.FilteredMicData[req.PrevLen:]
req.PrevLen = len(req.DecodedMicData)
if debugWriteFile {
debugFile.Write(chunk.InputAudio)
Expand All @@ -294,8 +298,9 @@ func (req *SpeechRequest) GetNextStreamChunk() ([]byte, error) {
}
req.MicData = append(req.MicData, chunk.InputAudio...)
req.DecodedMicData = append(req.DecodedMicData, req.OpusDecode(chunk.InputAudio)...)
req.FilteredMicData = append(req.FilteredMicData, highPassFilter(req.OpusDecode(chunk.InputAudio))...)
dataReturn := req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.DecodedMicData[req.PrevLen:]
req.LastAudioChunk = req.FilteredMicData[req.PrevLen:]
req.PrevLen = len(req.DecodedMicData)
return dataReturn, nil
}
Expand Down

0 comments on commit 6d4987b

Please sign in to comment.