Skip to content

Commit

Permalink
Merge pull request #351 from kercre123/highpass-filter
Browse files Browse the repository at this point in the history
Remove soxr
  • Loading branch information
kercre123 authored Jun 27, 2024
2 parents 03ed9e1 + a86d2ca commit 880c53a
Show file tree
Hide file tree
Showing 7 changed files with 27 additions and 52 deletions.
3 changes: 0 additions & 3 deletions chipper/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ require (
github.com/akavel/rsrc v0.10.2 // indirect
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc // indirect
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf // indirect
github.com/alphacep/vosk-api/go v0.3.50 // indirect
github.com/cenkalti/backoff v2.2.1+incompatible // indirect
github.com/currantlabs/ble v0.0.0-20171229162446-c1d21c164cf8 // indirect
github.com/dchest/jsmin v0.0.0-20220218165748-59f39799265f // indirect
Expand All @@ -49,7 +48,6 @@ require (
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/jamesruan/sodium v0.0.0-20181216154042-9620b83ffeae // indirect
github.com/josephspurrier/goversioninfo v1.4.0 // indirect
github.com/kercre123/vosk-api v1.0.1 // indirect
github.com/kr/text v0.2.0 // indirect
github.com/magiconair/properties v1.8.1 // indirect
github.com/mattn/go-colorable v0.1.8 // indirect
Expand All @@ -67,7 +65,6 @@ require (
github.com/spf13/pflag v1.0.5 // indirect
github.com/spf13/viper v1.7.1 // indirect
github.com/subosito/gotenv v1.2.0 // indirect
github.com/zaf/resample v1.5.0 // indirect
golang.org/x/image v0.10.0 // indirect
golang.org/x/net v0.16.0 // indirect
golang.org/x/sys v0.15.0 // indirect
Expand Down
8 changes: 0 additions & 8 deletions chipper/go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -53,8 +53,6 @@ github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc h1:cAKDfWh5Vpd
github.com/alecthomas/template v0.0.0-20160405071501-a0175ee3bccc/go.mod h1:LOuyumcjzFXgccqObfd/Ljyb9UuFJ6TxHnclSeseNhc=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf h1:qet1QNfXsQxTZqLG4oE62mJzwPIB8+Tee4RNCL9ulrY=
github.com/alecthomas/units v0.0.0-20151022065526-2efee857e7cf/go.mod h1:ybxpYRFXyAe+OPACYpWeL0wqObRcbAqCMya13uyzqw0=
github.com/alphacep/vosk-api/go v0.3.50 h1:2vSN41RCU1WdHEqBrhKtTggfKL6Yu5Dmj+urVszwiuw=
github.com/alphacep/vosk-api/go v0.3.50/go.mod h1:9X8IJsHnFk/b1xyvjlZifo+ZL5VTAx3LW+JQce/eRcA=
github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY=
github.com/arbovm/levenshtein v0.0.0-20160628152529-48b4e1c0c4d0/go.mod h1:t2tdKJDJF9BV14lnkjHmOQgcvEKgtqs5a1N3LNdJhGE=
github.com/armon/circbuf v0.0.0-20150827004946-bbbad097214e/go.mod h1:3U/XgcO3hCbHZ8TKRvWD2dDTCfh9M9ya+I9JpbB7O8o=
Expand Down Expand Up @@ -138,8 +136,6 @@ github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMo
github.com/fsnotify/fsnotify v1.4.9 h1:hsms1Qyu0jgnwNXIxa+/V/PDsU6CfLf6CNO8H7IWoS4=
github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ=
github.com/fsouza/go-dockerclient v1.6.6/go.mod h1:3/oRIWoe7uT6bwtAayj/EmJmepBjeL4pYvt7ZxC7Rnk=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20231214200047-940de9dbe9c9 h1:B3bFAoLQ8y4RFNR8A4GjkCiKzuUIwblYUiIgRgWr604=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20231214200047-940de9dbe9c9/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240618151033-bf4cb4abad4e h1:np99/bjGH4/khEujoGbwc0ohMLh32GjovhEv2mJRNfs=
github.com/ggerganov/whisper.cpp/bindings/go v0.0.0-20240618151033-bf4cb4abad4e/go.mod h1:QIjZ9OktHFG7p+/m3sMvrAJKKdWrr1fZIK0rM6HZlyo=
github.com/ghodss/yaml v1.0.0/go.mod h1:4dBDuWmgqj2HViK6kFavaiC9ZROes6MMH2rRYeMEF04=
Expand Down Expand Up @@ -308,8 +304,6 @@ github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7V
github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw=
github.com/karrick/godirwalk v1.8.0/go.mod h1:H5KPZjojv4lE+QYImBI8xVtrBRgYrIVsaRPx4tDPEn4=
github.com/karrick/godirwalk v1.10.3/go.mod h1:RoGL9dQei4vP9ilrpETWE8CLOZ1kiN0LhBygSwrAsHA=
github.com/kercre123/vosk-api v1.0.1 h1:D5CeAMNHPj87M9fKrqP+a2gEQefq7sJCpaiuRscbiJY=
github.com/kercre123/vosk-api v1.0.1/go.mod h1:mJlLhtYS207jVY9QffYGxhX6Up0UfSQ3p0uNbXsf3Zc=
github.com/kercre123/vosk-api/go v1.0.2 h1:NDJUNv2ddw128amiVZ2xE2gKfKHeBRRhboSh+yiH6Wg=
github.com/kercre123/vosk-api/go v1.0.2/go.mod h1:oVZG/VFmg23uNDzjShcw7UhZHWYG2zXgBm5FqioE2Ao=
github.com/kercre123/zeroconf v1.0.1 h1:Mbd8mN6xnNWYIqBN38x3jJjiPP2RmK4orzbGZsa1EOY=
Expand Down Expand Up @@ -495,8 +489,6 @@ github.com/yuin/goldmark v1.1.25/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9de
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.1.32/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
github.com/zaf/resample v1.5.0 h1:c3yumHrV1cJoED8ZY2Ai3cehS8s0mJSroA9/vMaUcho=
github.com/zaf/resample v1.5.0/go.mod h1:e4yWalfgRccQrnZSrkIxTqmMCOPhTi1xvYpNpRIB13k=
go.etcd.io/bbolt v1.3.2/go.mod h1:IbVyRI1SCnLcuJnV2u8VeU0CEYM7e686BmAb1XKL+uU=
go.mongodb.org/mongo-driver v1.4.2/go.mod h1:WcMNYLx/IlOxLe6JRJiv2uXuCz6zBLndR4SoGjYphSc=
go.opencensus.io v0.21.0/go.mod h1:mSImk1erAIZhrmZN+AvHh14ztQfjbGwt4TtuofqLduU=
Expand Down
42 changes: 13 additions & 29 deletions chipper/pkg/wirepod/ttr/convert.go
Original file line number Diff line number Diff line change
@@ -1,12 +1,8 @@
package wirepod_ttr

// TODO
import (
"bytes"
"encoding/binary"
"math"

"github.com/zaf/resample"
)

func bytesToInt16s(data []byte) []int16 {
Expand All @@ -25,16 +21,20 @@ func int16sToBytes(data []int16) []byte {
return bytes
}

// entry-point (go, not default)
func downsample24kTo16k(input []byte) [][]byte {
iVolBytes := increaseVolume(input, 6)
outBytes := downsample24kTo16kLinear(iVolBytes)
outBytes := downsample24kTo16kLinear(input)
var audioChunks [][]byte
// the "s" sounds are harsh. put through filter
//filteredBytes := lowPassFilter(outBytes, 4000, 16000)
for len(outBytes) >= 1024 {
audioChunks = append(audioChunks, outBytes[:1024])
outBytes = outBytes[1024:]
filteredBytes := lowPassFilter(outBytes, 4000, 16000)
iVolBytes := increaseVolume(filteredBytes, 5)
for len(iVolBytes) > 0 {
if len(iVolBytes) < 1024 {
chunk := make([]byte, 1024)
copy(chunk, iVolBytes)
audioChunks = append(audioChunks, chunk)
break
}
audioChunks = append(audioChunks, iVolBytes[:1024])
iVolBytes = iVolBytes[1024:]
}

return audioChunks
Expand Down Expand Up @@ -73,7 +73,7 @@ func lowPassFilter(data []byte, cutoffFreq float64, sampleRate int) []byte {
return int16sToBytes(filtered)
}

// technically copied too
// copied too
func downsample24kTo16kLinear(input []byte) []byte {
int16s := bytesToInt16s(input)
outputLength := (len(int16s) * 2) / 3
Expand All @@ -90,19 +90,3 @@ func downsample24kTo16kLinear(input []byte) []byte {

return int16sToBytes(output)
}

// entry-point (soxr)
//
// downsampleAudioSoxr resamples raw PCM from a 24000 Hz rate to 16000 Hz with
// the soxr-backed resampler (1 channel, resample.I16 samples, VeryHighQ
// quality), passes the result through lowPassFilter (cutoff 4000, rate 16000)
// and increaseVolume (factor 5), and splits it into 1024-byte chunks.
//
// A trailing remainder shorter than 1024 bytes is dropped. The function
// returns nil when the resampler cannot be created, written to, or closed;
// the signature has no error result, so callers see an empty chunk list.
func downsampleAudioSoxr(input []byte) [][]byte {
	resampled := new(bytes.Buffer)
	dec, err := resample.New(resampled, 24000, 16000, 1, resample.I16, resample.VeryHighQ)
	if err != nil {
		// Previously the error was discarded and the nil resampler was
		// dereferenced by Write below.
		return nil
	}
	_, writeErr := dec.Write(input)
	// Close must run before the buffer is read: it flushes the samples the
	// resampler is still holding and releases its resources.
	closeErr := dec.Close()
	if writeErr != nil || closeErr != nil {
		return nil
	}

	filteredBytes := lowPassFilter(resampled.Bytes(), 4000, 16000)
	iVolBytes := increaseVolume(filteredBytes, 5)

	audioChunks := make([][]byte, 0, len(iVolBytes)/1024)
	for len(iVolBytes) >= 1024 {
		audioChunks = append(audioChunks, iVolBytes[:1024])
		iVolBytes = iVolBytes[1024:]
	}
	return audioChunks
}
17 changes: 9 additions & 8 deletions chipper/pkg/wirepod/ttr/kgsim_cmds.go
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,7 @@ type LLMCommand struct {
var ValidLLMCommands []LLMCommand = []LLMCommand{
{
Command: "playAnimationWI",
Description: "Plays an animation on the robot without interrupting speech. This should be used FAR more than the playAnimation command. This is great for storytelling and making any normal response animated. Don't put two of these right next to each other. Use this MANY times. The param choices are the onlyy choices you have. You can't create any.",
Description: "Plays an animation on the robot without interrupting speech. This should be used FAR more than the playAnimation command. This is great for storytelling and making any normal response animated. Don't put two of these right next to each other. Use this MANY times. The param choices are the only choices you have. You can't create any.",
ParamChoices: "happy, veryHappy, sad, verySad, angry, frustrated, dartingEyes, confused, thinking, celebrate, love",
Action: ActionPlayAnimationWI,
SupportedModels: []string{"all"},
Expand Down Expand Up @@ -307,6 +307,9 @@ func getOpenAIVoice(voice string) openai.SpeechVoice {

// TODO
func DoSayText_OpenAI(robot *vector.Vector, input string) error {
if strings.TrimSpace(input) == "" {
return nil
}
openaiVoice := getOpenAIVoice(vars.APIConfig.Knowledge.OpenAIPrompt)
// if vars.APIConfig.Knowledge.OpenAIVoice == "" {
// openaiVoice = openai.VoiceFable
Expand All @@ -318,7 +321,7 @@ func DoSayText_OpenAI(robot *vector.Vector, input string) error {
Model: openai.TTSModel1,
Input: input,
Voice: openaiVoice,
ResponseFormat: "pcm",
ResponseFormat: openai.SpeechResponseFormatPcm,
})
if err != nil {
logger.Println(err)
Expand All @@ -338,12 +341,8 @@ func DoSayText_OpenAI(robot *vector.Vector, input string) error {
},
})
//time.Sleep(time.Millisecond * 30)
var audioChunks [][]byte
if os.Getenv("USE_GO_DESAMPLE") == "true" {
audioChunks = downsample24kTo16k(speechBytes)
} else {
audioChunks = downsampleAudioSoxr(speechBytes)
}
audioChunks := downsample24kTo16k(speechBytes)

var chunksToDetermineLength []byte
for _, chunk := range audioChunks {
chunksToDetermineLength = append(chunksToDetermineLength, chunk...)
Expand All @@ -366,6 +365,8 @@ func DoSayText_OpenAI(robot *vector.Vector, input string) error {
},
})
}()
os.WriteFile("../../pre-process.pcm", speechBytes, 0777)
os.WriteFile("../../post-process.pcm", chunksToDetermineLength, 0777)
time.Sleep(pcmLength(chunksToDetermineLength) + (time.Millisecond * 50))
return nil
}
Expand Down
1 change: 1 addition & 0 deletions chipper/webroot/js/main.js
Original file line number Diff line number Diff line change
Expand Up @@ -347,6 +347,7 @@ function updateKGAPI() {
if (data.provider === "openai") {
getE("openaiKey").value = data.key;
getE("openAIPrompt").value = data.openai_prompt;
getE("openaiVoice").value = data.openai_voice;
getE("commandYes").checked = data.commands_enable
getE("intentyes").checked = data.intentgraph
getE("saveChatYes").checked = data.save_chat
Expand Down
2 changes: 1 addition & 1 deletion dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ FROM ubuntu

COPY . .

RUN chmod +x /setup.sh && apt-get update && apt-get install -y dos2unix && dos2unix /setup.sh && apt-get install -y avahi-daemon avahi-autoipd libsoxr0
RUN chmod +x /setup.sh && apt-get update && apt-get install -y dos2unix && dos2unix /setup.sh && apt-get install -y avahi-daemon avahi-autoipd

RUN ["/bin/sh", "-c", "STT=vosk ./setup.sh"]

Expand Down
6 changes: 3 additions & 3 deletions setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -90,13 +90,13 @@ function getPackages() {
echo "Installing required packages"
if [[ ${TARGET} == "debian" ]]; then
apt update -y
apt install -y wget openssl net-tools libsox-dev libopus-dev make iproute2 xz-utils libopusfile-dev pkg-config gcc curl g++ unzip avahi-daemon git libasound2-dev libsodium-dev libsoxr-dev
apt install -y wget openssl net-tools libsox-dev libopus-dev make iproute2 xz-utils libopusfile-dev pkg-config gcc curl g++ unzip avahi-daemon git libasound2-dev libsodium-dev
elif [[ ${TARGET} == "arch" ]]; then
pacman -Sy --noconfirm
sudo pacman -S --noconfirm wget openssl net-tools sox opus make iproute2 opusfile curl unzip avahi git libsodium go pkg-config libsoxr
sudo pacman -S --noconfirm wget openssl net-tools sox opus make iproute2 opusfile curl unzip avahi git libsodium go pkg-config
elif [[ ${TARGET} == "fedora" ]]; then
dnf update
dnf install -y wget openssl net-tools sox opus make opusfile curl unzip avahi git libsodium-devel soxr-devel
dnf install -y wget openssl net-tools sox opus make opusfile curl unzip avahi git libsodium-devel
elif [[ ${TARGET} == "darwin" ]]; then
sudo -u $SUDO_USER brew update
sudo -u $SUDO_USER brew install wget pkg-config opus opusfile
Expand Down

0 comments on commit 880c53a

Please sign in to comment.