-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathcli.py
76 lines (60 loc) · 2.1 KB
/
cli.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
# Python to run Suno-ai's Text-to-Speech API - BARK.
# Thank you for the idea and initial code to play with @
# https://github.com/suno-ai/bark/pull/84
import datetime
from bark import SAMPLE_RATE,generate_audio,preload_models
from scipy.io.wavfile import write as write_wav
import os
import numpy as np
import nltk
# from loguru import logger
import playsound
# Set up loguru logger
# logger.add("/tmp/bark.log", format="{debug}")
# Sentence splitting in this script relies on NLTK's "punkt" tokenizer data.
# Fetch it only when it is not already installed, so that a normal start-up
# does not contact the download server every time.
try:
    nltk.data.find("tokenizers/punkt")
except LookupError:
    nltk.download("punkt")

# Load the Bark models once, before the first prompt is read.  The flags
# request the small text/coarse/fine model variants and keep the codec off
# the GPU.
preload_models(
    text_use_small=True,
    coarse_use_small=True,
    fine_use_small=True,
    codec_use_gpu=False,
)

# The sample rate is imported from bark for now instead of being set here:
# SAMPLE_RATE = 22050

# Speaker preset handed to generate_audio() as its history prompt.
# The original author marked this feature as buggy.
HISTORY_PROMPT = "en_speaker_3"
# Upper bound on the combined length of the sentences packed into one prompt.
# NOTE(review): the length is taken from len(nltk.Text(sentence)) on a raw
# string, which appears to count characters rather than words -- confirm that
# a 250-character budget is what is intended.
MAX_CHUNK_LENGTH = 250


def _split_into_chunks(text, max_length=MAX_CHUNK_LENGTH):
    """Group the sentences of *text* into prompts of bounded length.

    Sentences are kept whole and in their original order.  A sentence joins
    the current chunk while the running length stays within *max_length*;
    otherwise it starts a new chunk.  A single sentence longer than
    *max_length* still becomes a chunk of its own.

    Returns a list of non-empty strings (an empty list when *text* contains
    no sentences).
    """
    chunks = []
    current = []
    current_length = 0
    for sentence in nltk.sent_tokenize(text):
        sentence_length = len(nltk.Text(sentence))
        # Close the running chunk only when it already holds something, so
        # an over-long first sentence never leaves an empty prompt behind.
        if current and current_length + sentence_length > max_length:
            chunks.append(" ".join(current))
            current = []
            current_length = 0
        current.append(sentence)
        current_length += sentence_length
    if current:
        chunks.append(" ".join(current))
    return chunks


# Interactive loop: read a line, synthesize it, save it as a WAV file and
# play it back, until the input ends or the user interrupts.
while True:
    # Prompt input
    try:
        user_text = input(""" Input: """)
    except (EOFError, KeyboardInterrupt):
        # End of input (e.g. a pipe ran dry) or Ctrl-C at the prompt:
        # leave the loop instead of dying with a traceback.
        print()
        break
    if not user_text.strip():
        # Nothing to say; do not spend a generation on the bare prefix.
        continue
    initial_prompt = "WOMAN: " + user_text
    # Use a pipe to pass input
    # ex: mkfifo mypipe
    # python myscript.py &
    # echo "hello world" > mypipe
    # with open('mypipe', 'r') as f:
    #     initial_prompt = "WOMAN: " + f.readline().strip()

    # Tokenize to split the string into chunks for processing
    chunks = _split_into_chunks(initial_prompt)
    if not chunks:
        # np.concatenate() raises on an empty list; ask for new input.
        continue

    # Generate audio for each prompt
    # (alternative without a history prompt: generate_audio(prompt))
    audio_arrays = [
        generate_audio(prompt, history_prompt=HISTORY_PROMPT)
        for prompt in chunks
    ]

    # Combine the audio pieces, in order, into one array
    combined_audio = np.concatenate(audio_arrays)

    # Write the combined audio to a timestamped file in the working directory
    timestamp_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    filename = f"Bark_audio_{timestamp_str}.wav"
    write_wav(filename, SAMPLE_RATE, combined_audio)

    # Play the audio using playsound
    playsound.playsound(filename)