add: refactored the code + saving endpoints in a json file + transferring of smaller sections into individual functions + enum for voices

Mark · Mark · commit 54139699fccb · 2024-08-23T22:09:11.000+02:00
fix: Error #2: Special characters like emojis will not throw an error if sound is generated from txt file
diff --git a/README.md b/README.md
@@ -5,12 +5,11 @@ This is a simple Python program that gives you an `.mp3` file including the give
 I thank all people that use this for their project. I love to contribute to the community. However, please credit me by using the GitHub project link.
 
 ## Usage
-
 To use this program, you need an internet connection, python 3.6+ and all of the required packages installed.
 To install the required packages, run: `pip3 install -r requirements.txt`
 
 ### Create audio from file
-1. Make sure you have your text in plaintext. You can name it anything
+1. Make sure you have your text in a plaintext file.
 2. Run `py main.py -txt FILENAME.txt -v VOICENAME` (see voices below)
 
 Only latin characters are supported.
@@ -21,27 +20,58 @@ Only latin characters are supported.
 You can have non-latin characters (as long as it has a TTS supported voice).
 
 ### Create audio in python script
-1. Put the file `tiktokvoice.py` into your directory.
-2. Import the text-to-speech function with `from tiktokvoice import tts`.
-3. Execute `tts(TEXT, VOICENAME, OUTPUTFILENAME, PLAYSOUND)` in your code. 
-
-I provided an [example script](https://github.com/GiorDior/TikTok-Voice-TTS/blob/main/examplescript.py) which shows how the tts function could be used in a script.
-
-## Supported languages:
-List of every voice and its designation: [voices](https://github.com/oscie57/tiktok-voice/wiki/Voice-Codes)
-
-- Portuguese (Brazil)
-- German
-- English (Australia)
-- English (United Kingdom)
-- English (United States)
-- English (Disney)
-- Spanish
-- Spanish (Mexico)
-- French
-- Indonesian
-- Japanese
-- Korean
+1. Put the folder (package) `tiktok_voice` into your directory.
+2. Import the text-to-speech function and the voices with `from tiktok_voice import tts, Voice`.
+3. Execute `tts(TEXT, VOICE, OUTPUTFILENAME, PLAYSOUND)` in your code. 
+
+I provided an [example script](https://github.com/GiorDior/TikTok-Voice-TTS/blob/main/example_script.py) which shows how the tts function could be used in a script.
+
+## Voices
+List of every voice and its designation:
+
+| Name                 |
+| -------------------- |
+| GHOSTFACE            |
+| CHEWBACCA            |
+| C3PO                 |
+| STITCH               |
+| STORMTROOPER         |
+| ROCKET               |
+| EN_AU_FEMALE_1       |
+| EN_AU_MALE_1         |
+| EN_UK_MALE_1         |
+| EN_UK_MALE_2         |
+| EN_US_FEMALE_1       |
+| EN_US_FEMALE_2       |
+| EN_US_MALE_1         |
+| EN_US_MALE_2         |
+| EN_US_MALE_3         |
+| EN_US_MALE_4         |
+| FR_MALE_1            |
+| FR_MALE_2            |
+| DE_FEMALE            |
+| DE_MALE              |
+| ES_MALE              |
+| ES_MX_MALE           |
+| BR_FEMALE_1          |
+| BR_FEMALE_2          |
+| BR_FEMALE_3          |
+| BR_MALE              |
+| ID_FEMALE            |
+| JP_FEMALE_1          |
+| JP_FEMALE_2          |
+| JP_FEMALE_3          |
+| JP_MALE              |
+| KR_MALE_1            |
+| KR_FEMALE            |
+| KR_MALE_2            |
+| EN_FEMALE_ALTO       |
+| EN_MALE_TENOR        |
+| EN_FEMALE_WARMY_BREEZE |
+| EN_MALE_SUNSHINE_SOON |
+| EN_MALE_NARRATION    |
+| EN_MALE_FUNNY        |
+| EN_FEMALE_EMOTIONAL  |
 
 ## Samples
 
diff --git a/example_script.py b/example_script.py
@@ -1,11 +1,10 @@
-from tiktokvoice import tts
-
-text = 'Tangerines are smaller and less rounded than the oranges. The taste is considered less sour, as well as sweeter and stronger, than that of an orange. A ripe tangerine is firm to slightly soft, and pebbly-skinned with no deep grooves, as well as orange in color. The peel is thin, with little bitter white mesocarp. All of these traits are shared by mandarins generally. Peak tangerine season lasts from autumn to spring. Tangerines are most commonly peeled and eaten by hand. The fresh fruit is also used in salads, desserts and main dishes. The peel is used fresh or dried as a spice or zest for baking and drinks. Fresh tangerine juice and frozen juice concentrate are commonly available in the United States.'
-voice = "en_us_006"
-
-# arguments:
-#   - input text
-#   - vocie which is used for the audio
-#   - output file name
-#   - play sound after generating the audio
-tts(text, voice, "output.mp3", play_sound=True)
+from tiktok_voice import tts, Voice
+
+text = 'Tangerines are smaller and less rounded than the oranges. The taste is considered less sour, as well as sweeter and stronger, than that of an orange. A ripe tangerine is firm to slightly soft, and pebbly-skinned with no deep grooves, as well as orange in color. The peel is thin, with little bitter white mesocarp. All of these traits are shared by mandarins generally. Peak tangerine season lasts from autumn to spring. Tangerines are most commonly peeled and eaten by hand. The fresh fruit is also used in salads, desserts and main dishes. The peel is used fresh or dried as a spice or zest for baking and drinks. Fresh tangerine juice and frozen juice concentrate are commonly available in the United States.'
+
+# Convert `text` to speech. Arguments, in order:
+#   - input text
+#   - voice used for the audio (a member of the Voice enum)
+#   - output file name
+#   - play_sound: play the audio after it has been generated
+tts(text, Voice.EN_US_MALE_1, "output.mp3", play_sound=True)
diff --git a/main.py b/main.py
@@ -1,42 +1,40 @@
 # author: Giorgio
-# date: 19.03.2024
+# date: 23.08.2024
 # topic: TikTok-Voice-TTS
-# version: 1.1
+# version: 1.3
 
+from codecs import BOM_UTF32
 import argparse
-
 # the script in the directory
-import tiktokvoice
+from tiktok_voice import tts, Voice
 
 def main():
+    """Parse the command-line arguments and generate the requested audio file.
+
+    Exactly one of -t (inline text) or -txt (text file) must be given, and
+    -v must be the name of a Voice member; otherwise ValueError is raised.
+    """
     # adding arguments
     parser = argparse.ArgumentParser(description='TikTok TTS')
     parser.add_argument('-t', help='text input')
-    parser.add_argument('-v', help='voice selection', choices=tiktokvoice.VOICES)
-    parser.add_argument('-n', help='output filename', default='output.mp3')
-    parser.add_argument('-txt', help='text input from a txt file', type=argparse.FileType('r'))
+    parser.add_argument('-v', help='voice selection')
+    parser.add_argument('-o', help='output filename', default='output.mp3')
+    parser.add_argument('-txt', help='text input from a txt file', type=argparse.FileType('r', encoding="utf-8"))
     parser.add_argument('-play', help='play sound after generating audio', action='store_true')
 
     args = parser.parse_args()
 
     # checking if given values are valid
     if not args.t and not args.txt:
-        print("Error: insert a valid text or txt file")
-        return
+        raise ValueError("insert a valid text or txt file")
 
     if args.t and args.txt:
-        print("Error: only one input type is possible")
-        return
+        raise ValueError("only one input type is possible")
     
-    if not args.v:
-        print("Error: no voice has been selected")
-        return
+    # from_string returns None for a missing or unknown voice name
+    voice: Voice | None = Voice.from_string(args.v)
+    if voice is None:
+        raise ValueError("no valid voice has been selected")
 
     # executing script
     if args.t:
-        tiktokvoice.tts(args.t, args.v, args.n, args.play)
+        tts(args.t, voice, args.o, args.play)
     elif args.txt:
-        tiktokvoice.tts(args.txt.read(), args.v, args.n, args.play)
+        tts(args.txt.read(), voice, args.o, args.play)
 
 if __name__ == "__main__":
     main()
diff --git a/tiktok_voice/__init__.py b/tiktok_voice/__init__.py
@@ -0,0 +1,2 @@
+from .src.text_to_speech import tts
+from .src.voice import Voice
diff --git a/tiktok_voice/data/config.json b/tiktok_voice/data/config.json
@@ -0,0 +1,14 @@
+[
+    {
+        "url": "https://tiktok-tts.weilnet.workers.dev/api/generation",
+        "response": "data"
+    },
+    {
+        "url": "https://countik.com/api/text/speech",
+        "response": "v_data"
+    },
+    {
+        "url": "https://gesserit.co/api/tiktok-tts",
+        "response": "base64"
+    }
+]
diff --git a/tiktok_voice/src/text_to_speech.py b/tiktok_voice/src/text_to_speech.py
@@ -0,0 +1,130 @@
+# Python standard modules
+import os
+import requests
+import base64
+import re
+from json import load
+from threading import Thread
+from typing import Dict, List, Optional
+
+# Downloaded modules
+from playsound import playsound
+
+# Local files
+from .voice import Voice
+
+def tts(
+    text: str,
+    voice: Voice,
+    output_file_path: str = "output.mp3",
+    play_sound: bool = False
+) -> None:
+    """Convert text to speech and save the audio to a file.
+
+    Args:
+        text: Text to convert; must not be empty.
+        voice: Member of the Voice enum selecting the voice.
+        output_file_path: Path the audio bytes are written to.
+        play_sound: If True, play the written file with playsound.
+
+    Raises:
+        TypeError: If voice is not a Voice member.
+        ValueError: If text is empty.
+
+    Note:
+        If no configured endpoint returns audio, the function returns
+        without writing a file and without raising.
+    """
+    
+    # Validate input arguments
+    _validate_args(text, voice)
+
+    # Load endpoint data from the data/config.json file
+    endpoint_data: List[Dict[str, str]] = _load_endpoints()
+    
+
+    # Iterate over endpoints to find a working one
+    for endpoint in endpoint_data:
+        # Generate audio bytes from the current endpoint (None if it failed)
+        audio_bytes: Optional[bytes] = _fetch_audio_bytes(endpoint, text, voice)
+        
+        if audio_bytes:
+            # Save the generated audio to a file
+            _save_audio_file(output_file_path, audio_bytes)
+        
+            # Optionally play the audio file
+            if play_sound:
+                playsound(output_file_path)
+            
+            # Stop after processing a valid endpoint
+            break
+
+def _save_audio_file(output_file_path: str, audio_bytes: bytes) -> None:
+    """Write the audio bytes to output_file_path, replacing any existing file."""
+    # Remove a previous file at this path before writing the new one
+    if os.path.exists(output_file_path):
+        os.remove(output_file_path)
+        
+    with open(output_file_path, "wb") as file:
+        file.write(audio_bytes)
+
+def _fetch_audio_bytes(
+    endpoint: Dict[str, str],
+    text: str,
+    voice: Voice
+) -> Optional[bytes]:
+    """Fetch the audio for a text from one endpoint and decode it.
+
+    The text is split into chunks, each chunk is posted to the endpoint in
+    its own thread, and the base64 payloads are decoded and concatenated in
+    chunk order.
+
+    Args:
+        endpoint: Mapping with the request "url" and the "response" key under
+            which the endpoint's JSON answer carries the base64 audio.
+        text: Text to convert.
+        voice: Voice whose value is sent to the endpoint.
+
+    Returns:
+        The decoded audio bytes, or None if any chunk could not be fetched.
+    """
+    
+    # One result slot per text chunk; each thread writes only its own index
+    text_chunks: List[str] = _split_text(text)
+    audio_chunks: List[str] = ["" for _ in range(len(text_chunks))]
+
+    # Function to generate audio for each text chunk
+    def generate_audio_chunk(index: int, text_chunk: str):
+        try:
+            response = requests.post(endpoint["url"], json={"text": text_chunk, "voice": voice.value})
+            response.raise_for_status()
+            audio_chunks[index] = response.json()[endpoint["response"]]
+        except (requests.RequestException, KeyError):
+            # Leave the slot empty; the caller treats that as a failed endpoint
+            return
+
+    # Start threads for generating audio for each chunk
+    threads = [Thread(target=generate_audio_chunk, args=(i, chunk)) for i, chunk in enumerate(text_chunks)]
+    for thread in threads:
+        thread.start()
+
+    for thread in threads:
+        thread.join()
+
+    # A single missing chunk makes the whole result unusable
+    if any(not chunk for chunk in audio_chunks):
+        return None
+
+    # Decode every chunk on its own: each response is a complete base64
+    # string that may end in '=' padding, and padding in the middle of a
+    # joined string makes b64decode drop or misread the data after it.
+    return b"".join(base64.b64decode(chunk) for chunk in audio_chunks)
+
+def _load_endpoints() -> List[Dict[str, str]]:
+    """Load endpoint configurations from a JSON file.
+
+    Returns:
+        The parsed content of data/config.json; in the shipped config this is
+        a list of objects with a "url" and a "response" key.
+    """
+    # Resolve the path relative to this module, not the working directory
+    script_dir = os.path.dirname(__file__)
+    json_file_path = os.path.join(script_dir, '../data', 'config.json')
+    with open(json_file_path, 'r') as file:
+        return load(file)
+
+def _validate_args(text: str, voice: Voice) -> None:
+    """Validate the input arguments.
+
+    Raises:
+        TypeError: If voice is not an instance of Voice.
+        ValueError: If text is empty (falsy).
+    """
+    
+    # Check if the voice is of the correct type
+    if not isinstance(voice, Voice):
+        raise TypeError("'voice' must be of type Voice")
+    
+    # Check if the text is not empty
+    if not text:
+        raise ValueError("text must not be empty")
+
+def _split_text(text: str) -> List[str]:
+    """Split text into chunks of 300 characters or less.
+
+    The text is cut after punctuation marks where possible, after spaces
+    when a sentence is too long, and at fixed positions when a single word
+    is too long. Joining the returned chunks yields the original text.
+
+    Args:
+        text: Text to split.
+
+    Returns:
+        The chunks in order; a list with one empty string for empty text.
+    """
+    max_length = 300
+
+    # Split text into pieces based on punctuation marks. DOTALL lets '.'
+    # match line breaks, so they stay in the text instead of being dropped
+    # (which would glue the words around them together).
+    pieces: List[str] = []
+    for sentence in re.findall(r'.*?[.,!?:;-]|.+', text, flags=re.DOTALL):
+        if len(sentence) <= max_length:
+            pieces.append(sentence)
+            continue
+        # Too long: split after spaces, then hard-cut words that still exceed the limit
+        for word in re.findall(r'.*?[ ]|.+', sentence, flags=re.DOTALL):
+            pieces.extend(word[i:i + max_length] for i in range(0, len(word), max_length))
+
+    # Combine pieces into segments of 300 characters or less
+    merged_chunks: List[str] = []
+    current_chunk: str = ""
+    for piece in pieces:
+        if len(current_chunk) + len(piece) <= max_length:
+            current_chunk += piece
+        else:
+            # No piece exceeds the limit, so current_chunk is never empty here
+            merged_chunks.append(current_chunk)
+            current_chunk = piece
+
+    # Append the last chunk
+    merged_chunks.append(current_chunk)
+    return merged_chunks
diff --git a/tiktok_voice/src/voice.py b/tiktok_voice/src/voice.py
@@ -0,0 +1,65 @@
+# author: Giorgio
+# date: 23.08.2024
+# topic: TikTok-Voice-TTS
+# version: 1.3
+
+from enum import Enum
+
+# Enum to define available voices for text-to-speech conversion
+class Voice(Enum):
+    """Available voices; each member's value is the voice code sent to the endpoint."""
+    # DISNEY VOICES
+    GHOSTFACE = 'en_us_ghostface'
+    CHEWBACCA = 'en_us_chewbacca'
+    C3PO = 'en_us_c3po'
+    STITCH = 'en_us_stitch'
+    STORMTROOPER = 'en_us_stormtrooper'
+    ROCKET = 'en_us_rocket'
+    # ENGLISH VOICES
+    EN_AU_FEMALE_1 = 'en_au_001'
+    EN_AU_MALE_1 = 'en_au_002'
+    EN_UK_MALE_1 = 'en_uk_001'
+    EN_UK_MALE_2 = 'en_uk_003'
+    EN_US_FEMALE_1 = 'en_us_001'
+    EN_US_FEMALE_2 = 'en_us_002'
+    EN_US_MALE_1 = 'en_us_006'
+    EN_US_MALE_2 = 'en_us_007'
+    EN_US_MALE_3 = 'en_us_009'
+    EN_US_MALE_4 = 'en_us_010'
+    # EUROPE VOICES
+    FR_MALE_1 = 'fr_001'
+    FR_MALE_2 = 'fr_002'
+    DE_FEMALE = 'de_001'
+    DE_MALE = 'de_002'
+    ES_MALE = 'es_002'
+    # AMERICA VOICES
+    ES_MX_MALE = 'es_mx_002'
+    BR_FEMALE_1 = 'br_001'
+    BR_FEMALE_2 = 'br_003'
+    BR_FEMALE_3 = 'br_004'
+    BR_MALE = 'br_005'
+    # ASIA VOICES
+    ID_FEMALE = 'id_001'
+    JP_FEMALE_1 = 'jp_001'
+    JP_FEMALE_2 = 'jp_003'
+    JP_FEMALE_3 = 'jp_005'
+    JP_MALE = 'jp_006'
+    KR_MALE_1 = 'kr_002'
+    KR_FEMALE = 'kr_003'
+    KR_MALE_2 = 'kr_004'
+    # SINGING VOICES
+    EN_FEMALE_ALTO = 'en_female_f08_salut_damour'
+    EN_MALE_TENOR = 'en_male_m03_lobby'
+    EN_FEMALE_WARMY_BREEZE = 'en_female_f08_warmy_breeze'
+    EN_MALE_SUNSHINE_SOON = 'en_male_m03_sunshine_soon'
+    # OTHER
+    EN_MALE_NARRATION = 'en_male_narration'
+    EN_MALE_FUNNY = 'en_male_funny'
+    EN_FEMALE_EMOTIONAL = 'en_female_emotional'
+
+    # Declared static because it takes no instance; without the decorator a
+    # call on a member would pass that member as input_string.
+    @staticmethod
+    def from_string(input_string: str) -> "Voice | None":
+        """Return the member whose name equals input_string, or None if there is none.
+
+        The comparison is exact and case-sensitive (e.g. "EN_US_MALE_1").
+        """
+        # Iterate over all enum members
+        for voice in Voice:
+            if voice.name == input_string:
+                return voice
+        return None
diff --git a/tiktokvoice.py b/tiktokvoice.py

Original file line number	Diff line number	Diff line change
`@@ -0,0 +1,2 @@`
	`1`	`+from .src.text_to_speech import tts`
	`2`	`+from .src.voice import Voice`