fix: real time gui not work #123

Open
wants to merge 6 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -21,3 +21,7 @@ reconstructed/
.python-version
ruff.log
/configs/inuse/
modeldata
dataset
runs
venv
23 changes: 23 additions & 0 deletions README.txt
@@ -0,0 +1,23 @@
Seed-VC Voice Conversion Tool

Before you start:
1. Install Python (3.10 or later)
   - Visit https://www.python.org/downloads/
   - Download and install Python
   - Check "Add Python to PATH" during installation

2. Install the NVIDIA GPU driver (if you have an NVIDIA GPU)
   - Visit https://www.nvidia.com/download/index.aspx
   - Download and install the latest driver for your GPU

How to use:
1. Double-click start.bat
2. The first run installs the required environment automatically
3. Choose a conversion mode in the launcher
4. Optionally pick a custom model and config file
5. Click Launch to start

Notes:
- The first run downloads dependencies; keep your network connection active
- An NVIDIA GPU is recommended for best performance
- If you run into problems, check the error message or contact technical support
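
start.bat itself is not part of this diff. As a rough illustration only, the first-run bootstrap it presumably performs (create a venv, install dependencies, then open the launcher) could look like the Python sketch below; the file name bootstrap.py, the requirements.txt path, and the hand-off to launcher.py are assumptions rather than code from this PR.

import subprocess
import sys
from pathlib import Path

ROOT = Path(__file__).parent
VENV_DIR = ROOT / "venv"                  # matches the venv entry added to .gitignore above
REQUIREMENTS = ROOT / "requirements.txt"  # assumed location; not part of this diff

def bootstrap():
    # Create the virtual environment on the first run only
    if not VENV_DIR.exists():
        subprocess.check_call([sys.executable, "-m", "venv", str(VENV_DIR)])
    python = VENV_DIR / ("Scripts/python.exe" if sys.platform == "win32" else "bin/python")
    # Install dependencies into the venv, then hand off to the launcher added in this PR
    subprocess.check_call([str(python), "-m", "pip", "install", "-r", str(REQUIREMENTS)])
    subprocess.check_call([str(python), "launcher.py"], cwd=str(ROOT))

if __name__ == "__main__":
    bootstrap()
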
30 changes: 19 additions & 11 deletions app_svc.py
@@ -10,20 +10,26 @@
import numpy as np
from pydub import AudioSegment
import argparse
from pathlib import Path
# Load model and configuration

fp16 = False
device = None
def load_models(args):
global sr, hop_length, fp16
fp16 = args.fp16
ckpt_root = Path(__file__).parent / "checkpoints"
print(f"Using device: {device}")
print(f"Using fp16: {fp16}")
# f0 conditioned model
if args.checkpoint_path is None or args.checkpoint_path == "":
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth",
"config_dit_mel_seed_uvit_whisper_base_f0_44k.yml")
        # Check for a local model first
dit_checkpoint_path = ckpt_root / "seed_vc" / "svc_model" / "DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth"
dit_config_path = ckpt_root / "seed_vc" / "svc_model" / "config_dit_mel_seed_uvit_whisper_base_f0_44k.yml"
if not dit_checkpoint_path.exists() or not dit_config_path.exists():
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth",
"config_dit_mel_seed_uvit_whisper_base_f0_44k.yml")
else:
print(f"Using custom checkpoint: {args.checkpoint_path}")
dit_checkpoint_path = args.checkpoint_path
@@ -52,9 +58,11 @@ def load_models(args):
# Load additional modules
from modules.campplus.DTDNN import CAMPPlus

campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_ckpt_path = ckpt_root / "campplus" / "campplus_cn_common.bin"
if not campplus_ckpt_path.exists():
campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_model = CAMPPlus(feat_dim=80, embedding_size=192)
campplus_model.load_state_dict(torch.load(campplus_ckpt_path, map_location="cpu"))
campplus_model.eval()
@@ -75,8 +83,7 @@ def load_models(args):
from modules.hifigan.f0_predictor import ConvRNNF0Predictor
hift_config = yaml.safe_load(open('configs/hifigan.yml', 'r'))
hift_gen = HiFTGenerator(**hift_config['hift'], f0_predictor=ConvRNNF0Predictor(**hift_config['f0_predictor']))
hift_path = load_custom_model_from_hf("FunAudioLLM/CosyVoice-300M", 'hift.pt', None)
hift_gen.load_state_dict(torch.load(hift_path, map_location='cpu'))
hift_gen.load_state_dict(torch.load(str(ckpt_root / "cosy_hifigan" / "hift.pt"), map_location='cpu'))
hift_gen.eval()
hift_gen.to(device)
vocoder_fn = hift_gen
@@ -198,9 +205,10 @@ def semantic_fn(waves_16k):
to_mel = lambda x: mel_spectrogram(x, **mel_fn_args)
# f0 extractor
from modules.rmvpe import RMVPE

model_path = load_custom_model_from_hf("lj1995/VoiceConversionWebUI", "rmvpe.pt", None)
rmvpe = RMVPE(model_path, is_half=False, device=device)
rmvpe_path = ckpt_root / "rmvpe" / "rmvpe.pt"
if not rmvpe_path.exists():
rmvpe_path = load_custom_model_from_hf("lj1995/VoiceConversionWebUI", "rmvpe.pt", None)
rmvpe = RMVPE(rmvpe_path, is_half=False, device=device)
f0_fn = rmvpe.infer_from_audio

return (
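
For reference, this is the local checkpoint layout that the modified app_svc.py checks before falling back to a Hugging Face download, as implied by the paths in the hunks above (app_vc.py below does the same with a seed_vc/vc_model/ subfolder):

checkpoints/
    seed_vc/svc_model/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth
    seed_vc/svc_model/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
    campplus/campplus_cn_common.bin
    cosy_hifigan/hift.pt    (loaded directly; this diff has no Hugging Face fallback for it)
    rmvpe/rmvpe.pt
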
23 changes: 15 additions & 8 deletions app_vc.py
@@ -9,18 +9,25 @@
from hf_utils import load_custom_model_from_hf
import numpy as np
from pydub import AudioSegment
from modules.campplus.DTDNN import CAMPPlus
import argparse
from pathlib import Path

# Load model and configuration
fp16 = False
device = None
def load_models(args):
global sr, hop_length, fp16
fp16 = args.fp16
ckpt_root = Path(__file__).parent / "checkpoints"
print(f"Using device: {device}")
print(f"Using fp16: {fp16}")
if args.checkpoint_path is None or args.checkpoint_path == "":
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
        # Check for a local model first
dit_checkpoint_path = ckpt_root / "seed_vc" / "vc_model" / "DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth"
dit_config_path = ckpt_root / "seed_vc" / "vc_model" / "config_dit_mel_seed_uvit_whisper_small_wavenet.yml"
if not dit_checkpoint_path.exists() or not dit_config_path.exists():
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth",
"config_dit_mel_seed_uvit_whisper_small_wavenet.yml")
else:
@@ -47,12 +54,13 @@ def load_models(args):
model[key].to(device)
model.cfm.estimator.setup_caches(max_batch_size=1, max_seq_length=8192)

# Load additional modules
from modules.campplus.DTDNN import CAMPPlus

campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
    # Check for a local campplus checkpoint first
campplus_ckpt_path = ckpt_root / "campplus" / "campplus_cn_common.bin"
if not campplus_ckpt_path.exists():
campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_model = CAMPPlus(feat_dim=80, embedding_size=192)
campplus_model.load_state_dict(torch.load(campplus_ckpt_path, map_location="cpu"))
campplus_model.eval()
@@ -73,8 +81,7 @@ def load_models(args):
from modules.hifigan.f0_predictor import ConvRNNF0Predictor
hift_config = yaml.safe_load(open('configs/hifigan.yml', 'r'))
hift_gen = HiFTGenerator(**hift_config['hift'], f0_predictor=ConvRNNF0Predictor(**hift_config['f0_predictor']))
hift_path = load_custom_model_from_hf("FunAudioLLM/CosyVoice-300M", 'hift.pt', None)
hift_gen.load_state_dict(torch.load(hift_path, map_location='cpu'))
hift_gen.load_state_dict(torch.load(str(ckpt_root / "cosy_hifigan" / "hift.pt"), map_location='cpu'))
hift_gen.eval()
hift_gen.to(device)
vocoder_fn = hift_gen
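
Both app_svc.py and app_vc.py above repeat the same local-first, Hugging-Face-fallback lookup for single-file checkpoints. A small helper along the following lines could express the pattern once; the name resolve_checkpoint is hypothetical (not part of this PR), and it assumes the single-file call form of load_custom_model_from_hf (config_filename=None) used for the campplus and rmvpe weights.

from pathlib import Path
from hf_utils import load_custom_model_from_hf

def resolve_checkpoint(local_path: Path, repo_id: str, filename: str):
    # Return the local checkpoint if it already exists, otherwise fetch it from Hugging Face
    if local_path.exists():
        return local_path
    return load_custom_model_from_hf(repo_id, filename, config_filename=None)

# Usage mirroring the campplus lookup in this diff:
# campplus_ckpt_path = resolve_checkpoint(
#     ckpt_root / "campplus" / "campplus_cn_common.bin",
#     "funasr/campplus", "campplus_cn_common.bin")
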
Binary file removed campplus_cn_common.bin
Binary file not shown.
25 changes: 0 additions & 25 deletions conda-nix-vc-py310.yaml

This file was deleted.

19 changes: 19 additions & 0 deletions ft.txt
@@ -0,0 +1,19 @@
python app_svc.py --checkpoint modeldata/seed_vc/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth --config modeldata/seed_vc/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml --fp16 True
python app_svc.py --checkpoint runs/hutao_svc/ft_model.pth --config configs/presets/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
python real-time-gui.py --checkpoint runs/real_time_bachong/ft_model.pth --config runs/real_time_bachong/config_dit_mel_seed_uvit_xlsr_tiny.yml
python real-time-gui.py --checkpoint runs/bachongshenzi/ft_model.pth --config configs/presets/config_dit_mel_seed_uvit_whisper_small_wavenet.yml



python inference.py --source examples/reference/s1p2.wav
--target examples/vo_card_yaeMiko_invite_easy_03.wav
--output examples
--diffusion-steps 10 # recommended 30~50 for singing voice conversion
--length-adjust 1.0
--inference-cfg-rate 0.7
--f0-condition False # set to True for singing voice conversion
--auto-f0-adjust False # set to True to auto adjust source pitch to target pitch level, normally not used in singing voice conversion
--semi-tone-shift 0 # pitch shift in semitones for singing voice conversion
--checkpoint runs/real_time_bachong/ft_model.pth
--config runs/real_time_bachong/config_dit_mel_seed_uvit_xlsr_tiny.yml
--fp16 True
135 changes: 135 additions & 0 deletions launcher.py
@@ -0,0 +1,135 @@
import sys
import os
import subprocess
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QPushButton, QLabel, QFileDialog, QComboBox, QMessageBox)
from PyQt5.QtCore import Qt

class LauncherWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle("Seed-VC Launcher")
self.setFixedSize(600, 400)

        # Central widget
widget = QWidget()
self.setCentralWidget(widget)
layout = QVBoxLayout()
widget.setLayout(layout)

        # Title
title = QLabel("Seed-VC 启动器")
title.setAlignment(Qt.AlignCenter)
title.setStyleSheet("font-size: 24px; margin: 20px;")
layout.addWidget(title)

        # Mode selection
self.mode_selector = QComboBox()
self.mode_selector.addItems([
"语音转换 (app_vc.py)",
"歌声转换 (app_svc.py)",
"实时转换 (real-time-gui.py)"
])
layout.addWidget(QLabel("选择转换模式:"))
layout.addWidget(self.mode_selector)

        # Model selection
layout.addWidget(QLabel("模型文件 (可选):"))
self.model_path = ""
self.model_label = QLabel("未选择")
model_button = QPushButton("选择模型文件")
model_button.clicked.connect(self.select_model)
layout.addWidget(self.model_label)
layout.addWidget(model_button)

        # Config file selection
layout.addWidget(QLabel("配置文件 (可选):"))
self.config_path = ""
self.config_label = QLabel("未选择")
config_button = QPushButton("选择配置文件")
config_button.clicked.connect(self.select_config)
layout.addWidget(self.config_label)
layout.addWidget(config_button)

        # Launch button
launch_button = QPushButton("启动")
launch_button.setStyleSheet("font-size: 18px; padding: 10px;")
launch_button.clicked.connect(self.launch)
layout.addWidget(launch_button)

        # Informational note
note = QLabel("注意:如果不选择模型和配置文件,将使用默认设置")
note.setStyleSheet("color: gray;")
layout.addWidget(note)

def select_model(self):
file_name, _ = QFileDialog.getOpenFileName(self, "选择模型文件", "", "Model Files (*.pth)")
if file_name:
self.model_path = file_name
self.model_label.setText(os.path.basename(file_name))

def select_config(self):
file_name, _ = QFileDialog.getOpenFileName(self, "选择配置文件", "", "Config Files (*.yml *.yaml)")
if file_name:
self.config_path = file_name
self.config_label.setText(os.path.basename(file_name))

def launch(self):
mode = self.mode_selector.currentText()
script = mode.split("(")[1].strip(")").strip()

        # Build paths
project_root = os.getcwd()
venv_path = os.path.join(project_root, "venv")

        # Build the activation command and run command
if os.name == 'nt': # Windows
activate_cmd = os.path.join(venv_path, "Scripts", "activate.bat")
cmd = [
"cmd.exe", "/c",
f"{activate_cmd} && python {script}"
]
else: # Linux/Mac
activate_cmd = os.path.join(venv_path, "bin", "activate")
cmd = [
"bash", "-c",
f"source {activate_cmd} && python {script}"
]

        # Append optional arguments
if self.model_path:
cmd[-1] += f" --checkpoint-path {self.model_path}"
if self.config_path:
cmd[-1] += f" --config-path {self.config_path}"
if "svc" in script or "vc" in script:
cmd[-1] += " --fp16 True"
        # GPU device selection
        cmd[-1] += " --gpu 0"  # default to the first GPU

        try:
            # Run the command from the project root without redirecting output.
            # cmd is already a complete argument list (cmd.exe /c ... or bash -c ...),
            # so shell=True is unnecessary and would break the bash invocation on Linux/Mac.
            process = subprocess.Popen(
                cmd,
                cwd=project_root,
                env=os.environ.copy(),
            )

            # Wait briefly to make sure the process has started
import time
time.sleep(2)

            # Check whether the process is still running
if process.poll() is None:
QMessageBox.information(self, "启动成功", f"已启动 {script}")
else:
QMessageBox.critical(self, "错误", f"启动失败")

except Exception as e:
QMessageBox.critical(self, "错误", f"启动失败: {str(e)}")

if __name__ == "__main__":
app = QApplication(sys.argv)
window = LauncherWindow()
window.show()
sys.exit(app.exec_())