fix: real time gui not work #123

Open
wants to merge 6 commits into base: main
4 changes: 4 additions & 0 deletions .gitignore
@@ -21,3 +21,7 @@ reconstructed/
.python-version
ruff.log
/configs/inuse/
modeldata
dataset
runs
venv
23 changes: 23 additions & 0 deletions README.txt
@@ -0,0 +1,23 @@
Seed-VC Voice Conversion Tool

Before you start:
1. Install Python (3.10 or later)
   - Visit https://www.python.org/downloads/
   - Download and install Python
   - Check "Add Python to PATH" during installation

2. Install the NVIDIA GPU driver (if you have an NVIDIA GPU)
   - Visit https://www.nvidia.com/download/index.aspx
   - Download and install the latest driver for your GPU

How to use:
1. Double-click start.bat
2. The first run installs the required environment automatically
3. Choose a conversion mode in the launcher
4. Optionally pick a custom model and config file
5. Click Launch to start

Notes:
- The first run downloads dependencies; keep your network connection active
- An NVIDIA GPU is recommended for best performance
- If you run into problems, check the error message or contact technical support
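
start.bat itself is not part of this diff. As a rough illustration only, the first-run bootstrap it presumably performs (create a venv, install dependencies, then open the launcher) could look like the Python sketch below; the file name bootstrap.py, the requirements.txt path, and the hand-off to launcher.py are assumptions rather than code from this PR.

import subprocess
import sys
from pathlib import Path

ROOT = Path(__file__).parent
VENV_DIR = ROOT / "venv"                  # matches the venv entry added to .gitignore above
REQUIREMENTS = ROOT / "requirements.txt"  # assumed location; not part of this diff

def bootstrap():
    # Create the virtual environment on the first run only
    if not VENV_DIR.exists():
        subprocess.check_call([sys.executable, "-m", "venv", str(VENV_DIR)])
    python = VENV_DIR / ("Scripts/python.exe" if sys.platform == "win32" else "bin/python")
    # Install dependencies into the venv, then hand off to the launcher added in this PR
    subprocess.check_call([str(python), "-m", "pip", "install", "-r", str(REQUIREMENTS)])
    subprocess.check_call([str(python), "launcher.py"], cwd=str(ROOT))

if __name__ == "__main__":
    bootstrap()
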
30 changes: 19 additions & 11 deletions app_svc.py
@@ -10,20 +10,26 @@
import numpy as np
from pydub import AudioSegment
import argparse
from pathlib import Path
# Load model and configuration

fp16 = False
device = None
def load_models(args):
global sr, hop_length, fp16
fp16 = args.fp16
ckpt_root = Path(__file__).parent / "checkpoints"
print(f"Using device: {device}")
print(f"Using fp16: {fp16}")
# f0 conditioned model
if args.checkpoint_path is None or args.checkpoint_path == "":
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth",
"config_dit_mel_seed_uvit_whisper_base_f0_44k.yml")
        # Check for a local model first
dit_checkpoint_path = ckpt_root / "seed_vc" / "svc_model" / "DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth"
dit_config_path = ckpt_root / "seed_vc" / "svc_model" / "config_dit_mel_seed_uvit_whisper_base_f0_44k.yml"
if not dit_checkpoint_path.exists() or not dit_config_path.exists():
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth",
"config_dit_mel_seed_uvit_whisper_base_f0_44k.yml")
else:
print(f"Using custom checkpoint: {args.checkpoint_path}")
dit_checkpoint_path = args.checkpoint_path
@@ -52,9 +58,11 @@ def load_models(args):
# Load additional modules
from modules.campplus.DTDNN import CAMPPlus

campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_ckpt_path = ckpt_root / "campplus" / "campplus_cn_common.bin"
if not campplus_ckpt_path.exists():
campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_model = CAMPPlus(feat_dim=80, embedding_size=192)
campplus_model.load_state_dict(torch.load(campplus_ckpt_path, map_location="cpu"))
campplus_model.eval()
@@ -75,8 +83,7 @@ def load_models(args):
from modules.hifigan.f0_predictor import ConvRNNF0Predictor
hift_config = yaml.safe_load(open('configs/hifigan.yml', 'r'))
hift_gen = HiFTGenerator(**hift_config['hift'], f0_predictor=ConvRNNF0Predictor(**hift_config['f0_predictor']))
hift_path = load_custom_model_from_hf("FunAudioLLM/CosyVoice-300M", 'hift.pt', None)
hift_gen.load_state_dict(torch.load(hift_path, map_location='cpu'))
hift_gen.load_state_dict(torch.load(str(ckpt_root / "cosy_hifigan" / "hift.pt"), map_location='cpu'))
hift_gen.eval()
hift_gen.to(device)
vocoder_fn = hift_gen
@@ -198,9 +205,10 @@ def semantic_fn(waves_16k):
to_mel = lambda x: mel_spectrogram(x, **mel_fn_args)
# f0 extractor
from modules.rmvpe import RMVPE

model_path = load_custom_model_from_hf("lj1995/VoiceConversionWebUI", "rmvpe.pt", None)
rmvpe = RMVPE(model_path, is_half=False, device=device)
rmvpe_path = ckpt_root / "rmvpe" / "rmvpe.pt"
if not rmvpe_path.exists():
rmvpe_path = load_custom_model_from_hf("lj1995/VoiceConversionWebUI", "rmvpe.pt", None)
rmvpe = RMVPE(rmvpe_path, is_half=False, device=device)
f0_fn = rmvpe.infer_from_audio

return (
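
For reference, this is the local checkpoint layout that the modified app_svc.py checks before falling back to a Hugging Face download, as implied by the paths in the hunks above (app_vc.py below does the same with a seed_vc/vc_model/ subfolder):

checkpoints/
    seed_vc/svc_model/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ft_ema_v2.pth
    seed_vc/svc_model/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
    campplus/campplus_cn_common.bin
    cosy_hifigan/hift.pt    (loaded directly; this diff has no Hugging Face fallback for it)
    rmvpe/rmvpe.pt
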
23 changes: 15 additions & 8 deletions app_vc.py
@@ -9,18 +9,25 @@
from hf_utils import load_custom_model_from_hf
import numpy as np
from pydub import AudioSegment
from modules.campplus.DTDNN import CAMPPlus
import argparse
from pathlib import Path

# Load model and configuration
fp16 = False
device = None
def load_models(args):
global sr, hop_length, fp16
fp16 = args.fp16
ckpt_root = Path(__file__).parent / "checkpoints"
print(f"Using device: {device}")
print(f"Using fp16: {fp16}")
if args.checkpoint_path is None or args.checkpoint_path == "":
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
        # Check for a local model first
dit_checkpoint_path = ckpt_root / "seed_vc" / "vc_model" / "DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth"
dit_config_path = ckpt_root / "seed_vc" / "vc_model" / "config_dit_mel_seed_uvit_whisper_small_wavenet.yml"
if not dit_checkpoint_path.exists() or not dit_config_path.exists():
dit_checkpoint_path, dit_config_path = load_custom_model_from_hf("Plachta/Seed-VC",
"DiT_seed_v2_uvit_whisper_small_wavenet_bigvgan_pruned.pth",
"config_dit_mel_seed_uvit_whisper_small_wavenet.yml")
else:
@@ -47,12 +54,13 @@ def load_models(args):
model[key].to(device)
model.cfm.estimator.setup_caches(max_batch_size=1, max_seq_length=8192)

# Load additional modules
from modules.campplus.DTDNN import CAMPPlus

campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
    # Check for a local campplus checkpoint first
campplus_ckpt_path = ckpt_root / "campplus" / "campplus_cn_common.bin"
if not campplus_ckpt_path.exists():
campplus_ckpt_path = load_custom_model_from_hf(
"funasr/campplus", "campplus_cn_common.bin", config_filename=None
)
campplus_model = CAMPPlus(feat_dim=80, embedding_size=192)
campplus_model.load_state_dict(torch.load(campplus_ckpt_path, map_location="cpu"))
campplus_model.eval()
@@ -73,8 +81,7 @@ def load_models(args):
from modules.hifigan.f0_predictor import ConvRNNF0Predictor
hift_config = yaml.safe_load(open('configs/hifigan.yml', 'r'))
hift_gen = HiFTGenerator(**hift_config['hift'], f0_predictor=ConvRNNF0Predictor(**hift_config['f0_predictor']))
hift_path = load_custom_model_from_hf("FunAudioLLM/CosyVoice-300M", 'hift.pt', None)
hift_gen.load_state_dict(torch.load(hift_path, map_location='cpu'))
hift_gen.load_state_dict(torch.load(str(ckpt_root / "cosy_hifigan" / "hift.pt"), map_location='cpu'))
hift_gen.eval()
hift_gen.to(device)
vocoder_fn = hift_gen
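
Both app_svc.py and app_vc.py above repeat the same local-first, Hugging-Face-fallback lookup for single-file checkpoints. A small helper along the following lines could express the pattern once; the name resolve_checkpoint is hypothetical (not part of this PR), and it assumes the single-file call form of load_custom_model_from_hf (config_filename=None) used for the campplus and rmvpe weights.

from pathlib import Path
from hf_utils import load_custom_model_from_hf

def resolve_checkpoint(local_path: Path, repo_id: str, filename: str):
    # Return the local checkpoint if it already exists, otherwise fetch it from Hugging Face
    if local_path.exists():
        return local_path
    return load_custom_model_from_hf(repo_id, filename, config_filename=None)

# Usage mirroring the campplus lookup in this diff:
# campplus_ckpt_path = resolve_checkpoint(
#     ckpt_root / "campplus" / "campplus_cn_common.bin",
#     "funasr/campplus", "campplus_cn_common.bin")
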
Binary file removed campplus_cn_common.bin
Binary file not shown.
25 changes: 0 additions & 25 deletions conda-nix-vc-py310.yaml

This file was deleted.

19 changes: 19 additions & 0 deletions ft.txt
@@ -0,0 +1,19 @@
python app_svc.py --checkpoint modeldata/seed_vc/DiT_seed_v2_uvit_whisper_base_f0_44k_bigvgan_pruned_ema.pth --config modeldata/seed_vc/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml --fp16 True
python app_svc.py --checkpoint runs/hutao_svc/ft_model.pth --config configs/presets/config_dit_mel_seed_uvit_whisper_base_f0_44k.yml
python real-time-gui.py --checkpoint runs/real_time_bachong/ft_model.pth --config runs/real_time_bachong/config_dit_mel_seed_uvit_xlsr_tiny.yml
python real-time-gui.py --checkpoint runs/bachongshenzi/ft_model.pth --config configs/presets/config_dit_mel_seed_uvit_whisper_small_wavenet.yml



python inference.py --source examples/reference/s1p2.wav
--target examples/vo_card_yaeMiko_invite_easy_03.wav
--output examples
--diffusion-steps 10 # recommended 30~50 for singing voice conversion
--length-adjust 1.0
--inference-cfg-rate 0.7
--f0-condition False # set to True for singing voice conversion
--auto-f0-adjust False # set to True to auto adjust source pitch to target pitch level, normally not used in singing voice conversion
--semi-tone-shift 0 # pitch shift in semitones for singing voice conversion
--checkpoint runs/real_time_bachong/ft_model.pth
--config runs/real_time_bachong/config_dit_mel_seed_uvit_xlsr_tiny.yml
--fp16 True
135 changes: 135 additions & 0 deletions launcher.py
@@ -0,0 +1,135 @@
import sys
import os
import subprocess
from PyQt5.QtWidgets import (QApplication, QMainWindow, QWidget, QVBoxLayout,
QPushButton, QLabel, QFileDialog, QComboBox, QMessageBox)
from PyQt5.QtCore import Qt

class LauncherWindow(QMainWindow):
def __init__(self):
super().__init__()
self.setWindowTitle("Seed-VC Launcher")
self.setFixedSize(600, 400)

        # Central widget
widget = QWidget()
self.setCentralWidget(widget)
layout = QVBoxLayout()
widget.setLayout(layout)

        # Title
title = QLabel("Seed-VC 启动器")
title.setAlignment(Qt.AlignCenter)
title.setStyleSheet("font-size: 24px; margin: 20px;")
layout.addWidget(title)

        # Mode selection
self.mode_selector = QComboBox()
self.mode_selector.addItems([
"语音转换 (app_vc.py)",
"歌声转换 (app_svc.py)",
"实时转换 (real-time-gui.py)"
])
layout.addWidget(QLabel("选择转换模式:"))
layout.addWidget(self.mode_selector)

        # Model selection
layout.addWidget(QLabel("模型文件 (可选):"))
self.model_path = ""
self.model_label = QLabel("未选择")
model_button = QPushButton("选择模型文件")
model_button.clicked.connect(self.select_model)
layout.addWidget(self.model_label)
layout.addWidget(model_button)

        # Config file selection
layout.addWidget(QLabel("配置文件 (可选):"))
self.config_path = ""
self.config_label = QLabel("未选择")
config_button = QPushButton("选择配置文件")
config_button.clicked.connect(self.select_config)
layout.addWidget(self.config_label)
layout.addWidget(config_button)

        # Launch button
launch_button = QPushButton("启动")
launch_button.setStyleSheet("font-size: 18px; padding: 10px;")
launch_button.clicked.connect(self.launch)
layout.addWidget(launch_button)

        # Informational note
note = QLabel("注意:如果不选择模型和配置文件,将使用默认设置")
note.setStyleSheet("color: gray;")
layout.addWidget(note)

def select_model(self):
file_name, _ = QFileDialog.getOpenFileName(self, "选择模型文件", "", "Model Files (*.pth)")
if file_name:
self.model_path = file_name
self.model_label.setText(os.path.basename(file_name))

def select_config(self):
file_name, _ = QFileDialog.getOpenFileName(self, "选择配置文件", "", "Config Files (*.yml *.yaml)")
if file_name:
self.config_path = file_name
self.config_label.setText(os.path.basename(file_name))

def launch(self):
mode = self.mode_selector.currentText()
script = mode.split("(")[1].strip(")").strip()

        # Build paths
project_root = os.getcwd()
venv_path = os.path.join(project_root, "venv")

        # Build the activation command and run command
if os.name == 'nt': # Windows
activate_cmd = os.path.join(venv_path, "Scripts", "activate.bat")
cmd = [
"cmd.exe", "/c",
f"{activate_cmd} && python {script}"
]
else: # Linux/Mac
activate_cmd = os.path.join(venv_path, "bin", "activate")
cmd = [
"bash", "-c",
f"source {activate_cmd} && python {script}"
]

        # Append optional arguments
if self.model_path:
cmd[-1] += f" --checkpoint-path {self.model_path}"
if self.config_path:
cmd[-1] += f" --config-path {self.config_path}"
if "svc" in script or "vc" in script:
cmd[-1] += " --fp16 True"
        # GPU device selection
        cmd[-1] += " --gpu 0"  # default to the first GPU

        try:
            # Run the command from the project root without redirecting output.
            # cmd is already a complete argument list (cmd.exe /c ... or bash -c ...),
            # so shell=True is unnecessary and would break the bash invocation on Linux/Mac.
            process = subprocess.Popen(
                cmd,
                cwd=project_root,
                env=os.environ.copy(),
            )

            # Wait briefly to make sure the process has started
import time
time.sleep(2)

            # Check whether the process is still running
if process.poll() is None:
QMessageBox.information(self, "启动成功", f"已启动 {script}")
else:
QMessageBox.critical(self, "错误", f"启动失败")

except Exception as e:
QMessageBox.critical(self, "错误", f"启动失败: {str(e)}")

if __name__ == "__main__":
app = QApplication(sys.argv)
window = LauncherWindow()
window.show()
sys.exit(app.exec_())