Implementation of the paper "SpeechTripleNet: End-to-End Disentangled Speech Representation Learning for Content, Timbre and Prosody".
Demo | Paper | Checkpoint
Create and activate the conda environment:

```bash
conda env create -f environment.yml
conda activate speechtriplenet-env
```
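A quick sanity check of the environment, assuming PyTorch is among the dependencies installed by `environment.yml` (the training command below expects a CUDA-capable GPU):

```python
# Hypothetical sanity check, not part of the repository.
# Assumes environment.yml installs PyTorch with CUDA support.
import torch

print("PyTorch version:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
if torch.cuda.is_available():
    print("GPU:", torch.cuda.get_device_name(0))
```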
Download the pretrained model from https://drive.google.com/file/d/1dAdPXtENtACtVokBZyzn32DlWd1Zk3Yy/view?usp=sharing and put it under `output-CCDPJ-c_100.0_1.3-s_10.0_60.0-p_10.0_3.0/ckpt/VCTK/`.
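To fetch the checkpoint from a script instead of the browser, here is a minimal sketch using the `gdown` package (not a repository dependency; the file ID comes from the Drive link above):

```python
# Hypothetical download helper, requires `pip install gdown`.
import os

import gdown

FILE_ID = "1dAdPXtENtACtVokBZyzn32DlWd1Zk3Yy"  # from the Drive link above
CKPT_DIR = "output-CCDPJ-c_100.0_1.3-s_10.0_60.0-p_10.0_3.0/ckpt/VCTK/"

os.makedirs(CKPT_DIR, exist_ok=True)
# A trailing-slash output path tells gdown to keep the file's original name.
gdown.download(id=FILE_ID, output=CKPT_DIR, quiet=False)
```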
Then launch the demo notebook:

```bash
jupyter notebook speech_editing.ipynb
```
To train the model yourself, first preprocess the VCTK corpus:

```bash
python preprocess.py --config ./configs/VCTK/preprocess.yaml
```
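Before running it, you may want to confirm that the dataset and output paths in the config point where you expect; a minimal sketch (assuming PyYAML is available, and making no assumptions about the exact keys in the file):

```python
# Hypothetical config inspection: prints ./configs/VCTK/preprocess.yaml
# so you can verify corpus and output paths before preprocessing.
import yaml

with open("./configs/VCTK/preprocess.yaml") as f:
    config = yaml.safe_load(f)

print(yaml.dump(config, default_flow_style=False, sort_keys=False))
```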
Then start training:

```bash
CUDA_VISIBLE_DEVICES=0 python train.py --mdl CCDPJ -p ./configs/VCTK/preprocess.yaml -t ./configs/VCTK/train.yaml -m ./configs/VCTK/model.yaml
```
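The same run can also be launched from Python, which is convenient for scripted experiments; the sketch below simply mirrors the shell command above (all arguments are taken from it, nothing else is assumed about `train.py`):

```python
# Hypothetical launcher: mirrors the shell command above and pins GPU 0.
import os
import subprocess

env = dict(os.environ, CUDA_VISIBLE_DEVICES="0")
subprocess.run(
    [
        "python", "train.py",
        "--mdl", "CCDPJ",
        "-p", "./configs/VCTK/preprocess.yaml",
        "-t", "./configs/VCTK/train.yaml",
        "-m", "./configs/VCTK/model.yaml",
    ],
    env=env,
    check=True,
)
```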
For speech editing and inference examples, see speech_editing.ipynb.
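If you need to run the notebook non-interactively (for example on a remote server), it can be executed in place with `jupyter nbconvert`; a minimal sketch:

```python
# Hypothetical headless run of the demo notebook via nbconvert.
import subprocess

subprocess.run(
    ["jupyter", "nbconvert", "--to", "notebook", "--execute",
     "--inplace", "speech_editing.ipynb"],
    check=True,
)
```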