Skip to content

Commit

Permalink
NsfHifigan在DML中上采样出现错误以及SourceModuleHnNSF这两个BUG的修复
Browse files Browse the repository at this point in the history
  • Loading branch information
NaruseMioShirakana committed Jul 17, 2023
1 parent 72deb15 commit 90c9ccc
Show file tree
Hide file tree
Showing 5 changed files with 276 additions and 220 deletions.
184 changes: 133 additions & 51 deletions onnx_export.py
Original file line number Diff line number Diff line change
@@ -1,56 +1,138 @@
import json
import torch

import utils
from onnxexport.model_onnx import SynthesizerTrn


def main(NetExport):
    """Export the ``SoVits4.0`` checkpoint to ONNX.

    Does nothing unless *NetExport* is truthy. When enabled, loads
    ``checkpoints/SoVits4.0/config.json`` and ``model.pth``, freezes the
    network on the CPU and writes ``checkpoints/SoVits4.0/model.onnx``.
    """
    if not NetExport:
        return

    path = "SoVits4.0"
    cpu = torch.device("cpu")
    hparams = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
    net = SynthesizerTrn(
        hparams.data.filter_length // 2 + 1,
        hparams.train.segment_size // hparams.data.hop_length,
        **hparams.model,
    )
    utils.load_checkpoint(f"checkpoints/{path}/model.pth", net, None)
    net.eval().to(cpu)
    for param in net.parameters():
        param.requires_grad = False

    n_frame = 10
    # Dummy tensors keyed by their ONNX input name; insertion order is the
    # positional order handed to the model during tracing.
    dummy = {
        "c": torch.rand(1, n_frame, 256),
        "f0": torch.rand(1, n_frame),
        "mel2ph": torch.arange(0, n_frame, dtype=torch.int64)[None],
        "uv": torch.ones(1, n_frame, dtype=torch.float32),
        "noise": torch.randn(1, 192, n_frame),
        "sid": torch.LongTensor([0]),
    }
    export_args = tuple(tensor.to(cpu) for tensor in dummy.values())

    # "sid" is deliberately absent: it keeps a fixed shape in the graph.
    dynamic_axes = {
        "c": [0, 1],
        "f0": [1],
        "mel2ph": [1],
        "uv": [1],
        "noise": [2],
    }

    torch.onnx.export(
        net,
        export_args,
        f"checkpoints/{path}/model.onnx",
        dynamic_axes=dynamic_axes,
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=list(dummy),
        output_names=["audio"],
    )
from onnxexport.model_onnx_speaker_mix import SynthesizerTrn


def main(*, path="crs"):
    """Export a SoVits checkpoint to ONNX and write the matching MoeVS config.

    Reads ``checkpoints/{path}/config.json`` and
    ``checkpoints/{path}/model.pth``; writes
    ``checkpoints/{path}/{path}_SoVits.onnx`` and ``checkpoints/{path}.json``.

    When the config lists two or more speakers the model is exported in
    speaker-mix mode: ``sid`` becomes a per-frame weight matrix and gets a
    dynamic first axis. When the model has ``vol_embedding`` enabled an
    extra ``vol`` input is exported.

    Args:
        path: Name of the checkpoint folder under ``checkpoints/``.
            Keyword-only, so a stray positional argument still raises
            ``TypeError`` exactly as it did when ``main`` took no arguments.
    """
    device = torch.device("cpu")
    hps = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
    model = SynthesizerTrn(
        hps.data.filter_length // 2 + 1,
        hps.train.segment_size // hps.data.hop_length,
        **hps.model)
    utils.load_checkpoint(f"checkpoints/{path}/model.pth", model, None)
    model.eval().to(device)
    for param in model.parameters():
        param.requires_grad = False

    num_frames = 200

    test_hidden_unit = torch.rand(1, num_frames, model.gin_channels)
    test_pitch = torch.rand(1, num_frames)
    test_vol = torch.rand(1, num_frames)
    test_mel2ph = torch.arange(0, num_frames, dtype=torch.int64).unsqueeze(0)
    test_uv = torch.ones(1, num_frames, dtype=torch.float32)
    # NOTE(review): 192 is hard-coded; presumably it must match the model's
    # latent channel count - confirm against the model config.
    test_noise = torch.randn(1, 192, num_frames)
    test_sid = torch.LongTensor([0])

    n_spk = len(hps.spk)
    export_mix = n_spk >= 2
    if export_mix:
        # Uniform mix over all speakers, repeated for every frame.
        test_sid = torch.tensor([1.0 / float(n_spk)] * n_spk)
        model.export_chara_mix(hps.spk)
        test_sid = test_sid.unsqueeze(0).repeat(num_frames, 1)

    model.eval()

    input_names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
    output_names = ["audio", ]
    daxes = {
        "c": [0, 1],
        "f0": [1],
        "mel2ph": [1],
        "uv": [1],
        "noise": [2],
    }
    if export_mix:
        daxes["sid"] = [0]

    test_inputs = [
        test_hidden_unit.to(device),
        test_pitch.to(device),
        test_mel2ph.to(device),
        test_uv.to(device),
        test_noise.to(device),
        test_sid.to(device),
    ]
    if model.vol_embedding:
        input_names.append("vol")
        daxes["vol"] = [1]
        test_inputs.append(test_vol.to(device))
    test_inputs = tuple(test_inputs)

    # Warm-up forward pass with exactly the inputs the exporter will trace,
    # so a signature mismatch shows up here rather than inside the export.
    # The original ran this before dec.OnnxExport(); that order is kept.
    model(*test_inputs)

    model.dec.OnnxExport()

    torch.onnx.export(
        model,
        test_inputs,
        f"checkpoints/{path}/{path}_SoVits.onnx",
        dynamic_axes=daxes,
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=input_names,
        output_names=output_names
    )

    vec_lay = "layer-12" if model.gin_channels == 768 else "layer-9"
    spklist = list(hps.spk.keys())

    moevs_conf = {
        "Folder": f"{path}",
        "Name": f"{path}",
        "Type": "SoVits",
        "Rate": hps.data.sampling_rate,
        "Hop": hps.data.hop_length,
        "Hubert": f"vec-{model.gin_channels}-{vec_lay}",
        "SoVits4": True,
        "SoVits3": False,
        "CharaMix": export_mix,
        "Volume": model.vol_embedding,
        "HiddenSize": model.gin_channels,
        "Characters": spklist
    }

    # The config is written next to the checkpoint folder, not inside it.
    with open(f"checkpoints/{path}.json", 'w', encoding="utf-8") as conf_file:
        json.dump(moevs_conf, conf_file, indent=4)


# Run the export when this file is executed as a script. ``main`` takes no
# positional arguments, so it is called bare.
if __name__ == '__main__':
    main()
56 changes: 56 additions & 0 deletions onnx_export_old.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
import torch

import utils
from onnxexport.model_onnx import SynthesizerTrn


def main(NetExport):
    """Trace the ``SoVits4.0`` model and save it as ``model.onnx``.

    The whole export is skipped when *NetExport* is falsy. Otherwise the
    config and weights are read from ``checkpoints/SoVits4.0/``, every
    parameter is frozen, and the network is exported on the CPU with
    10-frame dummy inputs.
    """
    if not NetExport:
        return None

    path = "SoVits4.0"
    device = torch.device("cpu")
    config = utils.get_hparams_from_file(f"checkpoints/{path}/config.json")
    model = SynthesizerTrn(
        hps_first := config.data.filter_length // 2 + 1,
        config.train.segment_size // config.data.hop_length,
        **config.model,
    ) if False else SynthesizerTrn(
        config.data.filter_length // 2 + 1,
        config.train.segment_size // config.data.hop_length,
        **config.model,
    )
    utils.load_checkpoint(f"checkpoints/{path}/model.pth", model, None)
    model.eval().to(device)
    for weight in model.parameters():
        weight.requires_grad = False

    n_frame = 10
    # Parallel lists: names[i] labels tensors[i] in the exported graph.
    names = ["c", "f0", "mel2ph", "uv", "noise", "sid"]
    tensors = [
        torch.rand(1, n_frame, 256),
        torch.rand(1, n_frame),
        torch.arange(0, n_frame, dtype=torch.int64)[None],
        torch.ones(1, n_frame, dtype=torch.float32),
        torch.randn(1, 192, n_frame),
        torch.LongTensor([0]),
    ]
    traced_args = tuple(t.to(device) for t in tensors)

    torch.onnx.export(
        model,
        traced_args,
        f"checkpoints/{path}/model.onnx",
        dynamic_axes={
            "c": [0, 1],
            "f0": [1],
            "mel2ph": [1],
            "uv": [1],
            "noise": [2],
        },
        do_constant_folding=False,
        opset_version=16,
        verbose=False,
        input_names=names,
        output_names=["audio"],
    )


# Script entry point: run the export with NetExport enabled.
if __name__ == '__main__':
    main(True)
138 changes: 0 additions & 138 deletions onnx_export_speaker_mix.py

This file was deleted.

Loading

0 comments on commit 90c9ccc

Please sign in to comment.