forked from svc-develop-team/so-vits-svc
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
12 changed files
with
435 additions
and
32 deletions.
There are no files selected for viewing
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
data: | ||
sampling_rate: 44100 | ||
block_size: 512 # Equal to hop_length | ||
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip | ||
encoder: 'vec768l12' # 'hubertsoft', 'vec256l9', 'vec768l12' | ||
cnhubertsoft_gate: 10 | ||
encoder_sample_rate: 16000 | ||
encoder_hop_size: 320 | ||
encoder_out_channels: 768 # 256 if using 'hubertsoft' | ||
train_path: dataset/44k # Create a folder named "audio" under this path and put the audio clips in it | ||
filelists_path: filelists/ # FileLists path | ||
extensions: # List of file extensions included in the data collection | ||
- wav | ||
model: | ||
type: 'Diffusion' | ||
n_layers: 20 | ||
n_chans: 512 | ||
n_hidden: 256 | ||
use_pitch_aug: true | ||
n_spk: 1 # max number of different speakers | ||
device: cuda | ||
vocoder: | ||
type: 'nsf-hifigan' | ||
ckpt: 'pretrain/nsf_hifigan/model' | ||
infer: | ||
speedup: 10 | ||
method: 'dpm-solver' # 'pndm' or 'dpm-solver' | ||
env: | ||
expdir: exp/diffusion-test | ||
gpu_id: 0 | ||
train: | ||
num_workers: 2 # If both your CPU and GPU are very powerful, setting this to 0 may be faster | ||
amp_dtype: fp32 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if it is supported by your gpu) | ||
batch_size: 48 | ||
cache_all_data: true # Setting this to false saves RAM or GPU memory, but it may be slower | ||
cache_device: 'cpu' # Set to 'cuda' to cache the data in GPU memory; the fastest option on a powerful GPU | ||
cache_fp16: true | ||
epochs: 100000 | ||
interval_log: 10 | ||
interval_val: 2000 | ||
interval_force_save: 10000 | ||
lr: 0.0002 | ||
decay_step: 100000 | ||
gamma: 0.5 | ||
weight_decay: 0 | ||
save_opt: false | ||
spk: | ||
'nyaru': 0 |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Empty file.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,150 @@ | ||
''' | ||
author: wayn391@mastertones | ||
''' | ||
|
||
import os | ||
import json | ||
import time | ||
import yaml | ||
import datetime | ||
import torch | ||
import matplotlib.pyplot as plt | ||
from . import utils | ||
from torch.utils.tensorboard import SummaryWriter | ||
|
||
class Saver(object):
    '''
    Experiment bookkeeping: console/text logging, TensorBoard logging and
    checkpoint files.

    Everything is written below ``args.env.expdir``: ``log_info.txt``, a
    ``logs`` directory for the TensorBoard writer, a ``config.yaml`` dump of
    the arguments and the ``*.pt`` checkpoints.
    '''

    def __init__(
            self,
            args,
            initial_global_step=-1):
        '''
        Create the experiment directory, the TensorBoard writer and dump the
        configuration.

        Args:
            args: configuration object; ``args.env.expdir`` and
                ``args.data.sampling_rate`` are read, and ``dict(args)`` is
                written to ``config.yaml``.
            initial_global_step: starting value of ``self.global_step``.
        '''
        self.expdir = args.env.expdir
        self.sample_rate = args.data.sampling_rate

        # cold start
        self.global_step = initial_global_step
        self.init_time = time.time()
        self.last_time = time.time()

        # makedirs (checkpoints are stored directly in expdir as well)
        os.makedirs(self.expdir, exist_ok=True)

        # path
        self.path_log_info = os.path.join(self.expdir, 'log_info.txt')

        # writer
        self.writer = SummaryWriter(os.path.join(self.expdir, 'logs'))

        # save config
        path_config = os.path.join(self.expdir, 'config.yaml')
        with open(path_config, "w") as out_config:
            yaml.dump(dict(args), out_config)

    def log_info(self, msg):
        '''
        Print a message and append it to ``log_info.txt``.

        A dict is rendered as one ``key: value`` line per item, with int
        values formatted using thousands separators; anything else is
        printed and written as given.
        '''
        if isinstance(msg, dict):
            msg_list = []
            for k, v in msg.items():
                if isinstance(v, int):
                    tmp_str = '{}: {:,}'.format(k, v)
                else:
                    tmp_str = '{}: {}'.format(k, v)
                msg_list.append(tmp_str)
            msg_str = '\n'.join(msg_list)
        else:
            msg_str = msg

        # display
        print(msg_str)

        # save
        with open(self.path_log_info, 'a') as fp:
            fp.write(msg_str+'\n')

    # NOTE: the parameter name `dict` shadows the builtin; it is kept so that
    # existing keyword callers keep working.
    def log_value(self, dict):
        '''Write every ``tag -> value`` pair as a scalar at the current global step.'''
        for k, v in dict.items():
            self.writer.add_scalar(k, v, self.global_step)

    def log_spec(self, name, spec, spec_out, vmin=-14, vmax=3.5):
        '''
        Log a figure showing, concatenated along the last axis, the absolute
        error shifted by ``vmin``, the reference ``spec`` and ``spec_out``.

        Only the first item (index 0 of the leading axis) is plotted.
        '''
        spec_cat = torch.cat([(spec_out - spec).abs() + vmin, spec, spec_out], -1)
        spec = spec_cat[0]
        if isinstance(spec, torch.Tensor):
            spec = spec.cpu().numpy()
        fig = plt.figure(figsize=(12, 9))
        plt.pcolor(spec.T, vmin=vmin, vmax=vmax)
        plt.tight_layout()
        self.writer.add_figure(name, fig, self.global_step)

    # NOTE: the parameter name `dict` shadows the builtin; kept for callers.
    def log_audio(self, dict):
        '''Write every ``tag -> waveform`` pair as audio at the current global step.'''
        for k, v in dict.items():
            self.writer.add_audio(k, v, global_step=self.global_step, sample_rate=self.sample_rate)

    def get_interval_time(self, update=True):
        '''
        Return the seconds elapsed since the last recorded time stamp.

        With ``update=True`` the time stamp is reset to now.
        '''
        cur_time = time.time()
        time_interval = cur_time - self.last_time
        if update:
            self.last_time = cur_time
        return time_interval

    def get_total_time(self, to_str=True):
        '''
        Return the time elapsed since construction, as float seconds or,
        with ``to_str=True``, as a ``str(timedelta)`` with its last five
        characters removed.
        '''
        total_time = time.time() - self.init_time
        if to_str:
            # dropping five of the six microsecond digits leaves one decimal
            total_time = str(datetime.timedelta(
                seconds=total_time))[:-5]
        return total_time

    def save_model(
            self,
            model,
            optimizer,
            name='model',
            postfix='',
            to_json=False):
        '''
        Save a checkpoint to ``<expdir>/<name>[_<postfix>].pt``.

        The checkpoint holds ``global_step``, the model ``state_dict`` and,
        when ``optimizer`` is not None, the optimizer ``state_dict``.

        Args:
            model: object providing ``state_dict()``.
            optimizer: object providing ``state_dict()``, or None.
            name: base file name.
            postfix: optional suffix, appended after an underscore.
            to_json: additionally write the flattened model weights to
                ``<expdir>/<name>.json``.
        '''
        # path
        if postfix:
            postfix = '_' + postfix
        path_pt = os.path.join(
            self.expdir, name+postfix+'.pt')

        # save
        state = {
            'global_step': self.global_step,
            'model': model.state_dict()}
        if optimizer is not None:
            state['optimizer'] = optimizer.state_dict()
        torch.save(state, path_pt)

        # report only once the file has actually been written
        print(' [*] model checkpoint saved: {}'.format(path_pt))

        # to json
        if to_json:
            path_json = os.path.join(
                self.expdir, name+'.json')
            # The weights are dumped here directly: the previous call passed
            # an undefined name, and the checkpoint file also contains
            # non-tensor entries that cannot be flattened.
            raw_state_dict = {
                k: v.detach().flatten().tolist()
                for k, v in state['model'].items()}
            with open(path_json, 'w') as outfile:
                json.dump(raw_state_dict, outfile, indent="\t")

    def delete_model(self, name='model', postfix=''):
        '''Remove ``<expdir>/<name>[_<postfix>].pt`` if it exists.'''
        # path
        if postfix:
            postfix = '_' + postfix
        path_pt = os.path.join(
            self.expdir, name+postfix+'.pt')

        # delete
        if os.path.exists(path_pt):
            os.remove(path_pt)
            print(' [*] model checkpoint deleted: {}'.format(path_pt))

    def global_step_increment(self):
        '''Advance the global step counter by one.'''
        self.global_step += 1
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,126 @@ | ||
import os | ||
import yaml | ||
import json | ||
import pickle | ||
import torch | ||
|
||
def traverse_dir(
        root_dir,
        extensions,
        amount=None,
        str_include=None,
        str_exclude=None,
        is_pure=False,
        is_sort=False,
        is_ext=True):
    """Recursively collect files below ``root_dir`` by extension.

    Args:
        root_dir: directory to walk.
        extensions: iterable of extensions without the leading dot.
        amount: stop once this many paths have been collected (None = all).
        str_include: keep only paths containing this substring.
        str_exclude: drop paths containing this substring.
        is_pure: return paths relative to ``root_dir`` instead of joined
            with it.
        is_sort: sort the returned list.
        is_ext: keep the file extension in the returned paths.

    Returns:
        List of path strings.
    """
    suffixes = tuple(f".{ext}" for ext in extensions)
    file_list = []
    cnt = 0
    for root, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(suffixes):
                continue

            # path
            mix_path = os.path.join(root, file)
            # relpath stays correct when root_dir ends with a separator;
            # slicing by len(root_dir)+1 ate the first character then.
            pure_path = os.path.relpath(mix_path, root_dir) if is_pure else mix_path

            # amount
            if (amount is not None) and (cnt == amount):
                if is_sort:
                    file_list.sort()
                return file_list

            # check string
            if (str_include is not None) and (str_include not in pure_path):
                continue
            if (str_exclude is not None) and (str_exclude in pure_path):
                continue

            if not is_ext:
                # strip the last ".ext" component
                pure_path = pure_path.rsplit('.', 1)[0]
            file_list.append(pure_path)
            cnt += 1
    if is_sort:
        file_list.sort()
    return file_list
|
||
|
||
|
||
class DotDict(dict):
    """dict whose items can also be read, set and deleted as attributes.

    Reading a missing key through attribute access returns None. A value
    that is exactly a ``dict`` is returned wrapped in a new ``DotDict``
    (a copy, so changes made through the wrapper are not stored back).
    """

    def __getattr__(self, name):
        # Dunder names are protocol probes (copy, pickle, ...). Answering
        # them with None makes hasattr() report hooks that do not exist, so
        # they must fail the regular way.
        if name.startswith('__') and name.endswith('__'):
            raise AttributeError(name)
        val = dict.get(self, name)
        return DotDict(val) if type(val) is dict else val

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__
|
||
|
||
def get_network_paras_amount(model_dict):
    """Count the trainable parameters of each model.

    Args:
        model_dict: mapping of model name to an object exposing
            ``parameters()``.

    Returns:
        dict mapping each model name to the summed ``numel()`` of its
        parameters that have ``requires_grad`` set.
    """
    return {
        model_name: sum(
            param.numel()
            for param in model.parameters()
            if param.requires_grad
        )
        for model_name, model in model_dict.items()
    }
|
||
|
||
def load_config(path_config):
    """Parse the YAML file at ``path_config`` and return it as a ``DotDict``."""
    with open(path_config, "r") as stream:
        parsed = yaml.safe_load(stream)
    return DotDict(parsed)
|
||
def save_config(path_config, config):
    """Write ``config``, converted to a plain dict, as YAML to ``path_config``."""
    plain = dict(config)
    with open(path_config, "w") as stream:
        yaml.dump(plain, stream)
|
||
def to_json(path_params, path_json):
    """Dump the tensors of a saved state dict as flat lists to a JSON file.

    Args:
        path_params: file readable by ``torch.load``; either a plain
            ``name -> tensor`` state dict or a checkpoint dict that stores
            such a state dict under the ``'model'`` key.
        path_json: output path; each tensor is written flattened, the file
            is indented with tabs.
    """
    # NOTE(security): torch.load unpickles the file - only use on trusted
    # checkpoints.
    params = torch.load(path_params, map_location=torch.device('cpu'))

    # Checkpoints with {'global_step': ..., 'model': state_dict, ...} carry
    # non-tensor entries at the top level; export the nested weights.
    if isinstance(params.get('model'), dict):
        params = params['model']

    # Tensor.tolist() also works for tensors that track gradients and for
    # dtypes NumPy cannot represent.
    raw_state_dict = {
        k: v.detach().flatten().tolist()
        for k, v in params.items()}

    with open(path_json, 'w') as outfile:
        json.dump(raw_state_dict, outfile, indent="\t")
|
||
|
||
def convert_tensor_to_numpy(tensor, is_squeeze=True):
    """Return ``tensor`` as a NumPy array.

    The tensor is optionally squeezed, detached when it requires grad and
    moved to the CPU when it lives on a CUDA device before ``numpy()`` is
    called.
    """
    result = tensor.squeeze() if is_squeeze else tensor
    result = result.detach() if result.requires_grad else result
    result = result.cpu() if result.is_cuda else result
    return result.numpy()
|
||
|
||
def load_model(
        expdir,
        model,
        optimizer,
        name='model',
        postfix='',
        device='cpu'):
    """Restore the newest checkpoint found in ``expdir``.

    Checkpoints are looked up as ``<expdir>/<name><postfix><step>.pt``; the
    one with the largest numeric ``<step>`` is loaded. If no numbered
    checkpoint exists but some ``.pt`` file does, ``<name><postfix>best.pt``
    is tried instead.

    Args:
        expdir: experiment directory, searched recursively for ``.pt`` files.
        model: object whose ``load_state_dict`` receives ``ckpt['model']``
            (non-strict).
        optimizer: object whose ``load_state_dict`` receives
            ``ckpt['optimizer']`` when that entry exists; may be None.
        name: checkpoint base name.
        postfix: text between ``name`` and the step number.
        device: ``map_location`` device for ``torch.load``.

    Returns:
        Tuple ``(global_step, model, optimizer)``; ``global_step`` is 0 when
        ``expdir`` contains no ``.pt`` file.

    Raises:
        Whatever ``torch.load`` raises when the selected file is missing or
        unreadable.
    """
    # NOTE(review): only an EMPTY postfix receives the '_' separator; a
    # non-empty one is appended to `name` verbatim. This looks inverted but
    # is kept unchanged because callers depend on the resulting file names.
    if postfix == '':
        postfix = '_' + postfix
    path = os.path.join(expdir, name+postfix)
    found = traverse_dir(expdir, ['pt'], is_ext=False)
    global_step = 0
    if len(found) > 0:
        # Only files that really carry the expected prefix may contribute a
        # step number; unrelated .pt files are ignored.
        steps = [
            int(s[len(path):])
            for s in found
            if s.startswith(path) and s[len(path):].isdigit()
        ]
        if steps:
            path_pt = path+str(max(steps))+'.pt'
        else:
            # No numbered checkpoint. Previously this branch could never run
            # because non-numeric suffixes were counted as step 0.
            path_pt = path+'best.pt'
        print(' [*] restoring model from', path_pt)
        ckpt = torch.load(path_pt, map_location=torch.device(device))
        global_step = ckpt['global_step']
        model.load_state_dict(ckpt['model'], strict=False)
        if ckpt.get('optimizer') is not None and optimizer is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
    return global_step, model, optimizer
Oops, something went wrong.