Skip to content

Commit

Permalink
diff
Browse files Browse the repository at this point in the history
  • Loading branch information
ylzz1997 committed May 16, 2023
1 parent ddc594a commit 76e0615
Show file tree
Hide file tree
Showing 12 changed files with 435 additions and 32 deletions.
Empty file added configs/diffusion.yaml
Empty file.
48 changes: 48 additions & 0 deletions configs_template/diffusion_template.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
data:
sampling_rate: 44100
block_size: 512 # Equal to hop_length
duration: 2 # Audio duration during training, must be less than the duration of the shortest audio clip
encoder: 'vec768l12' # 'hubertsoft', 'vec256l9', 'vec768l12'
cnhubertsoft_gate: 10
encoder_sample_rate: 16000
encoder_hop_size: 320
encoder_out_channels: 768 # 256 if using 'hubertsoft'
train_path: dataset/44k # Create a folder named "audio" under this path and put the audio clips in it
filelists_path: filelists/ # FileLists path
extensions: # List of file extensions included in the data collection
- wav
model:
type: 'Diffusion'
n_layers: 20
n_chans: 512
n_hidden: 256
use_pitch_aug: true
n_spk: 1 # max number of different speakers
device: cuda
vocoder:
type: 'nsf-hifigan'
ckpt: 'pretrain/nsf_hifigan/model'
infer:
speedup: 10
method: 'dpm-solver' # 'pndm' or 'dpm-solver'
env:
expdir: exp/diffusion-test
gpu_id: 0
train:
num_workers: 2 # If both your CPU and GPU are very fast, setting this to 0 may be faster
amp_dtype: fp32 # fp32, fp16 or bf16 (fp16 or bf16 may be faster if it is supported by your gpu)
batch_size: 48
cache_all_data: true # Setting this to false saves RAM or GPU memory, but training may be slower
cache_device: 'cpu' # Set to 'cuda' to cache the data in GPU memory; fastest option on a powerful GPU
cache_fp16: true
epochs: 100000
interval_log: 10
interval_val: 2000
interval_force_save: 10000
lr: 0.0002
decay_step: 100000
gamma: 0.5
weight_decay: 0
save_opt: false
spk:
'nyaru': 0
8 changes: 4 additions & 4 deletions diffusion/data_loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,7 +51,7 @@ def traverse_dir(

def get_data_loaders(args, whole_audio=False):
data_train = AudioDataset(
args.data.train_path,
filelists_path = args.filelists_path,
waveform_sec=args.data.duration,
hop_size=args.data.block_size,
sample_rate=args.data.sampling_rate,
Expand All @@ -71,7 +71,7 @@ def get_data_loaders(args, whole_audio=False):
pin_memory=True if args.train.cache_device=='cpu' else False
)
data_valid = AudioDataset(
args.data.valid_path,
filelists_path = args.filelists_path,
waveform_sec=args.data.duration,
hop_size=args.data.block_size,
sample_rate=args.data.sampling_rate,
Expand All @@ -92,7 +92,7 @@ def get_data_loaders(args, whole_audio=False):
class AudioDataset(Dataset):
def __init__(
self,
path_root,
filelists,
waveform_sec,
hop_size,
sample_rate,
Expand All @@ -109,7 +109,7 @@ def __init__(
self.waveform_sec = waveform_sec
self.sample_rate = sample_rate
self.hop_size = hop_size
self.path_root = path_root
self.filelists = filelists
self.paths = traverse_dir(
os.path.join(path_root, 'audio'),
extensions=extensions,
Expand Down
Empty file added diffusion/logger/__init__.py
Empty file.
150 changes: 150 additions & 0 deletions diffusion/logger/saver.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,150 @@
'''
author: wayn391@mastertones
'''

import os
import json
import time
import yaml
import datetime
import torch
import matplotlib.pyplot as plt
from . import utils
from torch.utils.tensorboard import SummaryWriter

class Saver(object):
    """Experiment logger and checkpoint writer.

    Everything is written below ``args.env.expdir``: a plain-text log
    (``log_info.txt``), TensorBoard event files (``logs/``), a copy of the
    configuration (``config.yaml``) and model checkpoints
    (``<name>[_<postfix>].pt``).
    """

    def __init__(
            self,
            args,
            initial_global_step=-1):
        """Create the experiment directory, the TensorBoard writer and the config copy.

        Args:
            args: configuration object exposing ``args.env.expdir`` and
                ``args.data.sampling_rate``; it must also be convertible with
                ``dict(args)`` so it can be dumped as YAML.
            initial_global_step: value the step counter starts from.
        """
        self.expdir = args.env.expdir
        self.sample_rate = args.data.sampling_rate

        # cold start
        self.global_step = initial_global_step
        self.init_time = time.time()
        self.last_time = time.time()

        # the experiment directory also holds the checkpoints
        os.makedirs(self.expdir, exist_ok=True)

        # path of the plain-text log
        self.path_log_info = os.path.join(self.expdir, 'log_info.txt')

        # writer
        self.writer = SummaryWriter(os.path.join(self.expdir, 'logs'))

        # save config
        path_config = os.path.join(self.expdir, 'config.yaml')
        with open(path_config, "w") as out_config:
            yaml.dump(dict(args), out_config)

    def log_info(self, msg):
        """Print *msg* and append it to the text log.

        A dict is rendered as one ``key: value`` line per item; ``int`` values
        are formatted with thousands separators. Any other *msg* is used as is.
        """
        if isinstance(msg, dict):
            msg_list = []
            for k, v in msg.items():
                if isinstance(v, int):
                    tmp_str = '{}: {:,}'.format(k, v)
                else:
                    tmp_str = '{}: {}'.format(k, v)
                msg_list.append(tmp_str)
            msg_str = '\n'.join(msg_list)
        else:
            msg_str = msg

        # display
        print(msg_str)

        # save
        with open(self.path_log_info, 'a') as fp:
            fp.write(msg_str + '\n')

    def log_value(self, dict):
        """Write every ``tag -> scalar`` pair to TensorBoard at the current step."""
        for k, v in dict.items():
            self.writer.add_scalar(k, v, self.global_step)

    def log_spec(self, name, spec, spec_out, vmin=-14, vmax=3.5):
        """Log a figure showing |spec_out - spec|, spec and spec_out side by side.

        The three tensors are concatenated along the last axis and only the
        first entry along the leading axis is plotted.
        NOTE(review): presumably inputs are (batch, frames, bins) — confirm
        against the caller.
        """
        spec_cat = torch.cat([(spec_out - spec).abs() + vmin, spec, spec_out], -1)
        # detach so tensors that still track gradients can be converted
        image = spec_cat[0].detach().cpu().numpy()
        fig = plt.figure(figsize=(12, 9))
        plt.pcolor(image.T, vmin=vmin, vmax=vmax)
        plt.tight_layout()
        self.writer.add_figure(name, fig, self.global_step)

    def log_audio(self, dict):
        """Write every ``tag -> audio`` pair to TensorBoard at the current step."""
        for k, v in dict.items():
            self.writer.add_audio(k, v, global_step=self.global_step, sample_rate=self.sample_rate)

    def get_interval_time(self, update=True):
        """Return seconds elapsed since the last reference time.

        When *update* is true the reference time is reset to now.
        """
        cur_time = time.time()
        time_interval = cur_time - self.last_time
        if update:
            self.last_time = cur_time
        return time_interval

    def get_total_time(self, to_str=True):
        """Return the time elapsed since construction.

        With ``to_str=True`` the value is ``str(timedelta)`` with its last
        five characters removed; otherwise it is a float number of seconds.
        """
        total_time = time.time() - self.init_time
        if to_str:
            total_time = str(datetime.timedelta(
                seconds=total_time))[:-5]
        return total_time

    @staticmethod
    def _state_dict_to_json(state_dict, path_json):
        """Dump a state dict to *path_json* as ``name -> flat list of values``."""
        raw_state_dict = {
            key: value.detach().cpu().flatten().tolist()
            for key, value in state_dict.items()
        }
        with open(path_json, 'w') as outfile:
            json.dump(raw_state_dict, outfile, indent="\t")

    def save_model(
            self,
            model,
            optimizer,
            name='model',
            postfix='',
            to_json=False):
        """Save a checkpoint to ``<expdir>/<name>[_<postfix>].pt``.

        The checkpoint holds ``global_step`` and the model state dict, plus the
        optimizer state dict when *optimizer* is not None. With
        ``to_json=True`` the model parameters are additionally exported to
        ``<expdir>/<name>.json``.
        """
        # path
        if postfix:
            postfix = '_' + postfix
        path_pt = os.path.join(
            self.expdir, name + postfix + '.pt')

        # check
        print(' [*] model checkpoint saved: {}'.format(path_pt))

        # save
        state_dict = model.state_dict()
        checkpoint = {
            'global_step': self.global_step,
            'model': state_dict}
        if optimizer is not None:
            checkpoint['optimizer'] = optimizer.state_dict()
        torch.save(checkpoint, path_pt)

        # to json
        if to_json:
            path_json = os.path.join(
                self.expdir, name + '.json')
            # The previous code passed an undefined name to utils.to_json and
            # raised NameError; the checkpoint file is also not a flat tensor
            # dict, so the parameters are exported from the state dict directly.
            self._state_dict_to_json(state_dict, path_json)

    def delete_model(self, name='model', postfix=''):
        """Remove ``<expdir>/<name>[_<postfix>].pt`` if it exists."""
        # path
        if postfix:
            postfix = '_' + postfix
        path_pt = os.path.join(
            self.expdir, name + postfix + '.pt')

        # delete
        if os.path.exists(path_pt):
            os.remove(path_pt)
            print(' [*] model checkpoint deleted: {}'.format(path_pt))

    def global_step_increment(self):
        """Advance the global step counter by one."""
        self.global_step += 1


126 changes: 126 additions & 0 deletions diffusion/logger/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
import os
import yaml
import json
import pickle
import torch

def traverse_dir(
        root_dir,
        extensions,
        amount=None,
        str_include=None,
        str_exclude=None,
        is_pure=False,
        is_sort=False,
        is_ext=True):
    """Recursively collect files below *root_dir* by extension.

    Args:
        root_dir: directory to walk.
        extensions: iterable of extensions without the leading dot.
        amount: stop once this many files were collected (None = no limit).
        str_include: keep only paths containing this substring.
        str_exclude: drop paths containing this substring.
        is_pure: return paths relative to *root_dir* instead of joined paths.
        is_sort: sort the result before returning.
        is_ext: keep the file extension in the returned paths.

    Returns:
        List of matching paths.
    """
    # str.endswith accepts a tuple, so one call covers every extension
    suffixes = tuple(f".{ext}" for ext in extensions)

    file_list = []
    cnt = 0
    for root, _, files in os.walk(root_dir):
        for file in files:
            if not file.endswith(suffixes):
                continue

            # path; relpath stays correct when root_dir ends with a separator,
            # where slicing by len(root_dir)+1 would drop a leading character
            mix_path = os.path.join(root, file)
            pure_path = os.path.relpath(mix_path, root_dir) if is_pure else mix_path

            # amount
            if (amount is not None) and (cnt == amount):
                if is_sort:
                    file_list.sort()
                return file_list

            # check string
            if (str_include is not None) and (str_include not in pure_path):
                continue
            if (str_exclude is not None) and (str_exclude in pure_path):
                continue

            if not is_ext:
                ext = pure_path.split('.')[-1]
                pure_path = pure_path[:-(len(ext)+1)]
            file_list.append(pure_path)
            cnt += 1
    if is_sort:
        file_list.sort()
    return file_list



class DotDict(dict):
    """A dict whose items can also be read, set and deleted as attributes.

    Reading a missing attribute yields None. A value that is exactly a plain
    ``dict`` is wrapped in a new ``DotDict`` on every read, so nested keys can
    be chained with dots.
    """

    def __getattr__(self, key):
        value = dict.get(self, key)
        if type(value) is dict:
            return DotDict(value)
        return value

    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__


def get_network_paras_amount(model_dict):
    """Count trainable parameters per model.

    Args:
        model_dict: mapping of model name to an object exposing ``parameters()``.

    Returns:
        dict mapping each model name to the total element count of its
        parameters that have ``requires_grad`` set.
    """
    return {
        label: sum(
            param.numel()
            for param in network.parameters()
            if param.requires_grad
        )
        for label, network in model_dict.items()
    }


def load_config(path_config):
    """Read the YAML file at *path_config* and return its content as a DotDict."""
    with open(path_config, "r") as stream:
        parsed = yaml.safe_load(stream)
    return DotDict(parsed)

def save_config(path_config, config):
    """Write *config* to *path_config* as YAML.

    The configuration is converted with ``dict(config)`` before the file is
    opened, so only its top level is turned into a plain dict.
    """
    plain = dict(config)
    with open(path_config, "w") as stream:
        yaml.dump(plain, stream)

def to_json(path_params, path_json):
    """Export saved model parameters to a JSON file.

    Args:
        path_params: file readable by ``torch.load`` containing either a flat
            ``name -> tensor`` state dict, or a checkpoint dict that stores
            such a state dict under the ``'model'`` key.
        path_json: destination file; receives ``name -> flat list of values``.
    """
    params = torch.load(path_params, map_location=torch.device('cpu'))

    # A checkpoint wraps the state dict together with non-tensor entries such
    # as the step counter; unwrap it so only tensors are flattened below.
    if isinstance(params, dict) and isinstance(params.get('model'), dict):
        params = params['model']

    # Tensor.tolist() avoids the NumPy round-trip, which fails for tensors
    # that track gradients and for dtypes NumPy cannot represent.
    raw_state_dict = {
        key: value.detach().flatten().tolist()
        for key, value in params.items()
    }

    with open(path_json, 'w') as outfile:
        json.dump(raw_state_dict, outfile, indent="\t")


def convert_tensor_to_numpy(tensor, is_squeeze=True):
    """Return *tensor* as a NumPy array.

    Size-1 dimensions are removed first unless *is_squeeze* is false. The
    tensor is detached from the autograd graph and moved to the CPU before
    the conversion.
    """
    source = tensor.squeeze() if is_squeeze else tensor
    # detach() and cpu() leave a plain CPU tensor's data untouched, so they
    # can be chained without checking requires_grad / is_cuda first
    return source.detach().cpu().numpy()


def load_model(
        expdir,
        model,
        optimizer,
        name='model',
        postfix='',
        device='cpu'):
    """Restore the most recent checkpoint found in *expdir*.

    Checkpoints are looked up as ``<expdir>/<name><postfix><step>.pt``; an
    empty *postfix* is replaced by ``'_'``. The file with the largest numeric
    step is loaded; when no numbered checkpoint exists,
    ``<expdir>/<name><postfix>best.pt`` is loaded instead. If *expdir* holds
    no ``.pt`` file at all, nothing is loaded.

    Args:
        expdir: directory searched (recursively) for ``.pt`` files.
        model: module receiving the ``'model'`` state dict (``strict=False``).
        optimizer: optimizer receiving the ``'optimizer'`` state dict when the
            checkpoint has one; may be None to skip restoring it.
        name: checkpoint file name prefix.
        postfix: text between *name* and the step number.
        device: ``map_location`` device for ``torch.load``.

    Returns:
        Tuple ``(global_step, model, optimizer)``; ``global_step`` is 0 when
        nothing was restored.
    """
    # NOTE(review): a non-empty postfix is used without a '_' separator,
    # unlike Saver.save_model — kept as is for compatibility, confirm intent.
    if postfix == '':
        postfix = '_' + postfix
    path = os.path.join(expdir, name + postfix)
    candidates = traverse_dir(expdir, ['pt'], is_ext=False)
    global_step = 0
    if candidates:
        # only files sharing the checkpoint prefix can carry a step suffix
        suffixes = [c[len(path):] for c in candidates if c.startswith(path)]
        # -1 marks "no numbered checkpoint" so the best.pt fallback is
        # reachable; mapping non-numeric suffixes to 0 made it dead code
        maxstep = max((int(s) for s in suffixes if s.isdigit()), default=-1)
        if maxstep >= 0:
            path_pt = path + str(maxstep) + '.pt'
        else:
            path_pt = path + 'best.pt'
        print(' [*] restoring model from', path_pt)
        ckpt = torch.load(path_pt, map_location=torch.device(device))
        global_step = ckpt['global_step']
        model.load_state_dict(ckpt['model'], strict=False)
        if ckpt.get('optimizer') is not None and optimizer is not None:
            optimizer.load_state_dict(ckpt['optimizer'])
    return global_step, model, optimizer
Loading

0 comments on commit 76e0615

Please sign in to comment.