Commit 25/10

TaoRuijie committed Oct 25, 2021
1 parent 818aeed commit ab8c187
Showing 7 changed files with 38 additions and 28 deletions.
4 changes: 4 additions & 0 deletions ECAPAModel.py
@@ -1,3 +1,7 @@
+'''
+This part is used to train the speaker model and evaluate its performance.
+'''
+
 import torch, sys, os, tqdm, numpy, soundfile, time, pickle
 import torch.nn as nn
 from tools import *
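The rest of ECAPAModel.py is collapsed in this view. As a rough, hypothetical sketch of the train-and-evaluate wrapper the new docstring describes (class and method names here are assumptions, not the file's actual contents):

```python
import torch
import torch.nn as nn

class SpeakerTrainerSketch(nn.Module):
    # Hypothetical wrapper: owns the encoder, the loss, and the optimizer.
    def __init__(self, encoder: nn.Module, loss_fn: nn.Module, lr: float = 1e-3):
        super().__init__()
        self.encoder = encoder   # e.g. the ECAPA-TDNN defined in model.py
        self.loss_fn = loss_fn   # e.g. the AAM-softmax defined in loss.py
        self.optim = torch.optim.Adam(self.parameters(), lr=lr)

    def train_network(self, loader):
        self.train()
        for data, labels in loader:
            self.optim.zero_grad()
            loss = self.loss_fn(self.encoder(data), labels)
            loss.backward()
            self.optim.step()
```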
25 changes: 7 additions & 18 deletions README.md
@@ -1,20 +1,18 @@
-## ECAPA_TDNN_Trainer
+## Introduction
 
-This repository contains unofficial code to train the speaker recognition model (Standard ECAPA-TDNN) in VoxCeleb2 dataset.
+This repository contains unofficial code to train the [standard ECAPA-TDNN](https://arxiv.org/pdf/2005.07143.pdf) for speaker recognition on the VoxCeleb2 dataset.
 
 This repository is modified from [voxceleb_trainer](https://github.com/clovaai/voxceleb_trainer).
 
 Advantages of this repository:
 
 1) Simple: 6 Python files with 600 lines in total.
 
-2) Fast: 36 hours to train in one 3090 GPU
+2) Fast: 36 hours to train on one 3090 GPU.
 
 3) Robust: EER = 0.86 on VoxCeleb1.
 
-## Best Performance in this project
-
-With AS-norm
+## Best Performance in this project (with AS-norm)
 
 | Dataset | Vox1_O | Vox1_E | Vox1_H |
 | ------- | ------ | ------ | ------ |
@@ -23,11 +21,10 @@ With AS-norm

 ***
 
-### System Description
+## System Description
 
 I will write a technical report about this system with all the details later. Please wait.
 
-
 ### Dependencies
 
 Start from building the environment
@@ -42,17 +39,13 @@ Start from the existing environment
 pip install -r requirements.txt
 ```
 
-***
-
 ### Data preparation
 
 Please follow the official code to prepare your VoxCeleb2 dataset from [here](https://github.com/clovaai/voxceleb_trainer), the 'Data preparation' part. Please read that part carefully and make sure you do it correctly.
 
-Dataset you need to perpare for training: VoxCeleb2 training set, MUSAN dataset, RIR dataset.
-
-Dataset you need to perpare for evaluation: VoxCeleb1 test set (Vox1_O, Compulsory), VoxCeleb1 train set (Vox1_E and Vox1_H, Optional).
+Datasets you need to prepare for training: 1) VoxCeleb2 training set, 2) MUSAN dataset, 3) RIR dataset.
 
-***
+Datasets you need to prepare for evaluation: 1) VoxCeleb1 test set (Vox1_O, compulsory), 2) VoxCeleb1 train set (Vox1_E and Vox1_H, optional).
 
 ### Training
 
@@ -64,8 +57,6 @@ python trainSpeakerNet.py --save_path exps/exp1

 Every `test_step` epochs, the system will be evaluated on the Vox1_O set and the EER will be printed. The result will be saved in `exps/exp1/score.txt`; the model will be saved in `exps/exp1/model`.
 
-***
-
 ### Pretrained model
 
 Our pretrained model achieves `EER: 0.96` on the Vox1_O set without AS-norm; you can download our pretrained model and check it by using:
@@ -95,8 +86,6 @@ With AS-norm, this system performs `EER: 0.86`; we will release the code of AS-norm
}
```
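The AS-norm code is not released in this commit. For readers who want the idea now, here is a minimal sketch of standard adaptive score normalization; the cohort construction, the top-k value, and the function name are assumptions, not this repository's implementation:

```python
import numpy

def as_norm_sketch(score, enroll_cohort, test_cohort, top_k=300):
    # Keep the top-k most competitive cohort scores for each side of the trial.
    e = numpy.sort(enroll_cohort)[-top_k:]
    t = numpy.sort(test_cohort)[-top_k:]
    # Z-normalize the raw score against both cohorts and average the two.
    return 0.5 * ((score - e.mean()) / e.std() + (score - t.mean()) / t.std())
```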

-***
-
 ### Acknowledgements
 
 We studied many useful projects in our coding process, including:
4 changes: 4 additions & 0 deletions dataLoader.py
@@ -1,3 +1,7 @@
+'''
+DataLoader for training
+'''
+
 import glob, numpy, os, random, soundfile, torch
 from scipy import signal
 
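The loader body is collapsed above. Given the imports (soundfile for audio I/O, scipy.signal for convolution), the augmentation pipeline plausibly looks something like this sketch; the segment length, padding scheme, and normalization are assumptions:

```python
import numpy, random, soundfile
from scipy import signal

def random_crop(path, length=32240):  # roughly 2 s at 16 kHz (assumed)
    audio, _ = soundfile.read(path)
    if audio.shape[0] <= length:
        # Wrap-pad short utterances so a full segment can always be cut.
        audio = numpy.pad(audio, (0, length - audio.shape[0] + 1), 'wrap')
    start = random.randint(0, audio.shape[0] - length)
    return audio[start:start + length]

def add_reverb(audio, rir_path):
    # Convolve the clean segment with an energy-normalized room impulse response.
    rir, _ = soundfile.read(rir_path)
    rir = rir / numpy.sqrt(numpy.sum(rir ** 2))
    return signal.convolve(audio, rir, mode='full')[:audio.shape[0]]
```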
4 changes: 4 additions & 0 deletions loss.py
@@ -1,3 +1,7 @@
+'''
+AAMsoftmax loss function copied from voxceleb_trainer: https://github.com/clovaai/voxceleb_trainer/blob/master/loss/aamsoftmax.py
+'''
+
 import torch, math
 import torch.nn as nn
 import torch.nn.functional as F
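For orientation, here is a condensed sketch of the AAM-softmax (ArcFace-style) idea; the embedding dimension and hyper-parameters are illustrative, and the linked voxceleb_trainer file remains the authoritative version:

```python
import torch
import torch.nn as nn
import torch.nn.functional as F

class AAMsoftmaxSketch(nn.Module):
    def __init__(self, n_class, emb_dim=192, m=0.2, s=30.0):
        super().__init__()
        self.weight = nn.Parameter(torch.randn(n_class, emb_dim))
        self.m, self.s = m, s
        self.ce = nn.CrossEntropyLoss()

    def forward(self, x, label):
        # Cosine similarity between L2-normalized embeddings and class centers.
        cosine = F.linear(F.normalize(x), F.normalize(self.weight))
        theta = torch.acos(cosine.clamp(-1 + 1e-7, 1 - 1e-7))
        # Add the angular margin m on the target class only, then scale by s.
        one_hot = F.one_hot(label, cosine.size(1)).float()
        logits = self.s * torch.cos(theta + self.m * one_hot)
        return self.ce(logits, label)
```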
13 changes: 9 additions & 4 deletions model.py
@@ -1,11 +1,16 @@
+'''
+This is the ECAPA-TDNN model.
+This model is modified and combined based on the following three projects:
+1. https://github.com/clovaai/voxceleb_trainer/issues/86
+2. https://github.com/lawlict/ECAPA-TDNN/blob/master/ecapa_tdnn.py
+3. https://github.com/speechbrain/speechbrain/blob/96077e9a1afff89d3f5ff47cab4bca0202770e4f/speechbrain/lobes/models/ECAPA_TDNN.py
+'''
+
 import math, torch, torchaudio
 import torch.nn as nn
 import torch.nn.functional as F
 
-# This model is modified and combined based on the following three projects:
-# 1. https://github.com/clovaai/voxceleb_trainer/issues/86
-# 2. https://github.com/lawlict/ECAPA-TDNN/blob/master/ecapa_tdnn.py
-# 3. https://github.com/speechbrain/speechbrain/blob/96077e9a1afff89d3f5ff47cab4bca0202770e4f/speechbrain/lobes/models/ECAPA_TDNN.py
-
 class SEModule(nn.Module):
     def __init__(self, channels, bottleneck=128):
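The SEModule body is truncated above. A typical 1-D squeeze-and-excitation block with this exact signature looks like the following; treat it as a sketch rather than the file's verbatim code:

```python
import torch
import torch.nn as nn

class SEModule(nn.Module):
    def __init__(self, channels, bottleneck=128):
        super().__init__()
        self.se = nn.Sequential(
            nn.AdaptiveAvgPool1d(1),                       # squeeze: global average over time
            nn.Conv1d(channels, bottleneck, kernel_size=1),
            nn.ReLU(),
            nn.Conv1d(bottleneck, channels, kernel_size=1),
            nn.Sigmoid(),                                  # per-channel gates in (0, 1)
        )

    def forward(self, x):                                  # x: (batch, channels, time)
        return x * self.se(x)                              # excite: reweight each channel
```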
12 changes: 6 additions & 6 deletions tools.py
@@ -1,8 +1,11 @@
-import os, numpy
+'''
+Some utility functions
+These functions are all copied from voxceleb_trainer: https://github.com/clovaai/voxceleb_trainer/blob/master/tuneThreshold.py
+'''
+import os, numpy, torch
 from sklearn import metrics
 from operator import itemgetter
 
-import torch
 import torch.nn.functional as F
 
 def init_args(args):
@@ -11,9 +14,6 @@ def init_args(args):
     os.makedirs(args.model_save_path, exist_ok = True)
     return args
 
-
-## The rest functions are all copied from voxceleb_trainer: https://github.com/clovaai/voxceleb_trainer/blob/master/tuneThreshold.py
-
 def tuneThresholdfromScore(scores, labels, target_fa, target_fr = None):
 
     fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1)
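tuneThresholdfromScore is cut off above; its core is reading the EER off the ROC curve computed in the line shown. A minimal standalone sketch of that step (the real function also tunes thresholds for the requested target false-alarm rates):

```python
import numpy
from sklearn import metrics

def compute_eer(scores, labels):
    fpr, tpr, thresholds = metrics.roc_curve(labels, scores, pos_label=1)
    fnr = 1 - tpr
    # The EER sits where the miss rate and the false-alarm rate cross.
    idx = numpy.nanargmin(numpy.abs(fnr - fpr))
    return (fnr[idx] + fpr[idx]) / 2, thresholds[idx]
```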
4 changes: 4 additions & 0 deletions trainECAPAModel.py
@@ -1,3 +1,7 @@
+'''
+This is the main script of the ECAPA-TDNN project: it defines the parameters and builds the training pipeline.
+'''
+
 import argparse, glob, os, torch, warnings, time
 from tools import *
 from dataLoader import train_loader
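The argument definitions are collapsed above. A hypothetical sketch of the argparse entry point the docstring describes; only `--save_path` and `test_step` appear in the README, so the remaining flag and all defaults are assumptions:

```python
import argparse

parser = argparse.ArgumentParser(description="ECAPA-TDNN training entry point (sketch)")
parser.add_argument('--save_path', type=str, default='exps/exp1',
                    help='Where score.txt and model checkpoints are written')
parser.add_argument('--test_step', type=int, default=1,
                    help='Evaluate on the Vox1_O set every N epochs')
parser.add_argument('--max_epoch', type=int, default=80,
                    help='Total number of training epochs (assumed default)')
args = parser.parse_args()
```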
