Skip to content

Commit bcaf65b

Browse files
committed
black with 120 char line-length
Fix #118
1 parent f82d114 commit bcaf65b

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

47 files changed

+406
-910
lines changed

.github/workflows/black.yml

+2-2
Original file line numberDiff line numberDiff line change
@@ -15,5 +15,5 @@ jobs:
1515
with:
1616
python-version: 3.8
1717
- run: pip install black==22.3.0
18-
- run: black --diff common/
19-
- run: black --check common/
18+
- run: black --diff .
19+
- run: black --check .

common/baselines/librispeech/data.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -39,9 +39,7 @@ def get_corpus_data_inputs(
3939
"""
4040

4141
# Dictionary containing all LibriSpeech CorpusObject entries
42-
corpus_object_dict = get_corpus_object_dict(
43-
audio_format="wav", output_prefix="corpora"
44-
)
42+
corpus_object_dict = get_corpus_object_dict(audio_format="wav", output_prefix="corpora")
4543

4644
# Definition of the official 4-gram LM to be used as default LM
4745
lm = {

common/baselines/librispeech/default_tools.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -16,9 +16,7 @@
1616
# RASR_BINARY_PATH = None
1717
# RASR_BINARY_PATH = compile_rasr_binaries_i6mode(commit="907eec4f4e36c11153f6ab6b5dd7675116f909f6") # use tested RASR
1818
RASR_BINARY_PATH = compile_rasr_binaries_i6mode() # use most recent RASR
19-
assert (
20-
RASR_BINARY_PATH
21-
), "Please set a specific RASR_BINARY_PATH before running the pipeline"
19+
assert RASR_BINARY_PATH, "Please set a specific RASR_BINARY_PATH before running the pipeline"
2220
RASR_BINARY_PATH.hash_overwrite = "LIBRISPEECH_DEFAULT_RASR_BINARY_PATH"
2321

2422

common/baselines/librispeech/ls100/gmm/baseline_args.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -139,9 +139,7 @@ def get_monophone_args():
139139
"use_gpu": False,
140140
}
141141

142-
return util.GmmMonophoneArgs(
143-
linear_alignment_args, monophone_training_args, monophone_recognition_args
144-
)
142+
return util.GmmMonophoneArgs(linear_alignment_args, monophone_training_args, monophone_recognition_args)
145143

146144

147145
def get_cart_args(
@@ -163,9 +161,7 @@ def get_cart_args(
163161
:return:
164162
"""
165163

166-
CartQuestions = (
167-
CartQuestionsWithStress if use_stress_marker else CartQuestionsWithoutStress
168-
)
164+
CartQuestions = CartQuestionsWithStress if use_stress_marker else CartQuestionsWithoutStress
169165

170166
cart_questions_class = CartQuestions(
171167
max_leaves=max_leaves,

common/baselines/librispeech/ls100/gmm/baseline_config.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,7 @@ def run_librispeech_100_common_baseline(
4545
steps.add_step("vtln+sat", vtln_sat_args)
4646
steps.add_step("output", final_output_args)
4747

48-
corpus_data = get_corpus_data_inputs(
49-
corpus_key="train-clean-100", use_g2p_training=True, use_stress_marker=False
50-
)
48+
corpus_data = get_corpus_data_inputs(corpus_key="train-clean-100", use_g2p_training=True, use_stress_marker=False)
5149

5250
system = gmm_system.GmmSystem(rasr_binary_path=RASR_BINARY_PATH)
5351
system.init_system(

common/baselines/librispeech/ls960/gmm/baseline_args.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -143,9 +143,7 @@ def get_monophone_args(feature_flow: str = "mfcc+deriv+norm") -> util.GmmMonopho
143143
"use_gpu": False,
144144
}
145145

146-
return util.GmmMonophoneArgs(
147-
linear_alignment_args, monophone_training_args, monophone_recognition_args
148-
)
146+
return util.GmmMonophoneArgs(linear_alignment_args, monophone_training_args, monophone_recognition_args)
149147

150148

151149
def get_cart_args(
@@ -168,9 +166,7 @@ def get_cart_args(
168166
:param add_unknown: set to true if an unknown phoneme exists
169167
"""
170168

171-
CartQuestions = (
172-
CartQuestionsWithStress if use_stress_marker else CartQuestionsWithoutStress
173-
)
169+
CartQuestions = CartQuestionsWithStress if use_stress_marker else CartQuestionsWithoutStress
174170

175171
cart_questions_class = CartQuestions(
176172
max_leaves=max_leaves,

common/baselines/librispeech/ls960/gmm/baseline_config.py

+1-3
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,7 @@ def run_librispeech_960_common_baseline(
5353

5454
# ******************** Data ********************
5555

56-
corpus_data = get_corpus_data_inputs(
57-
corpus_key="train-other-960", use_g2p_training=True, use_stress_marker=False
58-
)
56+
corpus_data = get_corpus_data_inputs(corpus_key="train-other-960", use_g2p_training=True, use_stress_marker=False)
5957

6058
# ******************** GMM System ********************
6159

common/baselines/librispeech/report.py

+2-6
Original file line numberDiff line numberDiff line change
@@ -78,14 +78,10 @@ def gmm_example_report_format(report: _Report_Type) -> str:
7878
+ str(results["dev-other"]["VTLN"]["10"])
7979
)
8080
out.append(
81-
"SAT 08".ljust(23)
82-
+ str(results["dev-clean"]["SAT"]["08"]).ljust(14)
83-
+ str(results["dev-other"]["SAT"]["08"])
81+
"SAT 08".ljust(23) + str(results["dev-clean"]["SAT"]["08"]).ljust(14) + str(results["dev-other"]["SAT"]["08"])
8482
)
8583
out.append(
86-
"SAT 10".ljust(23)
87-
+ str(results["dev-clean"]["SAT"]["10"]).ljust(14)
88-
+ str(results["dev-other"]["SAT"]["10"])
84+
"SAT 10".ljust(23) + str(results["dev-clean"]["SAT"]["10"]).ljust(14) + str(results["dev-other"]["SAT"]["10"])
8985
)
9086
out.append(
9187
"VTLN+SAT 08".ljust(17)

common/datasets/librispeech/cart.py

+5-15
Original file line numberDiff line numberDiff line change
@@ -139,16 +139,12 @@ def __init__(
139139
},
140140
{
141141
"type": "for-each-key",
142-
"keys": (" ").join(
143-
"history[0] central future[0]".split(" ")[:n_phones]
144-
),
142+
"keys": (" ").join("history[0] central future[0]".split(" ")[:n_phones]),
145143
"questions": [
146144
{
147145
"type": "for-each-value",
148146
"values": self.phonemes_boundary_str,
149-
"questions": [
150-
{"type": "question", "description": "context-phone"}
151-
],
147+
"questions": [{"type": "question", "description": "context-phone"}],
152148
},
153149
{
154150
"type": "question",
@@ -374,9 +370,7 @@ def __init__(
374370

375371

376372
class CartQuestionsWithStress:
377-
def __init__(
378-
self, max_leaves=12001, min_obs=1000, add_unknown: bool = True, n_phones=3
379-
):
373+
def __init__(self, max_leaves=12001, min_obs=1000, add_unknown: bool = True, n_phones=3):
380374
self.max_leaves = max_leaves
381375
self.min_obs = min_obs
382376
self.boundary = "#"
@@ -528,16 +522,12 @@ def __init__(
528522
},
529523
{
530524
"type": "for-each-key",
531-
"keys": (" ").join(
532-
"history[0] central future[0]".split(" ")[:n_phones]
533-
),
525+
"keys": (" ").join("history[0] central future[0]".split(" ")[:n_phones]),
534526
"questions": [
535527
{
536528
"type": "for-each-value",
537529
"values": self.phonemes_boundary_str,
538-
"questions": [
539-
{"type": "question", "description": "context-phone"}
540-
],
530+
"questions": [{"type": "question", "description": "context-phone"}],
541531
},
542532
{
543533
"type": "question",

common/datasets/librispeech/constants.py

+6-18
Original file line numberDiff line numberDiff line change
@@ -30,23 +30,11 @@
3030
"test-other": 10,
3131
}
3232

33-
durations["train-clean-460"] = (
34-
durations["train-clean-100"] + durations["train-clean-360"]
35-
)
36-
durations["train-other-960"] = (
37-
durations["train-clean-460"] + durations["train-other-500"]
38-
)
33+
durations["train-clean-460"] = durations["train-clean-100"] + durations["train-clean-360"]
34+
durations["train-other-960"] = durations["train-clean-460"] + durations["train-other-500"]
3935

40-
num_segments["train-clean-460"] = (
41-
num_segments["train-clean-100"] + num_segments["train-clean-360"]
42-
)
43-
num_segments["train-other-960"] = (
44-
num_segments["train-clean-460"] + num_segments["train-other-500"]
45-
)
36+
num_segments["train-clean-460"] = num_segments["train-clean-100"] + num_segments["train-clean-360"]
37+
num_segments["train-other-960"] = num_segments["train-clean-460"] + num_segments["train-other-500"]
4638

47-
concurrent["train-clean-460"] = (
48-
concurrent["train-clean-100"] + concurrent["train-clean-360"]
49-
)
50-
concurrent["train-other-960"] = (
51-
concurrent["train-clean-460"] + concurrent["train-other-500"]
52-
)
39+
concurrent["train-clean-460"] = concurrent["train-clean-100"] + concurrent["train-clean-360"]
40+
concurrent["train-other-960"] = concurrent["train-clean-460"] + concurrent["train-other-500"]

common/datasets/librispeech/corpus.py

+10-30
Original file line numberDiff line numberDiff line change
@@ -39,22 +39,16 @@ def get_bliss_corpus_dict(audio_format="flac", output_prefix="datasets"):
3939
output_prefix = os.path.join(output_prefix, "LibriSpeech")
4040

4141
download_metadata_job = DownloadLibriSpeechMetadataJob()
42-
download_metadata_job.add_alias(
43-
os.path.join(output_prefix, "download", "metadata_job")
44-
)
42+
download_metadata_job.add_alias(os.path.join(output_prefix, "download", "metadata_job"))
4543

4644
def _get_corpus(corpus_name):
4745
download_corpus_job = DownloadLibriSpeechCorpusJob(corpus_key=corpus_name)
4846
create_bliss_corpus_job = LibriSpeechCreateBlissCorpusJob(
4947
corpus_folder=download_corpus_job.out_corpus_folder,
5048
speaker_metadata=download_metadata_job.out_speakers,
5149
)
52-
download_corpus_job.add_alias(
53-
os.path.join(output_prefix, "download", corpus_name)
54-
)
55-
create_bliss_corpus_job.add_alias(
56-
os.path.join(output_prefix, "create_bliss", corpus_name)
57-
)
50+
download_corpus_job.add_alias(os.path.join(output_prefix, "download", corpus_name))
51+
create_bliss_corpus_job.add_alias(os.path.join(output_prefix, "create_bliss", corpus_name))
5852
return create_bliss_corpus_job.out_corpus
5953

6054
corpus_names = [
@@ -67,9 +61,7 @@ def _get_corpus(corpus_name):
6761
"train-other-500",
6862
]
6963

70-
bliss_corpus_dict = {
71-
corpus_name: _get_corpus(corpus_name) for corpus_name in corpus_names
72-
}
64+
bliss_corpus_dict = {corpus_name: _get_corpus(corpus_name) for corpus_name in corpus_names}
7365

7466
audio_format_options = {
7567
"wav": {
@@ -94,19 +86,13 @@ def _get_corpus(corpus_name):
9486
corpus_name,
9587
)
9688
)
97-
converted_bliss_corpus_dict[
98-
corpus_name
99-
] = bliss_change_encoding_job.out_corpus
89+
converted_bliss_corpus_dict[corpus_name] = bliss_change_encoding_job.out_corpus
10090
else:
10191
converted_bliss_corpus_dict = bliss_corpus_dict
10292

10393
def _merge_corpora(corpora, name):
104-
merge_job = MergeCorporaJob(
105-
bliss_corpora=corpora, name=name, merge_strategy=MergeStrategy.FLAT
106-
)
107-
merge_job.add_alias(
108-
os.path.join(output_prefix, "%s_merge" % audio_format, name)
109-
)
94+
merge_job = MergeCorporaJob(bliss_corpora=corpora, name=name, merge_strategy=MergeStrategy.FLAT)
95+
merge_job.add_alias(os.path.join(output_prefix, "%s_merge" % audio_format, name))
11096
return merge_job.out_merged_corpus
11197

11298
converted_bliss_corpus_dict["train-clean-460"] = _merge_corpora(
@@ -151,9 +137,7 @@ def get_corpus_object_dict(audio_format="flac", output_prefix="datasets"):
151137
- 'train-other-960'
152138
:rtype: dict[str, CorpusObject]
153139
"""
154-
bliss_corpus_dict = get_bliss_corpus_dict(
155-
audio_format=audio_format, output_prefix=output_prefix
156-
)
140+
bliss_corpus_dict = get_bliss_corpus_dict(audio_format=audio_format, output_prefix=output_prefix)
157141

158142
corpus_object_dict = {}
159143

@@ -196,19 +180,15 @@ def get_ogg_zip_dict(
196180
from i6_core.returnn.oggzip import BlissToOggZipJob
197181

198182
ogg_zip_dict = {}
199-
bliss_corpus_dict = get_bliss_corpus_dict(
200-
audio_format="ogg", output_prefix=output_prefix
201-
)
183+
bliss_corpus_dict = get_bliss_corpus_dict(audio_format="ogg", output_prefix=output_prefix)
202184
for name, bliss_corpus in bliss_corpus_dict.items():
203185
ogg_zip_job = BlissToOggZipJob(
204186
bliss_corpus,
205187
no_conversion=True,
206188
returnn_python_exe=returnn_python_exe,
207189
returnn_root=returnn_root,
208190
)
209-
ogg_zip_job.add_alias(
210-
os.path.join(output_prefix, "LibriSpeech", "%s_ogg_zip_job" % name)
211-
)
191+
ogg_zip_job.add_alias(os.path.join(output_prefix, "LibriSpeech", "%s_ogg_zip_job" % name))
212192
ogg_zip_dict[name] = ogg_zip_job.out_ogg_zip
213193

214194
return ogg_zip_dict

common/datasets/librispeech/export.py

+10-30
Original file line numberDiff line numberDiff line change
@@ -15,23 +15,17 @@ def _export_datasets(output_prefix):
1515

1616
# export all bliss corpora
1717
for audio_format in ["flac", "ogg", "wav"]:
18-
bliss_corpus_dict = get_bliss_corpus_dict(
19-
audio_format=audio_format, output_prefix=output_prefix
20-
)
18+
bliss_corpus_dict = get_bliss_corpus_dict(audio_format=audio_format, output_prefix=output_prefix)
2119
for name, bliss_corpus in bliss_corpus_dict.items():
2220
tk.register_output(
23-
os.path.join(
24-
output_prefix, "LibriSpeech", "%s-%s.xml.gz" % (name, audio_format)
25-
),
21+
os.path.join(output_prefix, "LibriSpeech", "%s-%s.xml.gz" % (name, audio_format)),
2622
bliss_corpus,
2723
)
2824

2925
# export all ogg zip corpora
3026
ogg_corpus_dict = get_ogg_zip_dict(output_prefix=output_prefix)
3127
for name, ogg_corpus in ogg_corpus_dict.items():
32-
tk.register_output(
33-
os.path.join(output_prefix, "LibriSpeech", "%s.ogg.zip" % name), ogg_corpus
34-
)
28+
tk.register_output(os.path.join(output_prefix, "LibriSpeech", "%s.ogg.zip" % name), ogg_corpus)
3529

3630

3731
def _export_lm_data(output_prefix):
@@ -57,37 +51,27 @@ def _export_lexicon_and_vocab(output_prefix):
5751
lexicon_output_prefix = os.path.join(output_prefix, "LibriSpeech", "lexicon")
5852

5953
# folded / without stress marker
60-
bliss_lexicon = get_bliss_lexicon(
61-
output_prefix=output_prefix, use_stress_marker=False
62-
)
54+
bliss_lexicon = get_bliss_lexicon(output_prefix=output_prefix, use_stress_marker=False)
6355
tk.register_output(
6456
os.path.join(lexicon_output_prefix, "librispeech.lexicon.folded.xml.gz"),
6557
bliss_lexicon,
6658
)
6759

68-
g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(
69-
use_stress_marker=True, output_prefix=output_prefix
70-
)
60+
g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(use_stress_marker=True, output_prefix=output_prefix)
7161
for k, lexicon in g2p_lexicon_dict.items():
7262
tk.register_output(
73-
os.path.join(
74-
lexicon_output_prefix, "%s.lexicon_with_g2p.folded.xml.gz" % k
75-
),
63+
os.path.join(lexicon_output_prefix, "%s.lexicon_with_g2p.folded.xml.gz" % k),
7664
lexicon,
7765
)
7866

7967
# with stress marker
80-
bliss_lexicon = get_bliss_lexicon(
81-
output_prefix=output_prefix, use_stress_marker=True
82-
)
68+
bliss_lexicon = get_bliss_lexicon(output_prefix=output_prefix, use_stress_marker=True)
8369
tk.register_output(
8470
os.path.join(lexicon_output_prefix, "librispeech.lexicon.xml.gz"),
8571
bliss_lexicon,
8672
)
8773

88-
g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(
89-
use_stress_marker=False, output_prefix=output_prefix
90-
)
74+
g2p_lexicon_dict = get_g2p_augmented_bliss_lexicon_dict(use_stress_marker=False, output_prefix=output_prefix)
9175
for k, lexicon in g2p_lexicon_dict.items():
9276
tk.register_output(
9377
os.path.join(lexicon_output_prefix, "%s.lexicon_with_g2p.xml.gz" % k),
@@ -102,12 +86,8 @@ def _export_legacy_bpe(output_prefix):
10286
:param str output_prefix
10387
"""
10488
lexicon_output_prefix = os.path.join(output_prefix, "LibriSpeech", "bpe")
105-
ls960_bpe_settings = get_subword_nmt_bpe(
106-
corpus_key="train-other-960", bpe_size=10000, output_prefix=output_prefix
107-
)
108-
ls100_bpe_settings = get_subword_nmt_bpe(
109-
corpus_key="train-clean-100", bpe_size=2000, output_prefix=output_prefix
110-
)
89+
ls960_bpe_settings = get_subword_nmt_bpe(corpus_key="train-other-960", bpe_size=10000, output_prefix=output_prefix)
90+
ls100_bpe_settings = get_subword_nmt_bpe(corpus_key="train-clean-100", bpe_size=2000, output_prefix=output_prefix)
11191
tk.register_output(
11292
os.path.join(lexicon_output_prefix, "train-other-960", "bpe_10k.codes"),
11393
ls960_bpe_settings.bpe_codes,

common/datasets/librispeech/language_model.py

+4-12
Original file line numberDiff line numberDiff line change
@@ -33,12 +33,8 @@ def get_arpa_lm_dict(output_prefix="datasets"):
3333
lm_dict["3gram"] = download_arpa_3gram_lm_job.out_file
3434

3535
lm_prefix = os.path.join(output_prefix, "LibriSpeech", "lm")
36-
download_arpa_3gram_lm_job.add_alias(
37-
os.path.join(lm_prefix, "download_3gram_lm_job")
38-
)
39-
download_arpa_4gram_lm_job.add_alias(
40-
os.path.join(lm_prefix, "download_4gram_lm_job")
41-
)
36+
download_arpa_3gram_lm_job.add_alias(os.path.join(lm_prefix, "download_3gram_lm_job"))
37+
download_arpa_4gram_lm_job.add_alias(os.path.join(lm_prefix, "download_4gram_lm_job"))
4238

4339
return lm_dict
4440

@@ -51,10 +47,6 @@ def get_librispeech_normalized_lm_data(output_prefix="datasets") -> tk.Path:
5147
:param output_prefix:
5248
:return: gzipped text file containing the LM training data
5349
"""
54-
download_job = DownloadJob(
55-
url="https://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz"
56-
)
57-
download_job.add_alias(
58-
os.path.join(output_prefix, "LibriSpeech", "lm", "download_lm_data")
59-
)
50+
download_job = DownloadJob(url="https://www.openslr.org/resources/11/librispeech-lm-norm.txt.gz")
51+
download_job.add_alias(os.path.join(output_prefix, "LibriSpeech", "lm", "download_lm_data"))
6052
return download_job.out_file

0 commit comments

Comments (0)