1
1
"""
2
2
Contains utility functions for mity modules.
3
3
"""
4
+
4
5
import logging
5
6
import os
6
7
import subprocess
7
8
import sys
8
9
from glob import glob
10
+ from typing import Optional , Tuple
9
11
import pysam
10
12
11
13
@@ -15,38 +17,38 @@ class MityUtil:
15
17
"""
16
18
17
19
MITY_DIR = "mitylib"
18
- GENOME_FILE = "mitylib/reference/b37d5.genome"
19
20
REF_DIR = "reference"
20
21
ANNOT_DIR = "annot"
21
22
22
@staticmethod
def get_mity_dir() -> str:
    """
    Get the directory path of the Mity library.

    The location is derived from the ``__file__`` attribute of the
    "mitylib" entry in ``sys.modules``, so the package must already have
    been imported when this is called.

    Returns:
        str: The path to the Mity library directory.

    Raises:
        KeyError: If no module named "mitylib" is present in sys.modules.
    """
    mitylib_module = sys.modules["mitylib"]
    return os.path.dirname(mitylib_module.__file__)
32
32
33
@staticmethod
def tabix(bgzipped_file: str) -> None:
    """
    Generate a tabix index for a bgzipped file.

    Runs ``tabix -f <bgzipped_file>``. The command is handed to subprocess
    as an argument list (no shell), so a path containing spaces or shell
    metacharacters is treated as a single file name. Failures are logged
    and never raised.

    Parameters:
        bgzipped_file (str): The path to a bgzip compressed file.

    Returns:
        None
    """
    tabix_call = ["tabix", "-f", bgzipped_file]
    logging.debug("tabix -f %s", bgzipped_file)
    try:
        completed = subprocess.run(tabix_call, check=False)
    except OSError as err:
        # Without a shell, a missing or non-executable tabix raises here
        # instead of yielding a non-zero exit status; keep this method
        # non-raising but make the failure visible.
        logging.error("Could not run tabix on %s: %s", bgzipped_file, err)
        return

    if completed.returncode != 0:
        logging.warning(
            "tabix exited with status %d for %s",
            completed.returncode,
            bgzipped_file,
        )
47
47
48
@staticmethod
def select_reference_fasta(
    reference: str, custom_reference_fa: Optional[str] = None
) -> str:
    """
    Select the reference genome fasta file.

    A custom fasta, when given, takes precedence over the inbuilt
    references. Otherwise the file is looked up in the reference directory
    of the installed Mity library with the pattern ``<reference>.*.fa``.

    Parameters:
        reference (str): Name of an inbuilt reference genome; used as the
            prefix of the fasta file name to look for.
        custom_reference_fa (str, optional): The path to a custom reference
            fasta file, or None.

    Returns:
        str: The path to the selected reference genome fasta file.

    Raises:
        FileNotFoundError: If custom_reference_fa is given but does not exist.
        AssertionError: If the inbuilt lookup does not match exactly one file.
    """
    if custom_reference_fa is not None:
        if not os.path.exists(custom_reference_fa):
            raise FileNotFoundError(
                f"--custom-reference-fasta file: {custom_reference_fa} cannot be found."
            )
        return custom_reference_fa

    ref_dir = os.path.join(MityUtil.get_mity_dir(), MityUtil.REF_DIR)
    matches = glob(f"{ref_dir}/{reference}.*.fa")
    logging.debug(",".join(matches))

    if len(matches) != 1:
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; AssertionError is kept so callers see the
        # same exception type as before.
        raise AssertionError(
            f"Expected exactly one fasta file for reference '{reference}' "
            f"in {ref_dir}, found {len(matches)}."
        )

    return matches[0]
75
+
76
@staticmethod
def select_reference_genome(
    reference: str, custom_reference_genome: Optional[str] = None
) -> str:
    """
    Select the reference genome .genome file.

    A custom .genome file, when given, takes precedence over the inbuilt
    references. Otherwise ``<reference>.genome`` is looked up in the
    reference directory of the installed Mity library.

    Parameters:
        reference (str): One of the inbuilt reference genomes: hs37d5, hg19, hg38, mm10.
        custom_reference_genome (str, optional): The path to a custom
            reference .genome file, or None.

    Returns:
        str: The path to the selected reference .genome file.

    Raises:
        FileNotFoundError: If custom_reference_genome is given but does not
            exist.
        AssertionError: If the inbuilt lookup does not match exactly one file.
    """
    if custom_reference_genome is not None:
        if not os.path.exists(custom_reference_genome):
            raise FileNotFoundError(
                f"--custom-reference-genome file: {custom_reference_genome} cannot be found."
            )
        return custom_reference_genome

    ref_dir = os.path.join(MityUtil.get_mity_dir(), MityUtil.REF_DIR)
    logging.debug("Looking for .genome file in %s", ref_dir)
    matches = glob(f"{ref_dir}/{reference}.genome")
    logging.debug(",".join(matches))

    if len(matches) != 1:
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; AssertionError is kept so callers see the
        # same exception type as before.
        raise AssertionError(
            f"Expected exactly one .genome file for reference '{reference}' "
            f"in {ref_dir}, found {len(matches)}."
        )

    return matches[0]
103
+
104
@staticmethod
def vcf_get_mt_contig(vcf: str) -> Tuple[str, Optional[int]]:
    """
    Get the mitochondrial contig name and length from a VCF file.

    The contigs declared in the VCF header are searched for "MT" and
    "chrM"; exactly one of the two must be present.

    Parameters:
        vcf (str): The path to a VCF file that pysam can open for reading.

    Returns:
        tuple: The mitochondrial contig name and its length as recorded in
            the header.

    Raises:
        AssertionError: If the header does not contain exactly one of "MT"
            and "chrM".
    """
    # Open through a context manager so the file handle is released even
    # when the contig check below fails.
    with pysam.VariantFile(vcf, "r") as variant_file:
        contigs = variant_file.header.contigs
        mito_contig_intersection = {"MT", "chrM"}.intersection(contigs)

        if len(mito_contig_intersection) != 1:
            # Raised explicitly (rather than via `assert`) so the check
            # still runs under `python -O`; the exception type is unchanged.
            raise AssertionError(
                "Expected exactly one of 'MT' or 'chrM' in the VCF header "
                f"contigs of {vcf}, found {len(mito_contig_intersection)}."
            )

        (mito_contig,) = mito_contig_intersection
        contig = contigs[mito_contig]

        # Plain values are read out before the file is closed.
        return (contig.name, contig.length)
114
127
115
@staticmethod
def get_annot_file(annotation_file_path: str) -> str:
    """
    Get the path to an annotation file.

    The given path is resolved relative to the annotation directory of the
    installed Mity library.

    Parameters:
        annotation_file_path (str): The annotation file path, relative to
            the library's annotation directory.

    Returns:
        str: The path to the annotation file.

    Raises:
        AssertionError: If the resolved path does not exist.
    """
    mitylibdir = MityUtil.get_mity_dir()
    path = os.path.join(mitylibdir, MityUtil.ANNOT_DIR, annotation_file_path)

    if not os.path.exists(path):
        # Raised explicitly (rather than via `assert`) so the check still
        # runs under `python -O`; the exception type is unchanged.
        raise AssertionError(f"Annotation file not found: {path}")

    return path
130
143
131
- @classmethod
132
- def make_prefix (cls , vcf_path : str ):
144
+ @staticmethod
145
+ def make_prefix (vcf_path : str ):
133
146
"""
134
147
Make a prefix based on the input vcf path. This handles vcf files from
135
148
previous steps of mity. e.g. from call to normalise, etc.
@@ -153,8 +166,8 @@ def make_prefix(cls, vcf_path: str):
153
166
154
167
return prefix
155
168
156
- @classmethod
157
- def gsort (cls , input_path : str , output_path : str , genome : str ):
169
+ @staticmethod
170
+ def gsort (input_path : str , output_path : str , genome : str ):
158
171
"""
159
172
Run gsort.
160
173
"""
0 commit comments