Skip to content

Commit f1f18d0

Browse files
authored
Merge pull request #60 from KCCG/feature/MITY-40-custom-reference-file
Merge custom reference file updates to development
2 parents 9d79036 + 1704b53 commit f1f18d0

File tree

3 files changed

+84
-60
lines changed

3 files changed

+84
-60
lines changed

mitylib/call.py

+13-5
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@
44
import logging
55
import os.path
66
import sys
7+
from typing import Any, Optional, Tuple
78
import urllib.request
89

910
import pysam
@@ -241,7 +242,9 @@ def bam_has_rg(self, bam):
241242
r = pysam.AlignmentFile(bam, "rb")
242243
return len(r.header["RG"]) > 0
243244

244-
def bam_get_mt_contig(self, bam: str, as_string: bool = False):
245+
def bam_get_mt_contig(
246+
self, bam: str, as_string: bool = False
247+
) -> Optional[Tuple[Any, Any] | str]:
245248
"""
246249
Retrieve mitochondrial contig information from a BAM or CRAM file.
247250
@@ -259,18 +262,23 @@ def bam_get_mt_contig(self, bam: str, as_string: bool = False):
259262
"""
260263
r = pysam.AlignmentFile(bam, "rb")
261264
chroms = [str(record.get("SN")) for record in r.header["SQ"]]
262-
mito_contig = {"MT", "chrM"}.intersection(chroms)
263-
assert len(mito_contig) == 1
264-
mito_contig = "".join(mito_contig)
265+
mito_contig_intersection = {"MT", "chrM"}.intersection(chroms)
266+
267+
assert len(mito_contig_intersection) == 1
268+
mito_contig = "".join(mito_contig_intersection)
269+
265270
res = None
271+
266272
for record in r.header["SQ"]:
267273
if mito_contig == record["SN"]:
268274
res = record["SN"], record["LN"]
275+
269276
if res is not None and as_string:
270277
res = res[0] + ":1-" + str(res[1])
278+
271279
return res
272280

273-
def make_prefix(self, file_name: str, prefix: str = None):
281+
def make_prefix(self, file_name: str, prefix: Optional[str] = None):
274282
"""
275283
Generate a prefix for Mity functions if a custom prefix is not provided,
276284
the function uses the filename without the file extension (.vcf, .bam, .cram, .bed).

mitylib/report.py

+6-3
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55
import logging
66
import os.path
77
import subprocess
8-
from typing import Optional
8+
from typing import Dict, Optional
99
import pysam
1010
import pandas
1111
import yaml
@@ -192,10 +192,10 @@ def __init__(
192192
self.excel_headers = opened_report_config["excel_headers"]
193193
self.vcf_headers = opened_report_config["vcf_headers"]
194194

195-
self.excel_table = {}
195+
self.excel_table: Dict[str, list[str]] = {}
196196
self.df = None
197197

198-
self.vep = None
198+
self.vep: Optional[Vep] = None
199199

200200
self.run()
201201

@@ -433,6 +433,9 @@ def add_vep_impacts(self, variant, cohort_count: int) -> None:
433433
"""
434434
Adds vep impacts for a variant.
435435
"""
436+
if not self.vep:
437+
raise RuntimeError("Vep not initialised, something likely went wrong with vcfanno")
438+
436439
variant_impacts = self.vep.get_vep_values(variant)
437440
for _ in range(cohort_count):
438441
for vep_header in self.vep.vep_excel_headers:

mitylib/util.py

+65-52
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,13 @@
11
"""
22
Contains utility functions for mity modules.
33
"""
4+
45
import logging
56
import os
67
import subprocess
78
import sys
89
from glob import glob
10+
from typing import Optional, Tuple
911
import pysam
1012

1113

@@ -15,38 +17,38 @@ class MityUtil:
1517
"""
1618

1719
MITY_DIR = "mitylib"
18-
GENOME_FILE = "mitylib/reference/b37d5.genome"
1920
REF_DIR = "reference"
2021
ANNOT_DIR = "annot"
2122

22-
@classmethod
23-
def get_mity_dir(cls):
23+
@staticmethod
24+
def get_mity_dir():
2425
"""
2526
Get the directory path of the Mity library.
2627
2728
Returns:
2829
str: The path to the Mity library directory.
2930
"""
30-
path = os.path.dirname(sys.modules["mitylib"].__file__)
31-
return path
31+
return os.path.dirname(sys.modules["mitylib"].__file__)
3232

33-
@classmethod
34-
def tabix(cls, f: str):
33+
@staticmethod
34+
def tabix(bgzipped_file: str) -> None:
3535
"""
3636
Generate a tabix index for a bgzipped file.
3737
3838
Parameters:
39-
f (str): The path to a bgzip compressed file.
39+
bgzipped_file (str): The path to a bgzip compressed file.
4040
4141
Returns:
4242
None
4343
"""
44-
tabix_call = "tabix -f " + f
44+
tabix_call = "tabix -f " + bgzipped_file
4545
logging.debug(tabix_call)
4646
subprocess.run(tabix_call, shell=True, check=False)
4747

48-
@classmethod
49-
def select_reference_fasta(cls, reference: str, custom_reference_fa: str = None):
48+
@staticmethod
49+
def select_reference_fasta(
50+
reference: str, custom_reference_fa: Optional[str] = None
51+
) -> str:
5052
"""
5153
Select the reference genome fasta file.
5254
@@ -57,45 +59,50 @@ def select_reference_fasta(cls, reference: str, custom_reference_fa: str = None)
5759
Returns:
5860
str: The path to the selected reference genome fasta file.
5961
"""
60-
if custom_reference_fa is not None and os.path.exists(custom_reference_fa):
61-
res = custom_reference_fa
62-
else:
63-
ref_dir = os.path.join(cls.get_mity_dir(), cls.REF_DIR)
64-
res = glob(f"{ref_dir}/{reference}.*.fa")
65-
logging.debug(",".join(res))
66-
assert len(res) == 1
67-
res = res[0]
68-
return res
69-
70-
@classmethod
62+
if custom_reference_fa is not None:
63+
if not os.path.exists(custom_reference_fa):
64+
raise FileNotFoundError(
65+
f"--custom-reference-fasta file: {custom_reference_fa} cannot be found."
66+
)
67+
return custom_reference_fa
68+
69+
ref_dir = os.path.join(MityUtil.get_mity_dir(), MityUtil.REF_DIR)
70+
res = glob(f"{ref_dir}/{reference}.*.fa")
71+
logging.debug(",".join(res))
72+
assert len(res) == 1
73+
74+
return res[0]
75+
76+
@staticmethod
7177
def select_reference_genome(
72-
cls, reference: str, custom_reference_genome: str = None
73-
):
78+
reference: str, custom_reference_genome: Optional[str] = None
79+
) -> str:
7480
"""
7581
Select the reference genome .genome file.
7682
7783
Parameters:
7884
reference (str): One of the inbuilt reference genomes: hs37d5, hg19, hg38, mm10.
79-
custom_reference_genome (str, optional): The path to a custom reference .genome file, or None.
85+
custom_reference_genome: The path to a custom reference .genome file, or None.
8086
8187
Returns:
8288
str: The path to the selected reference .genome file.
8389
"""
84-
if custom_reference_genome is not None and os.path.exists(
85-
custom_reference_genome
86-
):
87-
res = custom_reference_genome
88-
else:
89-
ref_dir = os.path.join(cls.get_mity_dir(), cls.REF_DIR)
90-
logging.debug("Looking for .genome file in %s", ref_dir)
91-
res = glob(f"{ref_dir}/{reference}.genome")
92-
logging.debug(",".join(res))
93-
assert len(res) == 1
94-
res = res[0]
95-
return res
96-
97-
@classmethod
98-
def vcf_get_mt_contig(cls, vcf: str):
90+
if custom_reference_genome is not None:
91+
if not os.path.exists(custom_reference_genome):
92+
raise FileNotFoundError(
93+
f"--custom-reference-genome file: {custom_reference_genome} cannot be found."
94+
)
95+
return custom_reference_genome
96+
97+
ref_dir = os.path.join(MityUtil.get_mity_dir(), MityUtil.REF_DIR)
98+
logging.debug("Looking for .genome file in %s", ref_dir)
99+
res = glob(f"{ref_dir}/{reference}.genome")
100+
logging.debug(",".join(res))
101+
assert len(res) == 1
102+
return res[0]
103+
104+
@staticmethod
105+
def vcf_get_mt_contig(vcf: str) -> Tuple[str, Optional[int]]:
99106
"""
100107
Get the mitochondrial contig name and length from a VCF file.
101108
@@ -107,13 +114,19 @@ def vcf_get_mt_contig(cls, vcf: str):
107114
"""
108115
r = pysam.VariantFile(vcf, "r")
109116
chroms = r.header.contigs
110-
mito_contig = set(["MT", "chrM"]).intersection(chroms)
111-
assert len(mito_contig) == 1
112-
mito_contig = "".join(mito_contig)
113-
return r.header.contigs[mito_contig].name, r.header.contigs[mito_contig].length
117+
mito_contig_intersection = set(["MT", "chrM"]).intersection(chroms)
118+
119+
assert len(mito_contig_intersection) == 1
120+
121+
mito_contig = "".join(mito_contig_intersection)
122+
123+
mt_contig_name = r.header.contigs[mito_contig].name
124+
mt_contig_length = r.header.contigs[mito_contig].length
125+
126+
return (mt_contig_name, mt_contig_length)
114127

115-
@classmethod
116-
def get_annot_file(cls, annotation_file_path: str):
128+
@staticmethod
129+
def get_annot_file(annotation_file_path: str):
117130
"""
118131
Get the path to an annotation file.
119132
@@ -123,13 +136,13 @@ def get_annot_file(cls, annotation_file_path: str):
123136
Returns:
124137
str: The path to the annotation file.
125138
"""
126-
mitylibdir = cls.get_mity_dir()
127-
path = os.path.join(mitylibdir, cls.ANNOT_DIR, annotation_file_path)
139+
mitylibdir = MityUtil.get_mity_dir()
140+
path = os.path.join(mitylibdir, MityUtil.ANNOT_DIR, annotation_file_path)
128141
assert os.path.exists(path)
129142
return path
130143

131-
@classmethod
132-
def make_prefix(cls, vcf_path: str):
144+
@staticmethod
145+
def make_prefix(vcf_path: str):
133146
"""
134147
Make a prefix based on the input vcf path. This handles vcf files from
135148
previous steps of mity. e.g. from call to normalise, etc.
@@ -153,8 +166,8 @@ def make_prefix(cls, vcf_path: str):
153166

154167
return prefix
155168

156-
@classmethod
157-
def gsort(cls, input_path: str, output_path: str, genome: str):
169+
@staticmethod
170+
def gsort(input_path: str, output_path: str, genome: str):
158171
"""
159172
Run gsort.
160173
"""

0 commit comments

Comments
 (0)