Skip to content

Commit

Permalink
Fix bugs with offsets table (#144)
Browse files Browse the repository at this point in the history
  • Loading branch information
standage authored Oct 13, 2023
1 parent 0deb427 commit c8faac7
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,12 @@
All notable changes to this project will be documented in this file.
This project adheres to [Semantic Versioning](http://semver.org/).

## [0.10.1] 2023-10-13

### Fixed
- Bug with offsets table (`marker --format=offsets`) when multiple markers are defined for a locus (#144).


## [0.10] 2023-09-15

### Added
Expand Down
5 changes: 4 additions & 1 deletion microhapdb/cli/marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -92,7 +92,10 @@ def display(
for locus in loci.values():
print(locus.fasta)
elif view_format == "offsets":
table = pd.concat([marker.definition for marker in markers])
loci = defaultdict(Locus)
for marker in markers:
loci[marker.locus].markers.append(marker)
table = pd.concat([locus.definition for locus in loci.values()])
table = table.rename(columns={"ChromOffset": f"OffsetHg38"})
table.to_csv(sys.stdout, sep="\t", index=False)
else:
Expand Down
14 changes: 12 additions & 2 deletions microhapdb/marker.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,7 +427,7 @@ def definition(self):
variants = list()
for offset, refr_offset in zip(self.target_offsets, self.offsets):
variants.append((self.name, offset, self.chrom, refr_offset))
return pd.DataFrame(variants, columns=["Marker", "Offset", "Chrom", f"ChromOffset"])
return pd.DataFrame(variants, columns=["Marker", "Offset", "Chrom", "ChromOffset"])

def global_to_local(self, coord):
start, end = self.target_interval
Expand All @@ -446,6 +446,16 @@ class Locus:
def __init__(self, markers=None):
    """Create a locus, optionally seeded with a list of markers.

    When ``markers`` is omitted (``None``) a fresh empty list is created
    for this instance; otherwise the caller's object is stored as-is
    (no copy is made), so later mutations are visible to both sides.
    """
    if markers is None:
        markers = []
    self.markers = markers

@property
def definition(self):
    """Build the offsets table for every marker attached to this locus.

    Returns a ``pandas.DataFrame`` with one row per entry in each
    marker's ``offsets`` and the columns ``Marker``, ``Offset``,
    ``Chrom`` and ``ChromOffset``. ``ChromOffset`` is the value taken
    directly from ``marker.offsets``; ``Offset`` is that same value minus
    the first element of ``self.target_interval``, so all markers of the
    locus are expressed relative to one shared start. Markers are emitted
    in order of their ``name``.
    """
    columns = ["Marker", "Offset", "Chrom", "ChromOffset"]
    if not self.markers:
        # Nothing to report; avoid touching target_interval, which is
        # never consulted for a locus without markers.
        return pd.DataFrame([], columns=columns)
    # The interval belongs to the locus, not to an individual marker, so
    # it is read once rather than once per marker.
    start, _ = self.target_interval
    variants = []
    for marker in sorted(self.markers, key=lambda m: m.name):
        variants.extend(
            (marker.name, refr_offset - start, marker.chrom, refr_offset)
            for refr_offset in marker.offsets
        )
    return pd.DataFrame(variants, columns=columns)

@property
def fasta(self):
out = StringIO()
Expand All @@ -463,7 +473,7 @@ def fasta(self):
@property
def defline(self):
parts = [self.name, f"GRCh38:{self.target_slug}"]
for marker in self.markers:
for marker in sorted(self.markers, key=lambda m: m.name):
start, end = self.target_interval
offsets = [o - start for o in marker.offsets]
varstring = ",".join(map(str, offsets))
Expand Down
91 changes: 91 additions & 0 deletions microhapdb/tests/data/multi2.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
Marker Offset Chrom OffsetHg38
mh01KK-212.v1 47 chr1 202647418
mh01KK-212.v1 80 chr1 202647451
mh01KK-212.v1 89 chr1 202647460
mh01KK-212.v1 90 chr1 202647461
mh01KK-212.v1 97 chr1 202647468
mh01KK-212.v1 116 chr1 202647487
mh01KK-212.v1 137 chr1 202647508
mh01KK-212.v1 176 chr1 202647547
mh01KK-212.v1 187 chr1 202647558
mh01KK-212.v1 198 chr1 202647569
mh01KK-212.v1 289 chr1 202647660
mh01KK-212.v2 47 chr1 202647418
mh01KK-212.v2 80 chr1 202647451
mh01KK-212.v2 89 chr1 202647460
mh01KK-212.v2 90 chr1 202647461
mh01KK-212.v2 91 chr1 202647462
mh01KK-212.v2 96 chr1 202647467
mh01KK-212.v2 97 chr1 202647468
mh01KK-212.v2 116 chr1 202647487
mh01KK-212.v2 137 chr1 202647508
mh01KK-212.v2 155 chr1 202647526
mh01KK-212.v2 156 chr1 202647527
mh01KK-212.v2 176 chr1 202647547
mh01KK-212.v2 187 chr1 202647558
mh01KK-212.v2 197 chr1 202647568
mh01KK-212.v2 198 chr1 202647569
mh01KK-212.v2 265 chr1 202647636
mh01KK-212.v2 289 chr1 202647660
mh01KK-212.v3 47 chr1 202647418
mh01KK-212.v3 89 chr1 202647460
mh01KK-212.v3 90 chr1 202647461
mh01KK-212.v3 116 chr1 202647487
mh01KK-212.v3 137 chr1 202647508
mh01KK-212.v3 176 chr1 202647547
mh01KK-212.v3 187 chr1 202647558
mh01KK-212.v4 47 chr1 202647418
mh01KK-212.v4 89 chr1 202647460
mh01KK-212.v4 90 chr1 202647461
mh01KK-212.v4 116 chr1 202647487
mh01KK-212.v4 137 chr1 202647508
mh01KK-212.v4 176 chr1 202647547
mh01KK-212.v4 187 chr1 202647558
mh01KK-212.v4 289 chr1 202647660
mh01KK-212.v5 89 chr1 202647460
mh01KK-212.v5 90 chr1 202647461
mh01KK-212.v5 116 chr1 202647487
mh01KK-212.v5 137 chr1 202647508
mh01KK-212.v5 176 chr1 202647547
mh01KK-212.v5 187 chr1 202647558
mh01KK-212.v6 20 chr1 202647391
mh01KK-212.v6 47 chr1 202647418
mh01KK-212.v6 89 chr1 202647460
mh01KK-212.v6 116 chr1 202647487
mh01KK-212.v6 137 chr1 202647508
mh01KK-212.v6 176 chr1 202647547
mh01KK-212.v6 187 chr1 202647558
mh01KK-212.v6 289 chr1 202647660
mh05KK-170.v1 134 chr5 2447909
mh05KK-170.v1 162 chr5 2447937
mh05KK-170.v1 256 chr5 2448031
mh05KK-170.v1 270 chr5 2448045
mh05KK-170.v2 20 chr5 2447795
mh05KK-170.v2 34 chr5 2447809
mh05KK-170.v2 38 chr5 2447813
mh05KK-170.v2 46 chr5 2447821
mh05KK-170.v2 48 chr5 2447823
mh05KK-170.v2 56 chr5 2447831
mh05KK-170.v2 94 chr5 2447869
mh05KK-170.v2 116 chr5 2447891
mh05KK-170.v2 134 chr5 2447909
mh05KK-170.v2 162 chr5 2447937
mh05KK-170.v2 256 chr5 2448031
mh05KK-170.v2 270 chr5 2448045
mh05KK-170.v2 275 chr5 2448050
mh05KK-170.v3 20 chr5 2447795
mh05KK-170.v3 34 chr5 2447809
mh05KK-170.v3 38 chr5 2447813
mh05KK-170.v3 46 chr5 2447821
mh05KK-170.v3 48 chr5 2447823
mh05KK-170.v3 56 chr5 2447831
mh05KK-170.v3 94 chr5 2447869
mh05KK-170.v3 100 chr5 2447875
mh05KK-170.v3 116 chr5 2447891
mh05KK-170.v3 134 chr5 2447909
mh05KK-170.v3 162 chr5 2447937
mh05KK-170.v3 256 chr5 2448031
mh05KK-170.v3 270 chr5 2448045
mh05KK-170.v3 275 chr5 2448050
mh05KK-170.v4 256 chr5 2448031
mh05KK-170.v4 270 chr5 2448045
16 changes: 14 additions & 2 deletions microhapdb/tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -714,14 +714,26 @@ def test_cli_fasta_locus_multimarker(capsys):
terminal = capsys.readouterr()
observed = terminal.out
expected = """
>mh02KK-134 GRCh38:chr2:160222879-160223013 mh02KK-134.v4=20,44,59 mh02KK-134.v1=20,44,59,123 mh02KK-134.v2=20,44,59,65,107,123 mh02KK-134.v3=20,44,59,65,72,87,107,123
>mh02KK-134 GRCh38:chr2:160222879-160223013 mh02KK-134.v1=20,44,59,123 mh02KK-134.v2=20,44,59,65,107,123 mh02KK-134.v3=20,44,59,65,72,87,107,123 mh02KK-134.v4=20,44,59
TACCCTTGGCAGGAACCCTCACTACCTAAGGATGGGCAATGGCTTATGAGTGAGAAACACGGAGCCGTGGGAACTCAGAA
TGACATGCTACCTGGAGATTGTGGTAACGCCCTGTTTTTTTGTGGGCATATCTA
>mh14SHY-003 GRCh38:chr14:57983921-57984213 mh14SHY-003.v1=10,14,16,26,102,108,109,161,192,199 mh14SHY-003.v3=10,14,102,108,199,262,281 mh14SHY-003.v4=102,108,199 mh14SHY-003.v2=102,108,199,262,281
>mh14SHY-003 GRCh38:chr14:57983921-57984213 mh14SHY-003.v1=10,14,16,26,102,108,109,161,192,199 mh14SHY-003.v2=102,108,199,262,281 mh14SHY-003.v3=10,14,102,108,199,262,281 mh14SHY-003.v4=102,108,199
GTAGGAGTGATGTACGGGGCACCTACTTGGGGTTCACATGCTGGCCCCTTTATTGAGTTCATTCTGAATCCAGAAGCTTG
GCAGAGTTCAGCCAGATGGCAGGGTGAGCGCCCTGCCTTCCTGGTAGTCTCTTCTTCTGCAAGGGAATAGGAGGCGTTCA
CCCTCCTTTGTTCAAGAGTCTATTTCTAGGGGCCTATCAGCCCAGGGTCCCTTCTCCAGCTTTCTCAGGAGGCCCCACAT
CATCAGGCAATTAGCTCTCTAGTGGGTATAACTGCTACTGCCACAACCACTG
"""
print(observed)
assert observed.strip() == expected.strip()


def test_cli_offsets_locus_multimarker(capsys):
    """Check ``marker --format=offsets`` output for two loci against multi2.tsv.

    Runs the CLI for ``mh01KK-212`` and ``mh05KK-170`` with ``--delta=20``
    and compares captured stdout (ignoring leading/trailing whitespace)
    with the stored fixture file.
    """
    cli_args = get_parser().parse_args(
        ["marker", "mh01KK-212", "mh05KK-170", "--delta=20", "--format=offsets"]
    )
    microhapdb.cli.main(cli_args)
    observed = capsys.readouterr().out
    with open(data_file("multi2.tsv"), "r") as handle:
        expected = handle.read()
    # Echo the captured table so it shows up in pytest's failure report.
    print(observed)
    assert observed.strip() == expected.strip()

0 comments on commit c8faac7

Please sign in to comment.