Skip to content

Commit

Permalink
Merge pull request #824 from uclahs-cds/czhu-fix-split-fasta
Browse files Browse the repository at this point in the history
VariantSourceSet comparison fixed
  • Loading branch information
lydiayliu authored Nov 10, 2023
2 parents 93be448 + a03bbbe commit c3e0b66
Show file tree
Hide file tree
Showing 3 changed files with 27 additions and 12 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm

- After the update to the on-disk GTF, the coding transcripts were not being generated and saved successfully. `filterFasta` is the only command affected.

- Updated `splitFasta` and `summarizeFasta` to accept source combinations in `--order-source`.

## [1.2.1] - 2023-10-05

### Add
Expand Down
4 changes: 0 additions & 4 deletions moPepGen/aa/VariantPeptideLabel.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,10 +69,6 @@ def __gt__(self, other:VariantSourceSet) -> bool:
""" greater than """
if self == other:
return False
if len(self) > len(other):
return True
if len(self) < len(other):
return False
this = self.to_int()
that = other.to_int()
if len(this) > len(that):
Expand Down
33 changes: 25 additions & 8 deletions test/unit/test_peptide_pool_splitter.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
'INDEL-2102-TTTT-T': 'gINDEL',
'INDEL-2103-TTTT-T': 'sINDEL',
'INDEL-2104-TTTT-T': 'sINDEL',
'CIRC-ENST0002-E1-E2': 'circRNA'
'CIRC-ENST0002-E1-E2': 'circRNA',
'SE-2100': 'altSplice'
},
'ENSG0003': {
'SNV-3001-T-A': 'gSNP',
Expand All @@ -61,7 +62,7 @@
}

SOURCE_ORDER = {'gSNP': 0, 'gINDEL': 1, 'sSNV': 2, 'sINDEL': 3, 'Fusion':4,
'circRNA': 5, 'Noncoding': 6}
'altSplice':5, 'circRNA': 6, 'Noncoding': 7}

ANNOTATION_ATTRS = [
[
Expand Down Expand Up @@ -208,6 +209,17 @@ def test_comparison(self):
set2 = VariantSourceSet(['sINDEL', 'circRNA'])
self.assertTrue(set1 > set2)

levels = copy.copy(SOURCE_ORDER)
levels.update({
'Noncoding': 5,
frozenset({'Noncoding', 'circRNA'}): 6,
'circRNA': 7
})
VariantSourceSet.set_levels(levels)
set1 = VariantSourceSet(['Noncoding', 'circRNA'])
set2 = VariantSourceSet(['circRNA'])
self.assertTrue(set1 < set2)

class TestVariantPeptideInfo(unittest.TestCase):
""" Test VariantPeptideInfo """
def test_from_variant_peptide(self):
Expand Down Expand Up @@ -282,7 +294,7 @@ def test_append_order_noncoding(self):
levels = copy.copy(SOURCE_ORDER)
splitter = PeptidePoolSplitter(order=levels)
splitter.append_order_internal_sources()
self.assertEqual(splitter.order['Noncoding'], 6)
self.assertEqual(splitter.order['Noncoding'], 7)

def test_load_gvf(self):
""" test loading gvf """
Expand Down Expand Up @@ -525,27 +537,32 @@ def test_split_database_fusion(self):
def test_split_database_source_comb_order(self):
""" Test split database with source order of combinations. """
anno = create_genomic_annotation(ANNOTATION_DATA)
anno.transcripts['ENST0005'] = copy.deepcopy(anno.transcripts['ENST0002'])
anno.transcripts['ENST0005'].is_protein_coding = False
tx2gene, coding_tx = get_tx2gene_and_coding_tx(anno)
peptides_data = [
[
'SSSSSSSR',
'ENST0001|SNV-1001-T-A|INDEL-1101-TTTT-T|1' +
' ENST0001|SNV-1003-T-A|INDEL-1104-TTTT-T|1'
'CIRC-ENST0002-E1-E2|1 ENST0005|SE-2100|1'
]
]
peptides = VariantPeptidePool({create_aa_record(*x) for x in peptides_data})
label_map = LabelSourceMapping(copy.copy(LABEL_MAP1))
order = copy.copy(SOURCE_ORDER)
order[frozenset(['sSNV', 'sINDEL'])] = max(order.values()) + 1
order.update({
'Noncoding': 6,
frozenset(['altSplice', 'Noncoding']): 7,
'circRNA': 8
})
splitter = PeptidePoolSplitter(
peptides=peptides,
order=order,
label_map=label_map
)
splitter.split(2, [], tx2gene, coding_tx)

self.assertEqual({'sSNV-sINDEL'}, set(splitter.databases.keys()))
self.assertEqual({'altSplice-Noncoding'}, set(splitter.databases.keys()))

received = {str(x.seq) for x in splitter.databases['sSNV-sINDEL'].peptides}
received = {str(x.seq) for x in splitter.databases['altSplice-Noncoding'].peptides}
expected = {'SSSSSSSR'}
self.assertEqual(expected, received)

0 comments on commit c3e0b66

Please sign in to comment.