From ad37bcc0531bb3c3ccc2e655ce88239397c44fd4 Mon Sep 17 00:00:00 2001 From: zhuchcn Date: Fri, 29 Jul 2022 11:43:59 -0700 Subject: [PATCH] fix (aa): enzyme lysN added --- CHANGELOG.md | 4 +++- moPepGen/aa/expasy_rules.py | 1 + test/unit/test_aa.py | 15 +++++++++++---- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9a5506b9..ef73e001 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,10 +12,12 @@ This project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.htm --- -## [0.9.2] - 2022-07-28 +## [0.9.2] - 2022-07-29 - For circRNA, each reading frame subgraph is now replicated for 3 times in order to catch all variant peptides that read through the junction site. #514 +- Enzyme lysN is added. #523 + --- ## [0.9.1] - 2022-07-27 diff --git a/moPepGen/aa/expasy_rules.py b/moPepGen/aa/expasy_rules.py index 35ded9ef..3b32c53b 100644 --- a/moPepGen/aa/expasy_rules.py +++ b/moPepGen/aa/expasy_rules.py @@ -29,6 +29,7 @@ 'hydroxylamine': r'N(?=G)', 'iodosobenzoic acid': r'W', 'lysc': r'K', + 'lysn': r'\w(?=K)', 'ntcb': r'\w(?=C)', 'pepsin ph1.3': r'((?<=[^HKR][^P])[^R](?=[FL][^P]))|' r'((?<=[^HKR][^P])[FL](?=\w[^P]))', diff --git a/test/unit/test_aa.py b/test/unit/test_aa.py index 8cbfb98a..b95325f2 100644 --- a/test/unit/test_aa.py +++ b/test/unit/test_aa.py @@ -60,7 +60,7 @@ def testcreate_unique_peptide_pool_by_length(self): class TestAminoAcidSeqRecord(unittest.TestCase): """ Test case for AminoAcidSeqRecord """ - def testinfer_ids_ensembl_case1(self): + def test_infer_ids_ensembl_case1(self): """ Test that ids are infered correctly with ENSMBLE style. """ seq = aa.AminoAcidSeqRecord( seq=Seq('GTGG'), @@ -77,7 +77,7 @@ def testinfer_ids_ensembl_case1(self): self.assertEqual(seq.transcript_id, 'ENST00000631435') self.assertEqual(seq.gene_id, 'ENSG00000282253') - def testinfer_ids_ensembl_case2(self): + def test_infer_ids_ensembl_case2(self): """ Test that error will raise with GENCODE style. 
""" header = 'ENSP00000493376.2|ENST00000641515.2|ENSG00000186092.6|OTTH'+\ 'UMG00000001094.4|OTTHUMT00000003223.4|OR4F5-202|OR4F5|326' @@ -90,7 +90,7 @@ def testinfer_ids_ensembl_case2(self): with self.assertRaises(ValueError): seq.infer_ids_ensembl() - def testinfer_ids_gencode_case1(self): + def test_infer_ids_gencode_case1(self): """ Test that ids are infered correctly with ENSMBLE style. """ header = 'ENSP00000493376.2|ENST00000641515.2|ENSG00000186092.6|OTTH'+\ 'UMG00000001094.4|OTTHUMT00000003223.4|OR4F5-202|OR4F5|326' @@ -107,7 +107,7 @@ def testinfer_ids_gencode_case1(self): self.assertEqual(seq.gene_id, 'ENSG00000186092.6') self.assertEqual(seq.transcript_id, 'ENST00000641515.2') - def testinfer_ids_gencode_case2(self): + def test_infer_ids_gencode_case2(self): """ Test that error will raise with ENSEMBL style """ seq = aa.AminoAcidSeqRecord( seq=Seq('GTGG'), @@ -122,6 +122,13 @@ def testinfer_ids_gencode_case2(self): with self.assertRaises(ValueError): seq.infer_ids_gencode() + def test_enzyme_lysn(self): + """ Ensures that lysN cleaves lysine at N-terminus """ + seq = aa.AminoAcidSeqRecord('ACDEGKILMNP') + expected = {'ACDEG', 'KILMNP'} + fragments = seq.enzymatic_cleave(rule='lysn', miscleavage=0, min_mw=0, min_length=0) + received = {str(x.seq) for x in fragments} + self.assertEqual(expected, received) class TestCaseVariantPeptidePool(unittest.TestCase): """ Test cases for VariantPeptidePool """