diff --git a/src/python.rs b/src/python.rs index 6888c24..99be051 100644 --- a/src/python.rs +++ b/src/python.rs @@ -1,8 +1,6 @@ //! Python bindings for needletail // TODO: -// - Add a property to the `Record` class that returns the quality scores as a -// list of integers. // - Make the return values of `__repr__` and `__str__` show up as raw strings. // - Make `normalize_seq` and `reverse_complement` functions able to handle // `Record` objects as input. @@ -15,6 +13,7 @@ use crate::{ use pyo3::exceptions::PyValueError; use pyo3::prelude::*; +use pyo3::types::PyTuple; use pyo3::{create_exception, wrap_pyfunction}; use std::hash::{DefaultHasher, Hash, Hasher}; use std::io::Cursor; @@ -176,6 +175,57 @@ impl Record { Ok(self.qual.is_some()) } + /// Returns the numerical representation of the quality scores. + /// + /// Parameters + /// ---------- + /// base_64 : bool, default=False + /// If `True`, return the quality using the Phred+64 encoding, otherwise + /// the Phred+33 encoding will be used. + /// + /// Returns + /// ------- + /// tuple of int, optional + /// The numerical representation of the quality scores. Returns `None` + /// if quality scores are not available. + /// + /// Raises + /// ------ + /// ValueError + /// If an invalid quality score character is encountered. + #[pyo3(signature = (base_64=false))] + pub fn phred_quality_score<'py>( + &self, + py: Python<'py>, + base_64: bool, + ) -> PyResult>> { + match &self.qual { + Some(qual) => { + let offset = if base_64 { b'@' } else { b'!' }; + let scores: Result, _> = qual + .as_bytes() + .iter() + .map(|&c| { + if c < offset { + Err(PyValueError::new_err(format!( + "Invalid quality score character: '{}' (ASCII {})", + c as char, c as u8 + ))) + } else { + Ok(c - offset) + } + }) + .collect(); + + match scores { + Ok(valid_scores) => Ok(Some(PyTuple::new_bound(py, valid_scores))), + Err(e) => Err(e.into()), + } + } + None => Ok(None), + } + } + /// Normalize the sequence stored in the `seq` attribute of the object. /// /// See also @@ -365,14 +415,14 @@ pub fn normalize_seq(seq: &str, iupac: bool) -> PyResult { /// str /// The reverse complement of the input nucleotide sequence. #[pyfunction] -pub fn reverse_complement(seq: &str) -> String { +pub fn reverse_complement(seq: &str) -> PyResult { let comp: Vec = seq .as_bytes() .iter() .rev() .map(|n| complement(*n)) .collect(); - String::from_utf8_lossy(&comp).to_string() + Ok(String::from_utf8_lossy(&comp).to_string()) } #[pymodule] diff --git a/test_python.py b/test_python.py index f644e50..5e9664d 100644 --- a/test_python.py +++ b/test_python.py @@ -39,7 +39,7 @@ def test_record_normalize(self): record.normalize() self.assertEqual(record.seq, "AGCTGNNTCGA") - def test_format_record_method(self): + def test_record_format_method(self): record = Record("test", "AGCTGATCGA") self.assertTrue(record.is_fasta()) self.assertFalse(record.is_fastq()) @@ -47,6 +47,15 @@ def test_format_record_method(self): self.assertFalse(record.is_fasta()) self.assertTrue(record.is_fastq()) + def test_record_phred_quality_score_method(self): + record = Record("test", "AGCTGATCGA", "@AKKK@CATG") + self.assertEqual( + record.phred_quality_score(), (31, 32, 42, 42, 42, 31, 34, 32, 51, 38) + ) + self.assertEqual( + record.phred_quality_score(base_64=True), (0, 1, 11, 11, 11, 0, 3, 1, 20, 7) + ) + def test_record_eq(self): record1 = Record("test", "AGCTGATCGA", ";**9;;????") record2 = Record("test", "AGCTGATCGA", ";**9;;????")