From 41035a020608b6b5d76a103ec91484460e1632a3 Mon Sep 17 00:00:00 2001 From: Valeriu Ohan Date: Wed, 25 Nov 2020 16:07:33 +0000 Subject: [PATCH 1/3] Adjust parser for undefined CIGAR. --- htslib/sam.h | 8 ++++---- sam.c | 24 ++++++++++++++++-------- test/sam.c | 22 ++++++++++++++++++++++ 3 files changed, 42 insertions(+), 12 deletions(-) diff --git a/htslib/sam.h b/htslib/sam.h index 1ccad8776..2eba99395 100644 --- a/htslib/sam.h +++ b/htslib/sam.h @@ -1110,10 +1110,10 @@ int bam_set_qname(bam1_t *b, const char *qname); can be NULL @param a_cigar [out] address of the destination uint32_t buffer @param a_mem [in/out] address of the allocated number of buffer elements - @return number of processed CIGAR operators; 0 if error + @return number of processed CIGAR operators; -1 on error */ HTSLIB_EXPORT -size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem); +ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem); /*! @function @abstract Parse a CIGAR string into a bam1_t struct @@ -1121,10 +1121,10 @@ size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t @param end [out] address of the pointer to the new end of the input string can be NULL @param b [in/out] address of the destination bam1_t struct - @return number of processed CIGAR operators; 0 if error + @return number of processed CIGAR operators; -1 on error */ HTSLIB_EXPORT -size_t bam_parse_cigar(const char *in, char **end, bam1_t *b); +ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b); /************************* *** BAM/CRAM indexing *** diff --git a/sam.c b/sam.c index 90180be66..cae3c60b9 100644 --- a/sam.c +++ b/sam.c @@ -2150,8 +2150,8 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) if (*p != '*') { uint32_t *cigar = NULL; int old_l_data = b->l_data; - uint32_t n_cigar = bam_parse_cigar(p, &p, b); - if (!n_cigar || *p++ != '\t') goto err_ret; + int32_t n_cigar = bam_parse_cigar(p, &p, b); + if (n_cigar < 1 || *p++ != '\t') goto err_ret; cigar = (uint32_t *)(b->data + old_l_data); c->n_cigar = n_cigar; @@ -2370,16 +2370,20 @@ static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { return q-in; } -size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem) { +ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem) { size_t n_cigar = 0; int diff; if (!in || !a_cigar || !a_mem) { hts_log_error("NULL pointer arguments"); - return 0; + return -1; } if (end) *end = (char *)in; + if (*in == '*') { + if (end) (*end)++; + return 0; + } n_cigar = read_ncigar(in); if (!n_cigar) return 0; if (n_cigar > *a_mem) { @@ -2389,7 +2393,7 @@ size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem = n_cigar; } else { hts_log_error("Memory allocation error"); - return 0; + return -1; } } @@ -2399,21 +2403,25 @@ size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t return n_cigar; } -size_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { +ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { size_t n_cigar = 0; int diff; if (!in || !b) { hts_log_error("NULL pointer arguments"); - return 0; + return -1; } if (end) *end = (char *)in; + if (*in == '*') { + if (end) (*end)++; + return 0; + } n_cigar = read_ncigar(in); if (!n_cigar) return 0; if (possibly_expand_bam_data(b, n_cigar * sizeof(uint32_t)) < 0) { hts_log_error("Memory allocation error"); - return 0; + return -1; } if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return 0; diff --git a/test/sam.c b/test/sam.c index af1dc34da..65a03df7d 100644 --- a/test/sam.c +++ b/test/sam.c @@ -2149,6 +2149,27 @@ static void test_bam_set1_write_and_read_back() ks_free(&ks); } +static void test_cigar_api(void) +{ + uint32_t *buf = NULL; + char *cig = "*"; + char *end; + uint32_t m = 0; + int32_t n; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 0 && m == 0 && (end-cig) == 1, "failed to parse undefined CIGAR"); + cig = "2M3X1I10M5D"; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 5 && m > 0 && (end-cig) == 11, "failed to parse CIGAR string: 2M3X1I10M5D"); + n = sam_parse_cigar("722M15D187217376188323783284M67I", NULL, &buf, &m); + VERIFY(n == 0, "failed to flag CIGAR string with long op length: 722M15D187217376188323783284M67I"); + n = sam_parse_cigar("53I722MD8X", NULL, &buf, &m); + VERIFY(n == 0, "failed to flag CIGAR string with no op length: 53I722MD8X"); + +cleanup: + free(buf); +} + int main(int argc, char **argv) { int i; @@ -2186,6 +2207,7 @@ int main(int argc, char **argv) test_bam_set1_validate_cigar(); test_bam_set1_validate_size_limits(); test_bam_set1_write_and_read_back(); + test_cigar_api(); return status; } From 523facf15d901a72e6fdfb0d510a94e41b74af1e Mon Sep 17 00:00:00 2001 From: John Marshall Date: Thu, 26 Nov 2020 10:26:03 +0000 Subject: [PATCH 2/3] Recode to use const char pointers (cherry picked from commit a12d4d447cd3ca089db05dcc35144818339a4911) --- sam.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/sam.c b/sam.c index cae3c60b9..dd7e84441 100644 --- a/sam.c +++ b/sam.c @@ -2319,9 +2319,8 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) return -2; } -static uint32_t read_ncigar(const char *in) { +static uint32_t read_ncigar(const char *q) { uint32_t n_cigar = 0; - char *q = (char *)in; for (; *q && *q != '\t'; ++q) if (!isdigit_c(*q)) ++n_cigar; if (!n_cigar) { @@ -2344,12 +2343,12 @@ static uint32_t read_ncigar(const char *in) { */ static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { int i, overflow = 0; - char *p, *q = (char *)in; + const char *p = in; for (i = 0; i < n_cigar; i++) { uint32_t len; int op; - p = q; - len = hts_str2uint(q, &q, 28, &overflow)< Date: Thu, 26 Nov 2020 13:05:22 +0000 Subject: [PATCH 3/3] Address code review issues. --- htslib/sam.h | 5 +++-- sam.c | 10 +++++----- test/sam.c | 9 +++++---- 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/htslib/sam.h b/htslib/sam.h index 2eba99395..bfd7855a6 100644 --- a/htslib/sam.h +++ b/htslib/sam.h @@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE. */ #include #include +#include #include "hts.h" #include "hts_endian.h" @@ -1108,12 +1109,12 @@ int bam_set_qname(bam1_t *b, const char *qname); @param in [in] pointer to the source string @param end [out] address of the pointer to the new end of the input string can be NULL - @param a_cigar [out] address of the destination uint32_t buffer + @param a_cigar [in/out] address of the destination uint32_t buffer @param a_mem [in/out] address of the allocated number of buffer elements @return number of processed CIGAR operators; -1 on error */ HTSLIB_EXPORT -ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem); +ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, size_t *a_mem); /*! @function @abstract Parse a CIGAR string into a bam1_t struct diff --git a/sam.c b/sam.c index dd7e84441..eb2712965 100644 --- a/sam.c +++ b/sam.c @@ -2150,7 +2150,7 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) if (*p != '*') { uint32_t *cigar = NULL; int old_l_data = b->l_data; - int32_t n_cigar = bam_parse_cigar(p, &p, b); + int n_cigar = bam_parse_cigar(p, &p, b); if (n_cigar < 1 || *p++ != '\t') goto err_ret; cigar = (uint32_t *)(b->data + old_l_data); c->n_cigar = n_cigar; @@ -2339,7 +2339,7 @@ static uint32_t read_ncigar(const char *q) { @abstract Parse a CIGAR string into preallocated a uint32_t array @param in [in] pointer to the source string @param a_cigar [out] address of the destination uint32_t buffer - @return number of processed input characters; 0 if error + @return number of processed input characters; 0 on error */ static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { int i, overflow = 0; @@ -2370,7 +2370,7 @@ static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { return p-in; } -ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem) { +ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, size_t *a_mem) { size_t n_cigar = 0; int diff; @@ -2397,7 +2397,7 @@ ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t } } - if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return 0; + if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return -1; if (end) *end = (char *)in+diff; return n_cigar; @@ -2424,7 +2424,7 @@ ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { return -1; } - if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return 0; + if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return -1; b->l_data += (n_cigar * sizeof(uint32_t)); if (end) *end = (char *)in+diff; diff --git a/test/sam.c b/test/sam.c index 65a03df7d..b6f6c0e04 100644 --- a/test/sam.c +++ b/test/sam.c @@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE. */ #include #include #include +#include // Suppress message for faidx_fetch_nseq(), which we're intentionally testing #include "../htslib/hts_defs.h" @@ -2154,17 +2155,17 @@ static void test_cigar_api(void) uint32_t *buf = NULL; char *cig = "*"; char *end; - uint32_t m = 0; - int32_t n; + size_t m = 0; + int n; n = sam_parse_cigar(cig, &end, &buf, &m); VERIFY(n == 0 && m == 0 && (end-cig) == 1, "failed to parse undefined CIGAR"); cig = "2M3X1I10M5D"; n = sam_parse_cigar(cig, &end, &buf, &m); VERIFY(n == 5 && m > 0 && (end-cig) == 11, "failed to parse CIGAR string: 2M3X1I10M5D"); n = sam_parse_cigar("722M15D187217376188323783284M67I", NULL, &buf, &m); - VERIFY(n == 0, "failed to flag CIGAR string with long op length: 722M15D187217376188323783284M67I"); + VERIFY(n == -1, "failed to flag CIGAR string with long op length: 722M15D187217376188323783284M67I"); n = sam_parse_cigar("53I722MD8X", NULL, &buf, &m); - VERIFY(n == 0, "failed to flag CIGAR string with no op length: 53I722MD8X"); + VERIFY(n == -1, "failed to flag CIGAR string with no op length: 53I722MD8X"); cleanup: free(buf);