diff --git a/htslib/sam.h b/htslib/sam.h index 1ccad8776..bfd7855a6 100644 --- a/htslib/sam.h +++ b/htslib/sam.h @@ -29,6 +29,7 @@ DEALINGS IN THE SOFTWARE. */ #include #include +#include #include "hts.h" #include "hts_endian.h" @@ -1108,12 +1109,12 @@ int bam_set_qname(bam1_t *b, const char *qname); @param in [in] pointer to the source string @param end [out] address of the pointer to the new end of the input string can be NULL - @param a_cigar [out] address of the destination uint32_t buffer + @param a_cigar [in/out] address of the destination uint32_t buffer @param a_mem [in/out] address of the allocated number of buffer elements - @return number of processed CIGAR operators; 0 if error + @return number of processed CIGAR operators; -1 on error */ HTSLIB_EXPORT -size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem); +ssize_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, size_t *a_mem); /*! @function @abstract Parse a CIGAR string into a bam1_t struct @@ -1121,10 +1122,10 @@ size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t @param end [out] address of the pointer to the new end of the input string can be NULL @param b [in/out] address of the destination bam1_t struct - @return number of processed CIGAR operators; 0 if error + @return number of processed CIGAR operators; -1 on error */ HTSLIB_EXPORT -size_t bam_parse_cigar(const char *in, char **end, bam1_t *b); +ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b); /************************* *** BAM/CRAM indexing *** diff --git a/sam.c b/sam.c index 90180be66..eb2712965 100644 --- a/sam.c +++ b/sam.c @@ -2150,8 +2150,8 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) if (*p != '*') { uint32_t *cigar = NULL; int old_l_data = b->l_data; - uint32_t n_cigar = bam_parse_cigar(p, &p, b); - if (!n_cigar || *p++ != '\t') goto err_ret; + int n_cigar = bam_parse_cigar(p, &p, b); + if (n_cigar < 1 || *p++ != '\t') goto err_ret; cigar = (uint32_t *)(b->data + old_l_data); c->n_cigar = n_cigar; @@ -2319,9 +2319,8 @@ int sam_parse1(kstring_t *s, sam_hdr_t *h, bam1_t *b) return -2; } -static uint32_t read_ncigar(const char *in) { +static uint32_t read_ncigar(const char *q) { uint32_t n_cigar = 0; - char *q = (char *)in; for (; *q && *q != '\t'; ++q) if (!isdigit_c(*q)) ++n_cigar; if (!n_cigar) { @@ -2340,16 +2339,16 @@ static uint32_t read_ncigar(const char *in) { @abstract Parse a CIGAR string into preallocated a uint32_t array @param in [in] pointer to the source string @param a_cigar [out] address of the destination uint32_t buffer - @return number of processed input characters; 0 if error + @return number of processed input characters; 0 on error */ static int parse_cigar(const char *in, uint32_t *a_cigar, uint32_t n_cigar) { int i, overflow = 0; - char *p, *q = (char *)in; + const char *p = in; for (i = 0; i < n_cigar; i++) { uint32_t len; int op; - p = q; - len = hts_str2uint(q, &q, 28, &overflow)< *a_mem) { @@ -2389,34 +2393,38 @@ size_t sam_parse_cigar(const char *in, char **end, uint32_t **a_cigar, uint32_t *a_mem = n_cigar; } else { hts_log_error("Memory allocation error"); - return 0; + return -1; } } - if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return 0; + if (!(diff = parse_cigar(in, *a_cigar, n_cigar))) return -1; if (end) *end = (char *)in+diff; return n_cigar; } -size_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { +ssize_t bam_parse_cigar(const char *in, char **end, bam1_t *b) { size_t n_cigar = 0; int diff; if (!in || !b) { hts_log_error("NULL pointer arguments"); - return 0; + return -1; } if (end) *end = (char *)in; + if (*in == '*') { + if (end) (*end)++; + return 0; + } n_cigar = read_ncigar(in); if (!n_cigar) return 0; if (possibly_expand_bam_data(b, n_cigar * sizeof(uint32_t)) < 0) { hts_log_error("Memory allocation error"); - return 0; + return -1; } - if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return 0; + if (!(diff = parse_cigar(in, (uint32_t *)(b->data + b->l_data), n_cigar))) return -1; b->l_data += (n_cigar * sizeof(uint32_t)); if (end) *end = (char *)in+diff; diff --git a/test/sam.c b/test/sam.c index af1dc34da..b6f6c0e04 100644 --- a/test/sam.c +++ b/test/sam.c @@ -33,6 +33,7 @@ DEALINGS IN THE SOFTWARE. */ #include #include #include +#include // Suppress message for faidx_fetch_nseq(), which we're intentionally testing #include "../htslib/hts_defs.h" @@ -2149,6 +2150,27 @@ static void test_bam_set1_write_and_read_back() ks_free(&ks); } +static void test_cigar_api(void) +{ + uint32_t *buf = NULL; + char *cig = "*"; + char *end; + size_t m = 0; + int n; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 0 && m == 0 && (end-cig) == 1, "failed to parse undefined CIGAR"); + cig = "2M3X1I10M5D"; + n = sam_parse_cigar(cig, &end, &buf, &m); + VERIFY(n == 5 && m > 0 && (end-cig) == 11, "failed to parse CIGAR string: 2M3X1I10M5D"); + n = sam_parse_cigar("722M15D187217376188323783284M67I", NULL, &buf, &m); + VERIFY(n == -1, "failed to flag CIGAR string with long op length: 722M15D187217376188323783284M67I"); + n = sam_parse_cigar("53I722MD8X", NULL, &buf, &m); + VERIFY(n == -1, "failed to flag CIGAR string with no op length: 53I722MD8X"); + +cleanup: + free(buf); +} + int main(int argc, char **argv) { int i; @@ -2186,6 +2208,7 @@ int main(int argc, char **argv) test_bam_set1_validate_cigar(); test_bam_set1_validate_size_limits(); test_bam_set1_write_and_read_back(); + test_cigar_api(); return status; }