Skip to content

Commit 9374380

Browse files
authored
Check genome size param for span=auto (#57)
* Set G=0 by default and check value in tigmint-make
* Bump up version numbers to 1.2.4
* Fix typo
* Remove and gitignore compiled long-to-linked-pe
1 parent 3f25f2a commit 9374380

9 files changed

+27
-18
lines changed

.gitignore

+1
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,7 @@ xml-patch-make/
2626
__pycache__
2727
.pytest_cache
2828
.vscode
29+
src/long-to-linked-pe
2930

3031
# BWA
3132
*.fa.amb

README.md

+1-1
Original file line numberDiff line numberDiff line change
@@ -149,7 +149,7 @@ tigmint-make tigmint-long draft=myassembly reads=myreads span=auto G=gsize dist=
149149

150150
+ `draft`: Name of the draft assembly, `myassembly.fa`
151151
+ `reads`: Name of the reads, `myreads.fq.gz`
152-
+ `G`: Haploid genome size of the draft assembly organism. Used to calculate `span` parameter automatically. Can be given as an integer or in scientific notation (e.g. '3e9' for human)
152+
+ `G`: Haploid genome size of the draft assembly organism. Required to calculate the `span` parameter automatically (`span=auto`). Can be given as an integer or in scientific notation (e.g. '3e9' for human) [default: 0, i.e. not set]
153153
+ `span=20`: Threshold for the number of spanning molecules. Set `span=auto` to select the span parameter automatically (currently only recommended for `tigmint-long`)
154154
+ `cut=500`: Cut length for long reads (`tigmint-long` only)
155155
+ `longmap=ont`: Long read platform; `ont` for Oxford Nanopore Technologies (ONT) long reads, `pb` for PacBio long reads (`tigmint-long` only)

bin/tigmint-cut

+1-1
Original file line numberDiff line numberDiff line change
@@ -242,7 +242,7 @@ def get_span(filename):
242242

243243
def main():
244244
parser = argparse.ArgumentParser(description="Find misassembled regions in assembly using Chromium molecule extents")
245-
parser.add_argument("--version", action="version", version="tigmint-cut 1.2.3")
245+
parser.add_argument("--version", action="version", version="tigmint-cut 1.2.4")
246246
parser.add_argument("fasta", type=str, help="Reference genome fasta file (must have FAI index generated)")
247247
parser.add_argument("bed", type=str, help="Sorted bed file of molecule extents")
248248
parser.add_argument("-o", "--fastaout", type=str, help="The output FASTA file.", required=True)

bin/tigmint-make

+19-11
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ endif
3333
ref=ref
3434

3535
# Haploid size of the reference genome, for calculating span, NG50 and NGA50
36-
G=-1
36+
G=0
3737

3838
# Minimap2 long read map parameter
3939
longmap=ont
@@ -114,7 +114,7 @@ endif
114114

115115
.DELETE_ON_ERROR:
116116
.SECONDARY:
117-
.PHONY: help version all tigmint tigmint-long arcs metrics draft_metrics tigmint_metrics arcs_metrics
117+
.PHONY: help version all tigmint tigmint-long arcs metrics draft_metrics tigmint_metrics arcs_metrics check_span_g
118118

119119
help:
120120
@echo 'Tigmint: Correct misassemblies using linked or long reads'
@@ -123,12 +123,12 @@ help:
123123
@echo 'For more information see https://bcgsc.github.io/tigmint/'
124124

125125
version:
126-
@echo "Tigmint 1.2.3"
126+
@echo "Tigmint 1.2.4"
127127
@echo "Written by Shaun Jackman @sjackman."
128128

129129
all: tigmint arcs
130130
ifneq ($(ref), ref)
131-
ifneq ($(G), -1)
131+
ifneq ($G, 0)
132132
all: metrics
133133
endif
134134
endif
@@ -196,19 +196,27 @@ $(draft).%.sortbx.bam: %.fq.gz $(draft).fa.bwt
196196
$(draft).%.cut$(cut).sortbx.bam: %.cut$(cut).fa.gz $(draft).fa
197197
$(gtime) minimap2 -y -t$t -ax map-$(longmap) --secondary=no $(draft).fa $< | samtools view -b -u -F4 | samtools sort -@$t -tBX -T$$(mktemp -u -t $@.XXXXXX) -o $@
198198

199+
# Check that G is set if span=auto
200+
check_span_g:
201+
ifeq ($(span), auto)
202+
ifeq ($G, 0)
203+
$(error Must set genome size parameter (G) to calculate span automatically)
204+
endif
205+
endif
206+
199207
# Segment long reads from gzipped fasta file, optionally calculating tigmint-long parameters.
200-
$(reads).cut$(cut).fq.gz: $(longreads)
208+
$(reads).cut$(cut).fq.gz: $(longreads) check_span_g
201209
ifeq ($(span), auto)
202210
ifeq ($(dist), auto)
203-
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$(G) -s -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
211+
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
204212
else
205-
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$(G) -s -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
213+
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -g$G -s -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
206214
endif
207215
else
208216
ifeq ($(dist), auto)
209-
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
217+
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) -d -o $(reads).tigmint-long.params.tsv | $(gzip) > $@
210218
else
211-
$(gtime) $(gzip) -dc $< | $(bin)/long-to-linked -l$(cut) -m$(minsize) | $(gzip) > $@
219+
$(gtime) $(gzip) -dc $(longreads) | $(bin)/long-to-linked -l$(cut) -m$(minsize) | $(gzip) > $@
212220
endif
213221
endif
214222

@@ -229,7 +237,7 @@ $(reads).tigmint-long.params.tsv: $(longreads)
229237
$(bin)/tigmint_estimate_dist.py - -n $(dist_sample) -o $@'
230238

231239
# Create molecule extents BED using cut long reads
232-
$(draft).$(reads).cut$(cut).molecule.size$(minsize).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv
240+
$(draft).$(reads).cut$(cut).molecule.size$(minsize).bed: $(longreads) $(draft).fa $(reads).tigmint-long.params.tsv check_span_g
233241
ifeq ($(dist), auto)
234242
$(gtime) $(bin)/../src/long-to-linked-pe -l $(cut) -m$(minsize) -g$G -s -b $(reads).barcode-multiplicity.tsv --bx -t$t --fasta -f $(reads).tigmint-long.params.tsv $< | \
235243
minimap2 -y -t$t -x map-$(longmap) --secondary=no $(draft).fa - | \
@@ -326,7 +334,7 @@ endif
326334
%.abyss-fac.tsv: %.fa
327335
abyss-fac -G$G -t500 $< >$@
328336

329-
ifneq ($(G), -1)
337+
ifneq ($G, 0)
330338
abyss_samtobreak=abyss-samtobreak -l500 -G$G
331339
else
332340
abyss_samtobreak=abyss-samtobreak -l500

bin/tigmint_estimate_dist.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -44,7 +44,7 @@ def main():
4444
required=False)
4545
parser.add_argument("-v", "--version",
4646
action="version",
47-
version="tigmint_estimate_dist.py 1.2.3")
47+
version="tigmint_estimate_dist.py 1.2.4")
4848

4949
args = parser.parse_args()
5050

bin/tigmint_molecule.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -243,7 +243,7 @@ def parse_arguments(self):
243243
"Read a SAM/BAM file and output a TSV file. "
244244
"The SAM/BAM file must be sorted by BX tag and then by position.")
245245
parser.add_argument(
246-
'--version', action='version', version='tigmint-molecule 1.2.3')
246+
'--version', action='version', version='tigmint-molecule 1.2.4')
247247
parser.add_argument(
248248
metavar="BAM", dest="in_bam_filename",
249249
help="Input BAM file sorted by BX tag then position, - for stdin")

bin/tigmint_molecule_paf.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -102,7 +102,7 @@ def parse_arguments(self):
102102
parser = argparse.ArgumentParser(
103103
description="Group linked reads simulated from long reads into molecules. "
104104
"Read a PAF file and output a BED file.")
105-
parser.add_argument('--version', action='version', version='tigmint_molecule_paf.py 1.2.3')
105+
parser.add_argument('--version', action='version', version='tigmint_molecule_paf.py 1.2.4')
106106
parser.add_argument(metavar="PAF", dest="PAF", help="Input PAF file, - for stdin")
107107
parser.add_argument("-o", "--output", dest="out_molecules_filename",
108108
help="Output molecule BED file [stdout]",

setup.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
setuptools.setup(
77
name="tigmint",
8-
version="1.2.3",
8+
version="1.2.4",
99
author="Shaun Jackman",
1010
author_email="[email protected]",
1111
description="Correct misassemblies using linked or long reads",

src/long-to-linked-pe.cpp

+1-1
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
#include <vector>
2222

2323
const static std::string PROGNAME = "long-to-linked-pe";
24-
const static std::string VERSION = "v1.2.3";
24+
const static std::string VERSION = "v1.2.4";
2525
const static size_t MAX_THREADS = 6;
2626

2727
static void

0 commit comments

Comments
 (0)