diff --git a/README.md b/README.md index 634c1d9..d572516 100755 --- a/README.md +++ b/README.md @@ -45,7 +45,7 @@ Before updating, back up your `config-metawrap` file so you do not have to re-do conda update -y -c ursky metawrap-mg # or for a specific version: -conda install -y -c ursky metawrap-mg=1.2.4 +conda install -y -c ursky metawrap-mg=1.3.0 ``` If you are using the (recommended) manual instalation of metaWRAP, simply run `git pull` inside the metaWRAP directory. @@ -92,7 +92,7 @@ conda install biopython blas=2.5 blast=2.6.0 bmtagger bowtie2 bwa checkm-genome conda install -y -c ursky metawrap-mg # Note: may take a while - # To fix the CONCOCT endless warning messages in metaWRAP=1.2, run + # To fix the CONCOCT endless warning messages in metaWRAP=1.2+, run conda install -y blas=2.5=mkl ``` @@ -111,7 +111,7 @@ conda config --add channels ursky conda install -y -c ursky metawrap-mg # Note: may take a while - # To fix the CONCOCT endless warning messages in metaWRAP=1.2, run + # To fix the CONCOCT endless warning messages in metaWRAP=1.2+, run conda install -y blas=2.5=mkl ``` diff --git a/bin/config-metawrap b/bin/config-metawrap index 417e106..f80fba8 100755 --- a/bin/config-metawrap +++ b/bin/config-metawrap @@ -12,5 +12,5 @@ KRAKEN_DB=/scratch/gu/MY_KRAKEN_DB BMTAGGER_DB=/scratch/gu/BMTAGGER_DB # paths to BLAST databases -BLASTDB=/localscratch/gu/NCBI_nt -TAXDUMP=/localscratch/gu/NCBI_tax +BLASTDB=~/PGScratch/testing_taxonomy/NCBI_nt_v4 +TAXDUMP=~/PGScratch/testing_taxonomy/NCBI_tax diff --git a/bin/metawrap b/bin/metawrap index b1d1683..0f6b232 100755 --- a/bin/metawrap +++ b/bin/metawrap @@ -3,7 +3,7 @@ # Master metaWRAP script that calls on individual modules/pipelines ############################################################################################################################################################## -VERSION="1.2.4" +VERSION="1.3.0" help_message () { echo"" diff --git a/bin/metawrap-modules/classify_bins.sh 
b/bin/metawrap-modules/classify_bins.sh index d6c0f0f..0971641 100755 --- a/bin/metawrap-modules/classify_bins.sh +++ b/bin/metawrap-modules/classify_bins.sh @@ -113,14 +113,17 @@ for f in $(ls $bin_folder); do cat ${bin_folder}/${f} >> ${out}/all_contigs.fa; if [[ ! -s ${out}/all_contigs.fa ]]; then error "something went wrong with joining files in $bin_folder into ${out}/all_contigs.fa"; fi +if [[ -s ${out}/megablast_out.raw.tab ]]; then + comm "megablast alignment already done. Skipping..." +else + comm "aligning ${out}/all_contigs.fa to ${BLASTDB} database with MEGABLAST. This is the longest step - please be patient. You may look at the classification progress in ${out}/megablast_out.raw.tab" + blastn -task megablast -num_threads $threads\ + -db ${BLASTDB}/nt\ + -outfmt '6 qseqid qstart qend qlen sseqid staxids sstart send bitscore evalue nident length'\ + -query ${out}/all_contigs.fa > ${out}/megablast_out.raw.tab -comm "aligning ${out}/all_contigs.fa to ${BLASTDB} database with MEGABLAST. This is the longest step - please be patient. You may look at the classification progress in ${out}/megablast_out.raw.tab" -blastn -task megablast -num_threads $threads\ - -db ${BLASTDB}/nt\ - -outfmt '6 qseqid qstart qend qlen sseqid staxids sstart send bitscore evalue nident length'\ - -query ${out}/all_contigs.fa > ${out}/megablast_out.raw.tab - -if [[ $? -ne 0 ]]; then error "Failed to run megablast. Exiting..."; fi + if [[ $? -ne 0 ]]; then error "Failed to run megablast. 
Exiting..."; fi +fi comm "removing unnecessary lines that lead to bad tax IDs (without a proper rank)" diff --git a/bin/metawrap-scripts/prune_blast_hits.py b/bin/metawrap-scripts/prune_blast_hits.py index ba02b49..47b4764 100755 --- a/bin/metawrap-scripts/prune_blast_hits.py +++ b/bin/metawrap-scripts/prune_blast_hits.py @@ -7,7 +7,7 @@ cut=line.split('\t') ranks[cut[0]]=cut[4] -exclude=["no rank", "subspecies", "species group", "varietas", "forma", "subfamily", "cohort"] +include=set(["species", "genus", "family", "order", "class", "phylum", "superkingdom"]) #prune blast output to remove mappings without a rank and remove taxid columnn for line in open(sys.argv[2]): @@ -19,7 +19,7 @@ ct=0 for id in ids.split(';'): if id not in ranks: continue - if ranks[id] in exclude: continue + if ranks[id] not in include: continue if ct>0: continue cut[5]=id ct+=1 diff --git a/conda_pkg/meta.yaml b/conda_pkg/meta.yaml index 5e16f9d..4faafe3 100644 --- a/conda_pkg/meta.yaml +++ b/conda_pkg/meta.yaml @@ -1,6 +1,6 @@ package: name: metawrap-mg - version: "1.2.4" + version: "1.3.0" source: git_url: https://github.com/bxlab/metaWRAP.git diff --git a/installation/database_installation.md b/installation/database_installation.md index 498458b..6928ab3 100644 --- a/installation/database_installation.md +++ b/installation/database_installation.md @@ -33,6 +33,8 @@ cd NCBI_nt wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/nt.*.tar.gz" for a in nt.*.tar.gz; do tar xzf $a; done ``` +Note: if you are using a more recent BLAST version (beyond v2.6), you will need the newer database format: `wget "ftp://ftp.ncbi.nlm.nih.gov/blast/db/v4/nt_v4.*.tar.gz"` + Do not forget to set the BLASTDB variable in the config-metawrap file! ``` bash BLASTDB=/your/location/of/database/NCBI_nt