diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index f59483a..53a27a2 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -1,5 +1,5 @@ -name: outbreaker +name: spora # Controls when the workflow will run on: @@ -12,7 +12,7 @@ on: jobs: build: - name: Outbreaker test on ${{ matrix.os }} + name: spora test on ${{ matrix.os }} runs-on: ${{ matrix.os }} strategy: matrix: @@ -22,20 +22,20 @@ jobs: - uses: conda-incubator/setup-miniconda@v2 with: environment-file: environments/environment.yml - activate-environment: ncov_outbreaker + activate-environment: ncov_spora channels: conda-forge,bioconda,defaults,r mamba-version: "*" - - name: Install outbreaker + - name: Install spora run: pip install . - - name: Check outbreaker version + - name: Check spora version shell: bash -l {0} - run: outbreaker -v - - name: Run outbreaker test via CLI + run: spora -v + - name: Run spora test via CLI shell: bash -l {0} - run: outbreaker -f data/tests/focal_seqs.fa -b data/tests/background_seqs.fa -r data/reference/ncov_reference.gb -o outbreaker_test -p test --snps-only --report --rename --names-csv data/tests/names.csv - - name: Run outbreaker test via config + run: spora -f data/tests/focal_seqs.fa -b data/tests/background_seqs.fa -r data/reference/ncov_reference.gb -o outbreaker_test -p test --snps-only --report --rename --names-csv data/tests/names.csv + - name: Run spora test via config shell: bash -l {0} - run: outbreaker -c data/test_config.yaml - - name: Run pytest for outbreaker + run: spora -c data/test_config.yaml + - name: Run pytest for spora shell: bash -l {0} run: pytest tests/ diff --git a/.github/workflows/publish.yml b/.github/workflows/publish.yml new file mode 100644 index 0000000..56e8a49 --- /dev/null +++ b/.github/workflows/publish.yml @@ -0,0 +1,31 @@ +name: Publish spora to PyPI on release tag + +on: + release: + types: + - "published" +jobs: + pypi_publish: + runs-on: ubuntu-latest + steps: + - uses: 
actions/checkout@v2 + - name: Set up Python 3.8 + uses: actions/setup-python@v4.0.0 + with: + python-version: "3.8" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install setuptools wheel twine + pip install "pandas>=1.1.5" "numpy>=1.19" "biopython>=1.79" "pytest>=7.1.2" "snakemake>=7.0.0" "pypandoc>=1.8" # quoted: an unquoted > is a shell redirect + - name: build package + run: | + pip install "pandoc>=2.2" + python setup.py sdist bdist_wheel + - name: publish to PyPI + # only create releases from tagged commits to master: + if: startsWith(github.ref, 'refs/tags') + uses: pypa/gh-action-pypi-publish@v1.5.0 + with: + # if no "user:" specified use pypi token instead of username/password + password: ${{ secrets.PUBLISH_PYPI_SPORA }} # use secret from github secrets diff --git a/CHANGELOG.md b/CHANGELOG.md index 7307180..4243adb 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,4 +1,4 @@ -# outbreaker Changelog +# spora Changelog ## Version 0.1.0, 06-10-21 - Initial workflow @@ -66,3 +66,9 @@ - Above fix changes fixes the error in the SNP distance plot in the HTML report when rename is used but no names CSV is supplied - Addition of pytests in the CI/CD workflow + ## Version 0.7.0, 15-06-22 + + - outbreaker is renamed to spora to be compatible with PyPi + - version change to dplyr dependency for conda env + - Addition of CD GA workflow to publish to PyPi on release + diff --git a/README.md b/README.md index 37ae574..3984312 100644 --- a/README.md +++ b/README.md @@ -1,24 +1,24 @@ -# outbreaker +# spora: Streamlined Phylogenomic Outbreak Report Analysis -![example workflow](https://github.com/matt-sd-watson/outbreaker/actions/workflows/main.yml/badge.svg) +![example workflow](https://github.com/matt-sd-watson/spora/actions/workflows/main.yml/badge.svg) snakemake and Python integrated workflow for intermediate file generation for COVID outbreak analysis ## Installation ``` -git clone https://github.com/matt-sd-watson/outbreaker.git -conda env create -f 
ncov_outbreaker/environments/environment.yml -conda activate ncov_outbreaker -cd outbreaker +git clone https://github.com/matt-sd-watson/spora.git +conda env create -f spora/environments/environment.yml +conda activate ncov_spora +cd spora pip install . ``` ## Updating ``` -conda activate ncov_outbreaker -cd ~/outbreaker +conda activate ncov_spora +cd ~/spora git checkout main git pull pip install . @@ -27,11 +27,11 @@ pip install . ## Usage ``` usage: - outbreaker -c + spora -c OR - outbreaker --focal_list ... + spora --focal_list ... -Outbreaker: Python and snakemake outbreak workflow for COVID-19 +spora: Streamlined Phylogenomic Outbreak Report Analysis optional arguments: -h, --help Show the help output and exit. @@ -42,16 +42,15 @@ optional arguments: -b BACKGROUND_SEQS, --background-sequences BACKGROUND_SEQS Optional input .txt list or multi-FASTA background samples to add to analysis -m MASTER_FASTA, --master-fasta MASTER_FASTA - Master FASTA of genomic sequences to select from. Required if either --focal-sequences or --background-sequences are not supplied in - FASTA format + Master FASTA of genomic sequences to select from. Required if either --focal-sequences or --background-sequences are not supplied in FASTA format -o OUTDIR, --output-directory OUTDIR - Path to the desired output directory. If none is provided, a new folder named outbreaker will be created in the current directory + Path to the desired output directory. If none is provided, a new folder named spora will be created in the current directory -r REFERENCE, --reference REFERENCE .gb file containing the desired COVID-19 reference sequence. Required -p PREFIX, --prefix PREFIX Prefix string to label all output files. Default: outbreak -t NTHREADS, --nthreads NTHREADS - Number of threads to use for processing. Default: 4 + Number of threads to use for processing. Default: 2 -s, --snps-only Generate a snps-only FASTA from the input FASTA. 
Default: False -rn, --rename Rename the FASTA headers to be compatible with NML standards. Default: False -nc NAMES_CSV, --names-csv NAMES_CSV @@ -63,16 +62,16 @@ optional arguments: Integer for the minimum genome completeness percentage for filtering. Default: 90 -gl GENOME_LENGTH, --genome-length GENOME_LENGTH Integer for the minimum genome length for filtering. Default: 29500 - -rp, --report Generate a summary output report for the outbreaker run. Default: Not enabled - -v, --version Show the current outbreaker version then exit. + -rp, --report Generate a summary output report for the spora run. Default: Not enabled + -v, --version Show the current spora version then exit. ``` ## Documentation -More detailed documentation for outbreaker usage and functionality can be found [here](docs/0-OVERVIEW.md) +More detailed documentation for spora usage and functionality can be found [here](docs/0-OVERVIEW.md) ## Acknowledgments -Inspiration for code structure and design for outbreaker was inspired by [pangolin](https://github.com/cov-lineages/pangolin) and [civet](https://github.com/artic-network/civet), and minor code blocks were adopted from these software. +Inspiration for code structure and design for spora was inspired by [pangolin](https://github.com/cov-lineages/pangolin) and [civet](https://github.com/artic-network/civet), and minor code blocks were adopted from these software. The **Background** section in the documentation describing outbreak definitions was written by Mark Horsman. 
diff --git a/data/test_config.yaml b/data/test_config.yaml index 36fb0f0..96ebea2 100644 --- a/data/test_config.yaml +++ b/data/test_config.yaml @@ -4,7 +4,7 @@ background_seqs: tests/background_seqs.fa reference: reference/ncov_reference.gb -outdir: outbreaker_test_config +outdir: spora_test_config snps_only: True diff --git a/docs/0-OVERVIEW.md b/docs/0-OVERVIEW.md index 0a5c9a7..5e5e65f 100644 --- a/docs/0-OVERVIEW.md +++ b/docs/0-OVERVIEW.md @@ -1,8 +1,8 @@ -# outbreaker +# spora ## Overview -outbreaker is a workflow written in snakemake and Python that aims to facilitate rapid generation of intermediate input files that are required for outbreak analysis for COVID-19 at PHO. The workflow is designed to be flexible with command line inputs, providing users with options that can be toggled depending on the nature of the outbreak request and the input files required for downstream outbreak analysis tools. -At its core, outbreaker is designed to accept only a small number of mandatory inputs from the user, and will use a standard set of bioinformatics tools to produce a number of output files such as alignments, trees, SNP matrices, etc., that are often the required inputs for downstream outbreak tools such as [ggtree](https://github.com/YuLab-SMU/ggtree) and/or [civet/civet3](https://github.com/snake-flu/civet3). +spora is a workflow written in snakemake and Python that aims to facilitate rapid generation of intermediate input files that are required for outbreak analysis for COVID-19 at PHO. The workflow is designed to be flexible with command line inputs, providing users with options that can be toggled depending on the nature of the outbreak request and the input files required for downstream outbreak analysis tools. 
+At its core, spora is designed to accept only a small number of mandatory inputs from the user, and will use a standard set of bioinformatics tools to produce a number of output files such as alignments, trees, SNP matrices, etc., that are often the required inputs for downstream outbreak tools such as [ggtree](https://github.com/YuLab-SMU/ggtree) and/or [civet/civet3](https://github.com/snake-flu/civet3). ## Background @@ -13,7 +13,7 @@ At the same time, WGS data from COVID-19 cases known to not be involved in the s ## Workflow

- +

diff --git a/docs/1-INSTALLATION.md b/docs/1-INSTALLATION.md index a30cb8f..4fd57cf 100644 --- a/docs/1-INSTALLATION.md +++ b/docs/1-INSTALLATION.md @@ -8,13 +8,13 @@ The following tools/packages are required: \ ## Basic installation from source -The basic installation instructions for outbreaker are as follows: +The basic installation instructions for spora are as follows: ``` -git clone https://github.com/matt-sd-watson/outbreaker.git -conda env create -f outbreaker/environments/environment.yml -conda activate ncov_outbreaker -cd outbreaker +git clone https://github.com/matt-sd-watson/spora.git +conda env create -f spora/environments/environment.yml +conda activate ncov_spora +cd spora pip install . ``` @@ -22,20 +22,20 @@ pip install . Test that the installation was successful using the following commands: ``` -outbreaker +spora #OR -outbreaker --help +spora --help ``` which should result in the following output: ``` usage: - outbreaker -c + spora -c OR - outbreaker --focal_list ... + spora --focal_list ... -Outbreaker: Python and snakemake outbreak workflow for COVID-19 +spora: Streamlined Phylogenomic Outbreak Report Analysis optional arguments: -h, --help Show the help output and exit. @@ -49,7 +49,7 @@ optional arguments: Master FASTA of genomic sequences to select from. Required if either --focal-sequences or --background-sequences are not supplied in FASTA format -o OUTDIR, --output-directory OUTDIR - Path to the desired output directory. If none is provided, a new folder named outbreaker will be created in the current directory + Path to the desired output directory. If none is provided, a new folder named spora will be created in the current directory -r REFERENCE, --reference REFERENCE .gb file containing the desired COVID-19 reference sequence. Required -p PREFIX, --prefix PREFIX @@ -67,8 +67,8 @@ optional arguments: Integer for the minimum genome completeness percentage for filtering. 
Default: 90 -gl GENOME_LENGTH, --genome-length GENOME_LENGTH Integer for the minimum genome length for filtering. Default: 29500 - -rp, --report Generate a summary output report for the outbreaker run. Default: Not enabled - -v, --version Show the current outbreaker version then exit. + -rp, --report Generate a summary output report for the spora run. Default: Not enabled + -v, --version Show the current spora version then exit. ``` diff --git a/docs/2-INPUTS.md b/docs/2-INPUTS.md index 31e7bf7..2b4f4dd 100644 --- a/docs/2-INPUTS.md +++ b/docs/2-INPUTS.md @@ -1,6 +1,6 @@ # Inputs -outbreaker accepts two modes of arguments that the user may pass: \ +spora accepts two modes of arguments that the user may pass: \ • arguments listed in a config.yaml file as serialized key-value pairs \ • Individual CLI arguments passed through Python argparse syntax @@ -8,8 +8,8 @@ These two modes are mutually exclusive, meaning that if passing arguments throug ## Mandatory input formats -Of the arguments passed to outbreaker, the following are required (outbreaker will throw an error if they are not passed in either mode): \ - • ```--focal_sequences```: The collection of target sequences for evaluation by outbreaker (i.e. sequences of interest to the user). These may be passed as either sample names in a .txt file, and outbreaker will parse a master FASTA to retrieve them, or directly as a multi-FASTA file. If passed as a list of names, the format should be as follows: +Of the arguments passed to spora, the following are required (spora will throw an error if they are not passed in either mode): \ + • ```--focal_sequences```: The collection of target sequences for evaluation by spora (i.e. sequences of interest to the user). These may be passed as either sample names in a .txt file, and spora will parse a master FASTA to retrieve them, or directly as a multi-FASTA file. 
If passed as a list of names, the format should be as follows: ``` head example_focal_list.txt seq1 @@ -20,20 +20,20 @@ seq5 ``` where each line can be replaced with the specific FASTA sample header. Note that the > portion of the FASTA header should NOT be included in the list of names. \ • ```--reference```: The .gb file used for the alignment step with MAFFT. An example of a compatible COVID-19 reference file can be found in /data/reference/, named **ncov_reference.gb** \ - • ```--master_fasta```: The master FASTA file containing all PHO sequences, from which outbreaker will subset based on input focal and (optional) background lists. + • ```--master_fasta```: The master FASTA file containing all PHO sequences, from which spora will subset based on input focal and (optional) background lists. -Note: that the master_fasta input is required ONLY if either focal_sequences or background_sequences are passed as sample name lists (.txt files). If both are passed as multi-FASTA files (files with an extension of .fa or .fasta), then outbreaker will not require this file to execute. See below for background sequences, as the same formats apply to that input. +Note: that the master_fasta input is required ONLY if either focal_sequences or background_sequences are passed as sample name lists (.txt files). If both are passed as multi-FASTA files (files with an extension of .fa or .fasta), then spora will not require this file to execute. See below for background sequences, as the same formats apply to that input. ## Optional input formats -The following inputs are purely optional, but may augment the types of analysis that can be conducted using outbreaker: \ +The following inputs are purely optional, but may augment the types of analysis that can be conducted using spora: \ • ```--background_sequences```: The desired collection of context sequences that the user can use to analyze the focal sequences. 
The format of this input should follow the same rules as focal_sequences (above). \ ## Sample head renaming It is common to rename a sample COVID-19 sequence with a different alias for privacy purposes, especially if the outbreak analysis is to be shared with external collaborators. \ -outbreaker is designed to facilitate the renaming of FASTA headers to accommodate privacy guidelines and/or to use different label aliases for the outbreak. This feature can be toggled on using ```--rename```. There are two different renaming possibilities for user when ```--rename``` is enabled: \ - • **Option 1**: outbreaker will use the run prefix supplied at runtime to create new alias for each sample. In an example, for a run with 10 samples with run prefix "apartment_can", The new sample names will range from apartment_can_1 to apartment_can_10. A CSV matching the original and newly generated names will be added to the output directory. \ - • **Option 2**: A CSV file of FASTA labels can be supplied using --names_csv. This allows for custom labels for specific samples. Note that not all samples need to have a new name in this CSV. If a sample does not have a coresponding new name, it is left as is as of outbreaker v0.6.4. +spora is designed to facilitate the renaming of FASTA headers to accommodate privacy guidelines and/or to use different label aliases for the outbreak. This feature can be toggled on using ```--rename```. There are two different renaming possibilities for user when ```--rename``` is enabled: \ + • **Option 1**: spora will use the run prefix supplied at runtime to create new alias for each sample. In an example, for a run with 10 samples with run prefix "apartment_can", The new sample names will range from apartment_can_1 to apartment_can_10. A CSV matching the original and newly generated names will be added to the output directory. \ + • **Option 2**: A CSV file of FASTA labels can be supplied using --names_csv. 
This allows for custom labels for specific samples. Note that not all samples need to have a new name in this CSV. If a sample does not have a corresponding new name, it is left as is as of spora v0.6.4. The format of this CSV should be as follows: ``` original_name new_name @@ -41,17 +41,17 @@ PHLON21-SARS29115 sequence_1 PHLON21-SARS15665 sequence_2 ``` -This table will allow outbreaker to use fastafurious to rename the above PHLON sequences with sequence_# headers in all downstream input files generated by the workflow. \ +This table will allow spora to use fastafurious to rename the above PHLON sequences with sequence_# headers in all downstream input files generated by the workflow. \ If ```--names_csv``` is supplied, the CSV headers must have original_name for the current/original header name, and new_name for the target/output name to run properly. ## Optional argument descriptions -The following arguments are optional for outbreaker, but may improve and augment the types of analysis and generated files that can be produced from a specific outbreaker run: \ - • ```--output-directory```: If no output directory is specified, outbreaker will attempt to make a new folder named outbreaker in the current directory where the workflow is executed. Furthermore, if the user specifies an output directory that doesn’t yet exist, outbreaker will try to create this path. Therefore, it is important that the user have adequate permissions for the directories that outbreaker will try to access. \ - • ```--prefix```: The prefix denotes a string that will tag each of the output files for a specific outbreaker run. The prefix should be descriptive of the type of analysis being done, or the internal PHO code for the specific outbreak request. If no prefix is supplied by the user, the default is to create each output file with outbreak as the prefix. 
\ - • ```--filter```: If enabled, the user can also set --genome-completeness and --genome-length to filter out any sequences that do not meet the required thresholds. If --filter is enabled by the other options are not set, then outbreaker will use as default filtering settings genome completeness of 90% as a genome length of 29500. By default, filtering is not enabled. \ - • ```--report```: If enabled, outbreaker will generate a summary report that contains high-level information about the outbreak run and basic analyses (see below). By default, the report is not generated. \ - • ```--snps-only```: By default, outbreaker will conduct routine bioinformatics analyses of the input sequences based on the entire genome. Sometimes, it is beneficial to have phylogenetic analysis conducted using just the variable positions for samples relative to a genome (i.e. consider only the SNP locations for the inputs). if this option is enabled, outbreaker will also create a SNPs-only FASTA file and associated phylogenetic tree in addition to the tree using the entire genome. +The following arguments are optional for spora, but may improve and augment the types of analysis and generated files that can be produced from a specific spora run: \ + • ```--output-directory```: If no output directory is specified, spora will attempt to make a new folder named spora in the current directory where the workflow is executed. Furthermore, if the user specifies an output directory that doesn’t yet exist, spora will try to create this path. Therefore, it is important that the user have adequate permissions for the directories that spora will try to access. \ + • ```--prefix```: The prefix denotes a string that will tag each of the output files for a specific spora run. The prefix should be descriptive of the type of analysis being done, or the internal PHO code for the specific outbreak request. 
If no prefix is supplied by the user, the default is to create each output file with outbreak as the prefix. \ + • ```--filter```: If enabled, the user can also set --genome-completeness and --genome-length to filter out any sequences that do not meet the required thresholds. If --filter is enabled but the other options are not set, then spora will use as default filtering settings genome completeness of 90% and a genome length of 29500. By default, filtering is not enabled. \ + • ```--report```: If enabled, spora will generate a summary report that contains high-level information about the outbreak run and basic analyses (see below). By default, the report is not generated. \ + • ```--snps-only```: By default, spora will conduct routine bioinformatics analyses of the input sequences based on the entire genome. Sometimes, it is beneficial to have phylogenetic analysis conducted using just the variable positions for samples relative to a genome (i.e. consider only the SNP locations for the inputs). If this option is enabled, spora will also create a SNPs-only FASTA file and associated phylogenetic tree in addition to the tree using the entire genome. ### Option 1: config.yaml arguments (Recommended) @@ -74,18 +74,18 @@ rename: True prefix: example_Oct_2021 ``` -For reproducibility, it is recommended to record all arguments in a config.yaml and execute outbreaker with the following command: +For reproducibility, it is recommended to record all arguments in a config.yaml and execute spora with the following command: ``` -outbreaker -c config.yaml +spora -c config.yaml ``` ### Option 2: CLI argparse arguments -For flexibility, users may also pass CLI arguments to outbreaker. For the config arguments shown above, an equivalent set of CLI arguments to produce the same outputs would be as follows: +For flexibility, users may also pass CLI arguments to spora. 
For the config arguments shown above, an equivalent set of CLI arguments to produce the same outputs would be as follows: ``` -outbreaker -f /home/mwatson/COVID-19/outbreak/example_Oct_2021/focal_names.txt \ +spora -f /home/mwatson/COVID-19/outbreak/example_Oct_2021/focal_names.txt \ -m /home/mwatson/COVID-19/master_fasta/complete_all_12-Oct-2021-09-04.fa \ -r /home/mwatson/COVID-19/reference/reference.gb \ -o /home/mwatson/COVID-19/outbreak/example_Oct_2021/ \ diff --git a/docs/3-OUTPUTS.md b/docs/3-OUTPUTS.md index 82ab98b..d842a6d 100644 --- a/docs/3-OUTPUTS.md +++ b/docs/3-OUTPUTS.md @@ -1,6 +1,6 @@ # Outputs -Currently outbreaker creates its various output files all within the specified target output directory. The output files would appear as follows: \ +Currently spora creates its various output files all within the specified target output directory. The output files would appear as follows: \ • {prefix}.fa - multi-FASTA (not aligned) that contains all focal sequences, as well as background sequences, if they are included. \ • {prefix}_aln.fasta - multi-FASTA alignment (using MAFFT) of the above FASTA file. \ • {prefix}_snp_dists.csv - SNP distance matrix of all samples included in the multi-FASTA. Generated from snp-dists using the multi alignment file with molten format selected and CSV format output. Note that only ACGT differences are counted (N’s are disregarded). \ @@ -14,13 +14,13 @@ For the outbreak described above with the prefix example_oct_2021, the output di

- +

## Summary output report -As of outbreaker v0.5.0, there is the option to create a summary report in HTML format. The report contains the following sections: \ +As of spora v0.5.0, there is the option to create a summary report in HTML format. The report contains the following sections: \ • Summary Statistics \ ◦ Input sequences \ ◦ Retained sequences for analysis \ diff --git a/docs/images/outbreaker_workflow.png b/docs/images/outbreaker_workflow.png deleted file mode 100644 index ff6bd88..0000000 Binary files a/docs/images/outbreaker_workflow.png and /dev/null differ diff --git a/docs/images/outbreaker_outputs.png b/docs/images/spora_outputs.png similarity index 100% rename from docs/images/outbreaker_outputs.png rename to docs/images/spora_outputs.png diff --git a/docs/images/spora_workflow.png b/docs/images/spora_workflow.png new file mode 100644 index 0000000..0d50e0d Binary files /dev/null and b/docs/images/spora_workflow.png differ diff --git a/environments/environment.yml b/environments/environment.yml index 0a6c3d3..99ffbd0 100644 --- a/environments/environment.yml +++ b/environments/environment.yml @@ -1,4 +1,4 @@ -name: ncov_outbreaker +name: ncov_spora channels: - conda-forge - bioconda @@ -23,7 +23,7 @@ dependencies: - raxml=8.2.12 - r-argparse - r-base=>3.6.3 - - r-dplyr=1.0.7 + - r-dplyr>=1.0.9 - r-dt - r-heatmaply - r-markdown @@ -39,3 +39,4 @@ dependencies: - Bio - git+https://github.com/matt-sd-watson/FASTAfurious.git - git+https://github.com/aineniamh/snipit.git + - pypandoc>=1.8 diff --git a/outbreaker/__init__.py b/outbreaker/__init__.py deleted file mode 100644 index cf91f7a..0000000 --- a/outbreaker/__init__.py +++ /dev/null @@ -1,2 +0,0 @@ -_program = "outbreaker" -__version__ = "0.6.5" diff --git a/setup.py b/setup.py index 40c6a56..298387b 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,35 @@ from setuptools import setup -from outbreaker import __version__, _program +from spora import __version__, _program + +try: + import pypandoc + 
long_description = pypandoc.convert_file('README.md', 'rst') +except(IOError, ImportError): + long_description = open('README.md').read() setup( - name='outbreaker', + name='spora', version=__version__, - packages=['outbreaker'], - package_dir={'outbreaker': 'outbreaker'}, - scripts=["outbreaker/workflows/outbreaker.smk", - "outbreaker/workflows/outbreaker_summary_report.Rmd"], - url='', + packages=['spora'], + package_dir={'spora': 'spora'}, + scripts=["spora/workflows/spora.smk", + "spora/workflows/spora_summary_report.Rmd"], + url='https://github.com/matt-sd-watson/spora/', + project_urls = { + "Issues": "https://github.com/matt-sd-watson/spora/issues", + "Source": "https://github.com/matt-sd-watson/spora", + }, license='', author='Matthew Watson', author_email='matthew.watson@oahpp.ca', - description='snakemake and Python integrated workflow for intermediate file generation for COVID outbreak analysis', - install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79", "snakemake>=7.0.0"], + description='spora: Streamlined Phylogenomic Outbreak Report Analysis', + long_description_content_type="text/markdown", + long_description = long_description, + install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79", "snakemake>=7.0.0", "pypandoc>=1.8", + "pytest>=7.1.2"], entry_points=""" [console_scripts] - {program} = outbreaker.main:main + {program} = spora.main:main """.format(program=_program), include_package_data=True, ) diff --git a/spora/__init__.py b/spora/__init__.py new file mode 100644 index 0000000..bd29a78 --- /dev/null +++ b/spora/__init__.py @@ -0,0 +1,2 @@ +_program = "spora" +__version__ = "0.7.0" diff --git a/outbreaker/config.py b/spora/config.py similarity index 100% rename from outbreaker/config.py rename to spora/config.py diff --git a/outbreaker/init_defaults.py b/spora/init_defaults.py similarity index 98% rename from outbreaker/init_defaults.py rename to spora/init_defaults.py index b695e3d..140f05c 100644 --- 
a/outbreaker/init_defaults.py +++ b/spora/init_defaults.py @@ -1,4 +1,4 @@ -from outbreaker.config import * +from spora.config import * import os import sys import yaml @@ -6,7 +6,7 @@ def get_defaults(): default_dict = { - # all outbreaker input options + # all spora input options KEY_FOCAL_SEQS: "", KEY_BACKGROUND_SEQS: "", KEY_OUTDIR: "", diff --git a/outbreaker/main.py b/spora/main.py similarity index 91% rename from outbreaker/main.py rename to spora/main.py index b89c2ab..eb8dd06 100644 --- a/outbreaker/main.py +++ b/spora/main.py @@ -1,5 +1,5 @@ -from outbreaker import __version__ -import outbreaker.init_defaults as defaults +from spora import __version__ +import spora.init_defaults as defaults import os import sys @@ -18,7 +18,7 @@ def isFasta(input): # mandatory = set(["focal_list", "reference", "master_fasta"]) def get_primary_snakefile(thisdir): - snakefile = os.path.join(thisdir, 'workflows', 'outbreaker.smk') + snakefile = os.path.join(thisdir, 'workflows', 'spora.smk') if not os.path.exists(snakefile): print(f'Error: cannot find Snakefile at {snakefile}\n Check installation\n') sys.exit(-1) @@ -27,11 +27,11 @@ def get_primary_snakefile(thisdir): def main(sysargs = sys.argv[1:]): parser = argparse.ArgumentParser(add_help=False, - description="Outbreaker: Python and snakemake outbreak workflow for COVID-19", + description="spora: Streamlined Phylogenomic Outbreak Report Analysis", usage=''' - \toutbreaker -c + \tspora -c \tOR - \toutbreaker --focal_list ...''') + \tspora --focal_list ...''') parser.add_argument('-h', "--help", action="help", help="Show the help output and exit.", @@ -56,7 +56,7 @@ def main(sysargs = sys.argv[1:]): parser.add_argument('-o', "--output-directory", action="store", help="Path to the desired output directory. 
If none is provided, " - "a new folder named outbreaker will be created in the current directory", + "a new folder named spora will be created in the current directory", dest="outdir", default="") parser.add_argument('-r', "--reference", action="store", @@ -101,12 +101,12 @@ def main(sysargs = sys.argv[1:]): dest="genome_length", default=29500, type=int) parser.add_argument('-rp', "--report", action="store_true", - help="Generate a summary output report for the outbreaker run. Default: Not enabled", + help="Generate a summary output report for the spora run. Default: Not enabled", dest="report") parser.add_argument('-v', "--version", action="version", - help="Show the current outbreaker version then exit.", - version=f"This is outbreaker: v{__version__}") + help="Show the current spora version then exit.", + version=f"This is spora: v{__version__}") @@ -182,10 +182,10 @@ def main(sysargs = sys.argv[1:]): ) if status: # translate "success" into shell exit code of 0 - sys.stderr.write(f"\noutbreaker completed successfully.\n") + sys.stderr.write(f"\nspora completed successfully.\n") return 0 - sys.stderr.write(f"\noutbreaker did not complete successfully.\n") + sys.stderr.write(f"\nspora did not complete successfully.\n") return 1 if __name__ == '__main__': diff --git a/outbreaker/workflows/outbreaker.smk b/spora/workflows/spora.smk similarity index 99% rename from outbreaker/workflows/outbreaker.smk rename to spora/workflows/spora.smk index 6cf2f2c..8e8445a 100644 --- a/outbreaker/workflows/outbreaker.smk +++ b/spora/workflows/spora.smk @@ -4,7 +4,7 @@ import click import pandas as pd if not config["outdir"]: - config["outdir"] = os.getcwd() + "/outbreaker/" + config["outdir"] = os.getcwd() + "/spora/" def isFasta(input): return input.endswith(('.fa', '.fasta', '.FA', '.FASTA')) @@ -315,7 +315,7 @@ rule summary_report: output: report = os.path.join(config["outdir"], config["prefix"] + "_summary_report.html") params: - script = 
srcdir("outbreaker_summary_report.Rmd"), + script = srcdir("spora_summary_report.Rmd"), output = absol_path(os.path.join(config["outdir"], config["prefix"] + "_summary_report.html")), focal_read = str(absol_path(config["focal_seqs"])), background_read = str(absol_path(config["background_seqs"])), diff --git a/outbreaker/workflows/outbreaker_summary_report.Rmd b/spora/workflows/spora_summary_report.Rmd similarity index 98% rename from outbreaker/workflows/outbreaker_summary_report.Rmd rename to spora/workflows/spora_summary_report.Rmd index a2a442a..7c2db86 100644 --- a/outbreaker/workflows/outbreaker_summary_report.Rmd +++ b/spora/workflows/spora_summary_report.Rmd @@ -1,5 +1,5 @@ --- -title: "outbreaker Summary Report" +title: "spora Summary Report" date: "Generated: `r format(Sys.time(), '%B %d, %Y, %H:%M')`
" params: focal_list: @@ -222,7 +222,7 @@ Number of background sequences input: **`r format (nrow(background_input))`**
### Retained sequences for analysis -outbreaker filters samples based on the following criteria: \ +spora filters samples based on the following criteria: \ + sequence filtering if ```--filter``` is used. This will remove any sequences that do not meet the genome completeness and length requirements set by the user. \ + removal of any duplicate sequence(s) based on sequence name. If a duplicated sequence is found in both the focal and background sequence sets the duplicate(s) is/are removed and the retained unique sequence is treated as a focal sequence. \ @@ -354,7 +354,7 @@ if (closest_dist_w_background != "NA (no background sequences)") { datatable(w_background_with_min, rownames= FALSE, colnames = rep("", ncol(w_background_with_min))) } else { - cat("No background sequences were provided to outbreaker. SNP pairs are not shown.") + cat("No background sequences were provided to spora. SNP pairs are not shown.") } ``` @@ -373,7 +373,7 @@ if (farthest_dist_w_background != "NA (no background sequences)") { datatable(w_background_with_max, rownames= FALSE, colnames = rep("", ncol(w_background_with_max))) } else { - cat("No background sequences were provided to outbreaker. SNP pairs are not shown.") + cat("No background sequences were provided to spora. 
SNP pairs are not shown.") } ``` @@ -578,7 +578,7 @@ annotated_tree_scaled <- ggtree(tree_snps, size = 0.5) + xlim(c(0, annotated_tree_snps } else { - cat("No SNPs only analysis was conducted.\nPlease enable SNPs only analysis with outbreaker to generate a SNPs only phylogenetic tree.") + cat("No SNPs only analysis was conducted.\nPlease enable SNPs only analysis with spora to generate a SNPs only phylogenetic tree.") } diff --git a/tests/test_outbreaker.py b/tests/test_spora.py similarity index 95% rename from tests/test_outbreaker.py rename to tests/test_spora.py index ffbdbda..bb5dd56 100644 --- a/tests/test_outbreaker.py +++ b/tests/test_spora.py @@ -1,5 +1,5 @@ import os -from outbreaker import main +from spora import main import sys from Bio import SeqIO import subprocess @@ -31,7 +31,7 @@ def get_names_csv(get_data_dir): def get_renamed_fasta(tmp_path): return str(os.path.join(tmp_path, 'pytest_renamed.fa')) -class TestOutbreaker: +class TestSpora: def test_read_test_focal_fasta(self, get_focal_sequences): assert len(list(SeqIO.parse(get_focal_sequences, "fasta"))) == 4 def test_read_test_background_fasta(self, get_background_sequences): @@ -78,7 +78,7 @@ def test_run_with_missing_names_csv(self, tmp_path, get_focal_sequences, get_bac def test_run_with_console_output(self, tmp_path, get_focal_sequences, get_background_sequences, get_alignment_reference, get_names_csv): - results = subprocess.run(['outbreaker', '-f', get_focal_sequences, '-b', get_background_sequences, '--rename', '-p', 'pytest', + results = subprocess.run(['spora', '-f', get_focal_sequences, '-b', get_background_sequences, '--rename', '-p', 'pytest', '-r', get_alignment_reference, '-o', str(tmp_path), '--names-csv', get_names_csv], stdout=subprocess.PIPE) assert 'WARNING: the following record has no match in samples IDs and will be kept with the original name: Focal_4' \