From 2aa432c65e41a2ab33c79069cae0b825d57e1aba Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Wed, 27 Apr 2022 15:26:11 -0400
Subject: [PATCH 01/13] Fix filter to remove identical names

---
 .gitignore                                         | 2 ++
 outbreaker/workflows/outbreaker_summary_report.Rmd | 3 ++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.gitignore b/.gitignore
index f9564f6..33c5779 100644
--- a/.gitignore
+++ b/.gitignore
@@ -130,3 +130,5 @@ dmypy.json
 
 
 .DS_Store
+
+.snakemake/
diff --git a/outbreaker/workflows/outbreaker_summary_report.Rmd b/outbreaker/workflows/outbreaker_summary_report.Rmd
index 5f298d7..2ddd436 100644
--- a/outbreaker/workflows/outbreaker_summary_report.Rmd
+++ b/outbreaker/workflows/outbreaker_summary_report.Rmd
@@ -249,7 +249,8 @@ distances <- read.csv(params$snp_dists, header = FALSE,
                          na.strings=c("","NA"),
                          stringsAsFactors=FALSE,
                          sep=",") %>% filter(! grepl("MN908947", V1) &
-                                               ! grepl("MN908947", V2))
+                                               ! grepl("MN908947", V2)) %>%
+            filter(V1 != V2)
 
 filtered_w_background <- subset(distances, V1 %in% subset(tr.df.labs, category == "Focal_Sequence")$label &
                      ! V2 %in% subset(tr.df.labs, category == "Focal_Sequence")$label)

From 163c31b0aefcf38b6151f7541b8c2e4a2c605e3c Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 10:51:34 -0400
Subject: [PATCH 02/13] Fix rename without names csv w report

---
 outbreaker/workflows/outbreaker.smk           | 23 +++++++++-------
 .../workflows/outbreaker_summary_report.Rmd   | 27 +++++++++++++------
 2 files changed, 32 insertions(+), 18 deletions(-)

diff --git a/outbreaker/workflows/outbreaker.smk b/outbreaker/workflows/outbreaker.smk
index 43b1a92..6cf2f2c 100644
--- a/outbreaker/workflows/outbreaker.smk
+++ b/outbreaker/workflows/outbreaker.smk
@@ -1,6 +1,7 @@
 import os
 import sys
 import click
+import pandas as pd
 
 if not config["outdir"]: 
     config["outdir"] = os.getcwd() + "/outbreaker/"
@@ -21,6 +22,7 @@ rule all:
         os.path.join(config["outdir"], config["prefix"] + ".fa"),
         os.path.join(config["outdir"], config["prefix"] + "_filtered.fa") if config["filter"] else [],
         os.path.join(config["outdir"], config["prefix"] + "_renamed.fa") if config["rename"] else [],
+        os.path.join(config["outdir"], config["prefix"] + "_rename_matches.csv") if config["rename"] and not config["names_csv"] else [],
         os.path.join(config["outdir"], config["prefix"] + "_aln.fasta"),
         os.path.join(config["outdir"], config["prefix"] + "_snipit.jpg"),
         os.path.join(config["outdir"], config["prefix"]+ "_tree.nwk"),
@@ -134,7 +136,8 @@ rule rename_headers:
         fasta = rules.create_subset.output.sub_fasta,
         names_csv = config["names_csv"] if config["names_csv"] else []
     output: 
-        renamed = os.path.join(config["outdir"], config["prefix"] + "_renamed.fa")
+        renamed = os.path.join(config["outdir"], config["prefix"] + "_renamed.fa"),
+        names_matches = os.path.join(config["outdir"], config["prefix"] + "_rename_matches.csv") if not config["names_csv"] else []
     run: 
         if config["rename"]: 
             if config["names_csv"]: 
@@ -146,19 +149,21 @@ rule rename_headers:
             else:
                 fasta_to_open = open(input.fasta)
                 newfasta = open(output.renamed, 'w')
+                names_matches = {}
+                name_counter = 1
                 for line in fasta_to_open: 
                     if line.startswith('>'):
                         line_cleaned = line.strip('>').strip()
-                        try: 
-                            replacement_name = "ON-PHL-" + line_cleaned.split("PHLON")[1].split("-SARS")[0] + "-" + line_cleaned.split("PHLON")[1].split("-SARS")[1]
-                        except IndexError:
-                            replacement_name = line_cleaned
+                        replacement_name = config["prefix"] + "_" + str(name_counter)
                         newfasta.write(">" + replacement_name + "\n")
+                        names_matches[line_cleaned] = replacement_name
+                        name_counter += 1
                     else:
                         newfasta.write(line)
                 
                 fasta_to_open.close()
                 newfasta.close()
+                pd.DataFrame(names_matches.items(), columns=['original_name', 'new_name']).to_csv(output.names_matches, index = False)
                 sys.stderr.write(f'\nrenamed multi-FASTA headers into: {output.renamed}\n')
                                           
             
@@ -321,15 +326,13 @@ rule summary_report:
         renamed = convertPythonBooleanToR(config["rename"]),
         names_sheet_read = absol_path(config["names_csv"]) if config["names_csv"] else [],
         prefix_input = str(config["prefix"]),
-        report_output = absol_path(os.path.join(config["outdir"])) + "/"
+        report_output = absol_path(os.path.join(config["outdir"])) + "/",
+        name_matches = absol_path(os.path.join(config["outdir"], config["prefix"] + "_rename_matches.csv")) if config["rename"] and not config["names_csv"] else []
     run:
         if config["report"]:
             shell( 
             """
-            Rscript -e \"rmarkdown::render(input = '{params.script}', params = list(focal_list = '{params.focal_read}', background_list = '{params.background_read}',     snp_dists = '{params.snp_read}', snp_tree = '{params.snp_tree_read}', full_tree = '{params.full_tree_read}', snipit = '{params.snipit_read}', renamed = '{params.renamed}', names_csv = '{params.names_sheet_read}', outbreak_prefix = '{params.prefix_input}', outbreak_directory = '{params.report_output}'), output_file = '{params.output}')\"
+            Rscript -e \"rmarkdown::render(input = '{params.script}', params = list(focal_list = '{params.focal_read}', background_list = '{params.background_read}',     snp_dists = '{params.snp_read}', snp_tree = '{params.snp_tree_read}', full_tree = '{params.full_tree_read}', snipit = '{params.snipit_read}', renamed = '{params.renamed}', names_csv = '{params.names_sheet_read}', outbreak_prefix = '{params.prefix_input}', outbreak_directory = '{params.report_output}', name_matches = '{params.name_matches}'), output_file = '{params.output}')\"
             """)
-  
-        
-        
 
 
diff --git a/outbreaker/workflows/outbreaker_summary_report.Rmd b/outbreaker/workflows/outbreaker_summary_report.Rmd
index 2ddd436..668121d 100644
--- a/outbreaker/workflows/outbreaker_summary_report.Rmd
+++ b/outbreaker/workflows/outbreaker_summary_report.Rmd
@@ -29,6 +29,9 @@ params:
     value: ""
   outbreak_directory: 
     value: ""
+  name_matches: 
+    input: file
+    value: ""
 output:
   html_document:
     toc: yes
@@ -108,9 +111,10 @@ if (file_ext(params$focal_list) %in% fasta_extensions) {
 ```{r, echo=F, warning=F, message=F}
 
 if (params$renamed == "TRUE" & params$names_csv == "") {
+  
+  rename_matches <- read.csv(params$name_matches)
+  new_focal_names <- as.vector(subset(rename_matches, original_name %in% focal_input$Sequence)$new_name)
 
-   new_focal_names <- as.vector(paste("ON-PHL", str_split_fixed(focal_input$Sequence, "PHLON|-SARS", 4)[,2],
-      str_split_fixed(focal_input$Sequence, "PHLON|-SARS", 4)[,3], sep = "-"))
 } else if (params$renamed == "TRUE" & params$names_csv != "") {
   renaming_sheet <- read.csv(params$names_csv, header = T,
                          na.strings=c("","NA"),
@@ -250,16 +254,23 @@ distances <- read.csv(params$snp_dists, header = FALSE,
                          stringsAsFactors=FALSE,
                          sep=",") %>% filter(! grepl("MN908947", V1) &
                                                ! grepl("MN908947", V2)) %>%
-            filter(V1 != V2)
+                         filter(V1 != V2)
 
-filtered_w_background <- subset(distances, V1 %in% subset(tr.df.labs, category == "Focal_Sequence")$label &
-                     ! V2 %in% subset(tr.df.labs, category == "Focal_Sequence")$label)
+filtered_w_background <- subset(distances, V1 %in% as.vector(subset(tr.df.labs, category == "Focal_Sequence")$label) &
+                     ! V2 %in% as.vector(subset(tr.df.labs, category == "Focal_Sequence")$label))
 
-filtered_only_focal <- subset(distances, V1 %in% subset(tr.df.labs, category == "Focal_Sequence")$label &
-                     V2 %in% subset(tr.df.labs, category == "Focal_Sequence")$label)
+filtered_only_focal <- subset(distances, V1 %in% as.vector(subset(tr.df.labs, category == "Focal_Sequence")$label) &
+                     V2 %in% as.vector(subset(tr.df.labs, category == "Focal_Sequence")$label))
 
-distance_frame_only_focal <- as.data.frame(table(filtered_only_focal$V3)) %>% mutate(Var1 = as.numeric(as.character(Var1)))
+distance_frame_only_focal <- as.data.frame(table(filtered_only_focal$V3))
+
+if (nrow(distance_frame_only_focal) != 0) {
+  distance_frame_only_focal <- distance_frame_only_focal %>% mutate(Var1 = as.numeric(as.character(Var1)))
 colnames(distance_frame_only_focal) <- c("SNP_Distance", "Frequency")
+} else {
+  distance_frame_only_focal <- data.frame(SNP_Distance = numeric(),
+                                          Frequency = numeric())
+}
 
 distance_frame_w_background <-as.data.frame(table(filtered_w_background$V3))
 

From 302f62e1a41f9237bd86015099518ccf504a0535 Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 11:42:50 -0400
Subject: [PATCH 03/13] Initial pytest

---
 tests/test_outbreaker.py | 46 ++++++++++++++++++++++++++++++++++++++++
 1 file changed, 46 insertions(+)
 create mode 100644 tests/test_outbreaker.py

diff --git a/tests/test_outbreaker.py b/tests/test_outbreaker.py
new file mode 100644
index 0000000..b1f8ede
--- /dev/null
+++ b/tests/test_outbreaker.py
@@ -0,0 +1,46 @@
+import os
+from outbreaker import main
+import sys
+from Bio import SeqIO
+
+DATA_DIR = os.path.abspath(os.path.join(os.path.dirname( __file__ ), '..', 'data/'))
+print(DATA_DIR)
+
+test_reference = os.path.join(DATA_DIR, 'reference', 'ncov_reference.gb')
+
+
+class TestOutbreaker:
+    def test_read_test_focal_fasta(self):
+        query_file = os.path.join(DATA_DIR, 'tests/', 'focal_seqs.fa')
+        assert len(list(SeqIO.parse(query_file, "fasta"))) == 4
+    def test_read_test_background_fasta(self):
+        query_file = os.path.join(DATA_DIR, 'tests/', 'background_seqs.fa')
+        assert len(list(SeqIO.parse(query_file, "fasta"))) == 6
+
+    def test_run_outputs(self, tmp_path):
+        focal_seqs = os.path.join(DATA_DIR, 'tests/', 'focal_seqs.fa')
+        background_seqs = os.path.join(DATA_DIR, 'tests/', 'background_seqs.fa')
+
+        args = ['-f', str(focal_seqs), '-b', str(background_seqs), '--rename', '-p', 'pytest',
+                '-r', str(test_reference), '-o', str(tmp_path)]
+
+        main.main(sysargs = args)
+        output_merged_fasta = os.path.join(tmp_path, 'pytest_renamed.fa')
+        assert len(list(SeqIO.parse(output_merged_fasta, "fasta"))) == 10
+
+        new_names = ["pytest_" + str(i) for i in range(1, 11, 1)]
+        names_in_fasta = []
+        for record in SeqIO.parse(output_merged_fasta, "fasta"):
+            names_in_fasta.append(record.id)
+        assert names_in_fasta == new_names
+
+
+
+
+
+        
+        
+
+
+
+

From 3a06af9084decebf5406dd66cf9a13627c1fc5a7 Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 13:11:30 -0400
Subject: [PATCH 04/13] Add pytests + github actions

---
 .github/workflows/main.yml |  3 +++
 tests/test_outbreaker.py   | 30 ++++++++++++++++++++++++++++++
 2 files changed, 33 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index cd0fd92..3263e26 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -32,3 +32,6 @@ jobs:
       - name: Run outbreaker test via config
         shell: bash -l {0}
         run: outbreaker -c data/test_config.yaml
+      - name: Run pytest for outbreaker
+        shell: bash -l {0}
+        run: pytest tests/
diff --git a/tests/test_outbreaker.py b/tests/test_outbreaker.py
index b1f8ede..68cbc64 100644
--- a/tests/test_outbreaker.py
+++ b/tests/test_outbreaker.py
@@ -35,6 +35,36 @@ def test_run_outputs(self, tmp_path):
         assert names_in_fasta == new_names
 
 
+    def test_run_with_missing_names_csv(self, tmp_path):
+        focal_seqs = os.path.join(DATA_DIR, 'tests/', 'focal_seqs.fa')
+        background_seqs = os.path.join(DATA_DIR, 'tests/', 'background_seqs.fa')
+        names_csv = os.path.join(DATA_DIR, 'tests/', 'names.csv')
+
+        args = ['-f', str(focal_seqs), '-b', str(background_seqs), '--rename', '-p', 'pytest',
+                    '-r', str(test_reference), '-o', str(tmp_path), '--names-csv', str(names_csv)]
+
+        main.main(sysargs=args)
+
+        output_merged_fasta = os.path.join(tmp_path, 'pytest_renamed.fa')
+        names_in_fasta = []
+        for record in SeqIO.parse(output_merged_fasta, "fasta"):
+                names_in_fasta.append(record.id)
+        names_not_all = ['Renamed_1', 'Renamed_2', 'Renamed_3',
+            'Focal_4', 'Renamed_4', 'Renamed_5', 'Background_3',
+                             'Renamed_6', 'Renamed_7', 'Renamed_8']
+        assert names_in_fasta == names_not_all
+
+        output_snp_dists = os.path.join(tmp_path, "pytest_snp_dists.csv")
+
+        with open(output_snp_dists) as f:
+            lines = f.readlines()
+        assert str('Renamed_8,Background_3,5\n') in lines
+
+        
+
+            
+
+
 
 
 

From 4ca33260c6dd869d034eaf69f63f17b98069688a Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 13:49:21 -0400
Subject: [PATCH 05/13] add r to channels for CI env create

---
 .github/workflows/main.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 3263e26..d5ba251 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -19,7 +19,7 @@ jobs:
         with:
           environment-file: environments/environment.yml
           activate-environment: ncov_outbreaker
-          channels: conda-forge,bioconda,defaults
+          channels: conda-forge,bioconda,defaults,r
       - name: Install outbreaker
         shell: bash -l {0}
         run: pip install -e .

From 487a121e79846df844d3305716543b3a62ae17c1 Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:01:26 -0400
Subject: [PATCH 06/13] add mamba and matrix for ubuntu to CI

---
 .github/workflows/main.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index d5ba251..2028834 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -12,7 +12,11 @@ on:
 
 jobs:
   build:
-    runs-on: ubuntu-latest
+    name: Outbreaker test on ${{ matrix.os }}
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix:
+        os: ["ubuntu-latest"]
     steps:
       - uses: actions/checkout@v2
       - uses: conda-incubator/setup-miniconda@v2
@@ -20,8 +24,8 @@ jobs:
           environment-file: environments/environment.yml
           activate-environment: ncov_outbreaker
           channels: conda-forge,bioconda,defaults,r
+          mamba-version: "*"
       - name: Install outbreaker
-        shell: bash -l {0}
         run: pip install -e .
       - name: Check outbreaker version
         shell: bash -l {0}

From 31bc886d8b48e753fc6b7cd0bdadff36db58c074 Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:07:29 -0400
Subject: [PATCH 07/13] try removing minimal spec for snakemake

---
 environments/environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environments/environment.yml b/environments/environment.yml
index 060a9e9..21647e4 100644
--- a/environments/environment.yml
+++ b/environments/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - r-essentials
   - r-traminer
   - scipy=1.6.3
-  - snakemake-minimal
+  - snakemake
   - snp-dists=0.8.2
   - snp-sites=2.5.1
   - vcftools=0.1.16

From 3232b8c5ce87ba9d18c2fb7526ca81019423035b Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:19:01 -0400
Subject: [PATCH 08/13] try keeping older version of snakemake

---
 environments/environment.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/environments/environment.yml b/environments/environment.yml
index 21647e4..6a0923c 100644
--- a/environments/environment.yml
+++ b/environments/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - r-essentials
   - r-traminer
   - scipy=1.6.3
-  - snakemake
+  - snakemake-minimal<=6.8.0
   - snp-dists=0.8.2
   - snp-sites=2.5.1
   - vcftools=0.1.16

From 64d35e9ef25437de1ddfea23c689b296a4d57495 Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:31:20 -0400
Subject: [PATCH 09/13] try explicit install of snakemake min with pip

---
 .github/workflows/main.yml | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 2028834..8add243 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,6 +25,8 @@ jobs:
           activate-environment: ncov_outbreaker
           channels: conda-forge,bioconda,defaults,r
           mamba-version: "*"
+      - name: Install Python dependencies
+        run: python -m pip install --upgrade pip snakemake-minimal<=6.8.0
       - name: Install outbreaker
         run: pip install -e .
       - name: Check outbreaker version

From baf9dd0e03bcc46521f8a6fd250dc1eb27eee1cd Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:39:18 -0400
Subject: [PATCH 10/13] try pip install no -e

---
 .github/workflows/main.yml | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
index 8add243..2275d00 100644
--- a/.github/workflows/main.yml
+++ b/.github/workflows/main.yml
@@ -25,10 +25,8 @@ jobs:
           activate-environment: ncov_outbreaker
           channels: conda-forge,bioconda,defaults,r
           mamba-version: "*"
-      - name: Install Python dependencies
-        run: python -m pip install --upgrade pip snakemake-minimal<=6.8.0
       - name: Install outbreaker
-        run: pip install -e .
+        run: pip install .
       - name: Check outbreaker version
         shell: bash -l {0}
         run: outbreaker -v

From 88b474320146c69f42e6be3f107b163e29332dd9 Mon Sep 17 00:00:00 2001
From: Matthew Watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:51:14 -0400
Subject: [PATCH 11/13] add snakemake to setup requirements

---
 setup.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.py b/setup.py
index 7642aaf..d44fd62 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
     author='Matthew Watson',
     author_email='matthew.watson@oahpp.ca',
     description='snakemake and Python integrated workflow for intermediate file generation for COVID outbreak analysis',
-    install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79"],
+    install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79", "snakemake-minimal<=6.8.0"],
     entry_points="""
     [console_scripts]
     {program} = outbreaker.main:main

From d9f5d02f183eef9abb642afea267542286e10220 Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 14:57:51 -0400
Subject: [PATCH 12/13] Remove snakemake minimal

---
 environments/environment.yml | 2 +-
 setup.py                     | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/environments/environment.yml b/environments/environment.yml
index 6a0923c..21647e4 100644
--- a/environments/environment.yml
+++ b/environments/environment.yml
@@ -30,7 +30,7 @@ dependencies:
   - r-essentials
   - r-traminer
   - scipy=1.6.3
-  - snakemake-minimal<=6.8.0
+  - snakemake
   - snp-dists=0.8.2
   - snp-sites=2.5.1
   - vcftools=0.1.16
diff --git a/setup.py b/setup.py
index d44fd62..40c6a56 100644
--- a/setup.py
+++ b/setup.py
@@ -13,7 +13,7 @@
     author='Matthew Watson',
     author_email='matthew.watson@oahpp.ca',
     description='snakemake and Python integrated workflow for intermediate file generation for COVID outbreak analysis',
-    install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79", "snakemake-minimal<=6.8.0"],
+    install_requires = ["pandas>=1.1.5", "numpy>=1.19", "biopython>=1.79", "snakemake>=7.0.0"],
     entry_points="""
     [console_scripts]
     {program} = outbreaker.main:main

From fb95943b6edc46beb164bbb2e0bf32940ef70917 Mon Sep 17 00:00:00 2001
From: matt-sd-watson <matthew.watson@uhn.ca>
Date: Thu, 28 Apr 2022 16:05:47 -0400
Subject: [PATCH 13/13] Bump version, changelog, docs

---
 CHANGELOG.md           |  8 +++++++-
 README.md              |  2 +-
 docs/2-INPUTS.md       | 26 ++++++++++++--------------
 outbreaker/__init__.py |  2 +-
 4 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 1b0e7c0..7307180 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -58,5 +58,11 @@
  
 ## Minor Version 0.6.4, 17-02-22
 
- -outbreaker now retains all sequences if ```--names-csv``` is used for renaming and not all sequences are contained in the CSV
+ - outbreaker now retains all sequences if ```--names-csv``` is used for renaming and not all sequences are contained in the CSV
  - updates to the renaming behavior to be compatible with fastafurious v1.2.0 (additional warning messages)
+ 
+## Minor Version 0.6.5, 28-04-22 (Patch)
+ - Change behaviour of renaming when no CSV is supplied: outbreaker now uses the run prefix to generate new names in sequential numerical order (e.g. prefix_1, prefix_2) and writes the original-to-new name matches to a CSV file
+ - The above change fixes the error in the SNP distance plot in the HTML report when rename is used but no names CSV is supplied
+ - Addition of pytests in the CI/CD workflow
+ 
diff --git a/README.md b/README.md
index e430251..55aae9a 100644
--- a/README.md
+++ b/README.md
@@ -71,6 +71,6 @@ More detailed documentation for outbreaker usage and functionality can be found
 
 ## Acknowledgments
 
-Inspiration for code structure and design for outbreaker was inspired by [pangolin](https://github.com/cov-lineages/pangolin) and [civet](https://github.com/artic-network/civet), and minor code blocks were adopted from these software. \
+The code structure and design of outbreaker were inspired by [pangolin](https://github.com/cov-lineages/pangolin) and [civet](https://github.com/artic-network/civet), and minor code blocks were adopted from these tools.
 
 The **Background** section in the documentation describing outbreak definitions was written by Mark Horsman. 
diff --git a/docs/2-INPUTS.md b/docs/2-INPUTS.md
index 651fe2d..31e7bf7 100644
--- a/docs/2-INPUTS.md
+++ b/docs/2-INPUTS.md
@@ -30,21 +30,19 @@ The following inputs are purely optional, but may augment the types of analysis
     
 ## Sample head renaming
 
-For PHO outbreak analysis, it is common to rename a sample COVID-19 sequence with a different alias for privacy purposes, especially if the outbreak analysis is to be shared with external collaborators. A typical renaming scheme for PHO COVID-19 samples would follow the following pattern: \
-Original sample name: PHLON20-SARS##### or PHLON22-SARS#####
-New sample name: ON-PHL-20-##### or ON-PHL-21-##### \
-where ##### denotes the specific WGS Id that is used to track the genomic sequence within the PHO laboratory. 
+It is common to rename a sample COVID-19 sequence with a different alias for privacy purposes, especially if the outbreak analysis is to be shared with external collaborators. \
 outbreaker is designed to facilitate the renaming of FASTA headers to accommodate privacy guidelines and/or to use different label aliases for the outbreak. This feature can be toggled on using ```--rename```. There are two different renaming possibilities for user when ```--rename``` is enabled: \
-    • **Option 1**: The workflow will auto-detect any FASTA headers that have the format PHLON{20,21}-SARS##### and change them to ON-PHL-{20,21}-#####. If the FASTA header does not follow this format, it will be left as is (i.e. Gisaid sample headers that follow a different format, or external samples) \
-    • **Option 2**: A CSV file of FASTA labels can be supplied using --names_csv. This requires that ALL focal and background samples be included in the table. The contents of the table should have the following scheme as an example:
-original_name
-new_name
-PHLON21-SARS29115
-sequence_1
-PHLON21-SARS15665
-sequence_2
-This table will allow outbreaker to rename the above PHLON sequences with sequence_# headers in all downstream input files generated by the workflow.
-If ```--names_csv```, the CSV headers must have original_name for the current/original header name, and new_name for the target/output name to run properly.
+    • **Option 1**: outbreaker will use the run prefix supplied at runtime to create a new alias for each sample. For example, for a run with 10 samples and the run prefix "apartment_can", the new sample names will range from apartment_can_1 to apartment_can_10. A CSV matching the original and newly generated names (`<prefix>_rename_matches.csv`) will be added to the output directory. \
+    • **Option 2**: A CSV file of FASTA labels can be supplied using --names_csv. This allows for custom labels for specific samples. Note that not all samples need to have a new name in this CSV. As of outbreaker v0.6.4, a sample without a corresponding new name is left as is.
+The format of this CSV should be as follows: 
+```
+original_name,new_name
+PHLON21-SARS29115,sequence_1
+PHLON21-SARS15665,sequence_2
+```
+
+This table will allow outbreaker to use fastafurious to rename the above PHLON sequences with sequence_# headers in all downstream input files generated by the workflow. \
+If ```--names_csv``` is supplied, the CSV headers must have original_name for the current/original header name, and new_name for the target/output name to run properly.
 
 
 ## Optional argument descriptions
diff --git a/outbreaker/__init__.py b/outbreaker/__init__.py
index 7b17ff8..cf91f7a 100644
--- a/outbreaker/__init__.py
+++ b/outbreaker/__init__.py
@@ -1,2 +1,2 @@
 _program = "outbreaker"
-__version__ = "0.6.4"
+__version__ = "0.6.5"