diff --git a/.env.example b/.env.example index 41fba3f..3e69e4e 100644 --- a/.env.example +++ b/.env.example @@ -1,2 +1,2 @@ -ENA_WEBIN="" -ENA_WEBIN_PASSWORD="" \ No newline at end of file +ENA_WEBIN="ENA_WEBIN" +ENA_WEBIN_PASSWORD="ENA_WEBIN_PASSWORD" diff --git a/.github/workflows/pytest_workflow.yml b/.github/workflows/pytest_workflow.yml index c0b190f..4effa1e 100644 --- a/.github/workflows/pytest_workflow.yml +++ b/.github/workflows/pytest_workflow.yml @@ -4,7 +4,9 @@ on: push: branches: [main] pull_request: - branches: [main] + branches: + - main + - dev jobs: pytest_38: @@ -27,7 +29,7 @@ jobs: emoji: false verbose: true job-summary: true - + pytest_39: name: Run pytest 3.9 needs: diff --git a/README.md b/README.md index 05a3b8c..05354d6 100755 --- a/README.md +++ b/README.md @@ -1,5 +1,5 @@ # Public bins and MAGs uploader -Python script to upload bins and MAGs in fasta format to ENA (European Nucleotide Archive). This script generates xmls and manifests necessary for submission with webin-cli. +Python script to upload bins and MAGs in fasta format to ENA (European Nucleotide Archive). This script generates xmls and manifests necessary for submission with webin-cli. It takes as input one tsv (tab-separated values) table in the following format: @@ -26,20 +26,20 @@ With columns indicating: * _local_environment_: `string` (explanation following) * _environmental_medium_: `string` (explanation following) -According to ENA checklist's guidelines, 'broad_environment' describes the broad ecological context of a sample - desert, taiga, coral reef, ... 'local_environment' is more local - lake, harbour, cliff, ... 'environmental_medium' is either the material displaced by the sample, or the one in which the sample was embedded prior to the sampling event - air, soil, water, ... +According to ENA checklist's guidelines, 'broad_environment' describes the broad ecological context of a sample - desert, taiga, coral reef, ... 
'local_environment' is more local - lake, harbour, cliff, ... 'environmental_medium' is either the material displaced by the sample, or the one in which the sample was embedded prior to the sampling event - air, soil, water, ... For host-associated metagenomic samples, the three variables can be defined similarly to the following example for the chicken gut metagenome: "chicken digestive system", "digestive tube", "caecum". More information can be found at [ERC000050]() for bins and [ERC000047]() for MAGs under field names "broad-scale environmental context", "local environmental context", "environmental medium" Another example can be found [here](examples/input_example.tsv) ### Warnings -Raw-read runs from which genomes were generated should already be available on the INSDC (ENA by EBI, GenBank by NCBI, or DDBJ), hence at least one DRR|ERR|SRR accession should be available for every genome to be uploaded. Assembly accessions (ERZ|SRZ|DRZ) are also supported. +Raw-read runs from which genomes were generated should already be available on the INSDC (ENA by EBI, GenBank by NCBI, or DDBJ), hence at least one DRR|ERR|SRR accession should be available for every genome to be uploaded. Assembly accessions (ERZ|SRZ|DRZ) are also supported. -If uploading TPA (Third PArty) genomes, you will need to contact [ENA support]() before using the script. They will provide instructions on how to correctly register a TPA project where to submit your genomes. If both TPA and non-TPA genomes need to be uploaded, please divide them in two batches and use the `--tpa` flag only with TPA genomes. +If uploading TPA (Third PArty) genomes, you will need to contact [ENA support]() before using the script. They will provide instructions on how to correctly register a TPA project where to submit your genomes. If both TPA and non-TPA genomes need to be uploaded, please divide them in two batches and use the `--tpa` flag only with TPA genomes. -Files to be uploaded will need to be compressed (e.g. 
already in .gz format). +Files to be uploaded will need to be compressed (e.g. already in .gz format). -No more than 5000 genomes can be submitted at the same time. +No more than 5000 genomes can be submitted at the same time. ## Register samples and generate pre-upload files The script needs `python`, `pandas`, `requests`, and `ena-webin-cli` to run. We provide a yaml file for the generation of a conda environment: @@ -68,9 +68,9 @@ where * `--centre_name CENTRE_NAME`: name of the centre generating and uploading genomes * `--tpa`: if uploading TPA (Third PArty) generated genomes -It is recommended to validate your genomes in test mode (i.e. without `--live` in the registration step and with `-test` during the upload) before attempting the final upload. Launching the registration in test mode will add a timestamp to the genome name to allow multiple executions of the test process. +It is recommended to validate your genomes in test mode (i.e. without `--live` in the registration step and with `-test` during the upload) before attempting the final upload. Launching the registration in test mode will add a timestamp to the genome name to allow multiple executions of the test process. -Sample xmls won't be regenerated automatically if a previous xml already exists. If any metadata or value in the tsv table changes, `--force` will allow xml regeneration. +Sample xmls won't be regenerated automatically if a previous xml already exists. If any metadata or value in the tsv table changes, `--force` will allow xml regeneration. 
### Produced files: The script produces the following files and folders: diff --git a/genomeuploader/__init__.py b/genomeuploader/__init__.py index 55fa725..58039f5 100644 --- a/genomeuploader/__init__.py +++ b/genomeuploader/__init__.py @@ -1 +1 @@ -__version__ = '2.1.1' +__version__ = "2.1.1" diff --git a/genomeuploader/constants.py b/genomeuploader/constants.py index 11d9a5e..7c74036 100644 --- a/genomeuploader/constants.py +++ b/genomeuploader/constants.py @@ -14,14 +14,8 @@ # See the License for the specific language governing permissions and # limitations under the License. -HQ = ( - "Multiple fragments where gaps span repetitive regions. Presence of the " - "23S, 16S, and 5S rRNA genes and at least 18 tRNAs." -) -MQ = ( - "Many fragments with little to no review of assembly other than reporting " - "of standard assembly statistics." -) +HQ = "Multiple fragments where gaps span repetitive regions. Presence of the " "23S, 16S, and 5S rRNA genes and at least 18 tRNAs." +MQ = "Many fragments with little to no review of assembly other than reporting " "of standard assembly statistics." METAGENOMES = [ "activated carbon metagenome", @@ -635,3 +629,24 @@ "Zambia", "Zimbabwe", ] + +BIN_MANDATORY_FIELDS = [ + "genome_name", + "accessions", + "assembly_software", + "binning_software", + "binning_parameters", + "stats_generation_software", + "NCBI_lineage", + "broad_environment", + "local_environment", + "environmental_medium", + "metagenome", + "co-assembly", + "genome_coverage", + "genome_path", +] + +MAG_MANDATORY_FIELDS = ["rRNA_presence", "completeness", "contamination"] + +GEOGRAPHY_DIGIT_COORDS = 8 diff --git a/genomeuploader/ena.py b/genomeuploader/ena.py index 51ab45c..0f3c5b6 100644 --- a/genomeuploader/ena.py +++ b/genomeuploader/ena.py @@ -1,4 +1,3 @@ - #!/usr/bin/env python # -*- coding: utf-8 -*- @@ -16,12 +15,17 @@ # limitations under the License. 
-import requests import json import logging +import os +import sys +import xml.dom.minidom as minidom +from pathlib import Path from time import sleep -import xml.dom.minidom as minidom +import requests +from dotenv import load_dotenv +from requests.exceptions import ConnectionError, HTTPError, RequestException, Timeout logging.basicConfig(level=logging.INFO) @@ -32,265 +36,396 @@ class NoDataException(ValueError): pass -RUN_DEFAULT_FIELDS = ','.join([ - 'study_accession', - 'secondary_study_accession', - 'instrument_model', - 'run_accession', - 'sample_accession' -]) - -ASSEMBLY_DEFAULT_FIELDS = 'sample_accession' - -SAMPLE_DEFAULT_FIELDS = ','.join([ - 'sample_accession', - 'secondary_sample_accession', - 'collection_date', - 'country', - 'location' -]) - -STUDY_DEFAULT_FIELDS = ','.join([ - 'study_accession', - 'secondary_study_accession', - 'study_description', - 'study_title' -]) - -RETRY_COUNT = 5 - - -class ENA(): - def get_default_params(self): - return { - 'format': 'json', - 'includeMetagenomes': True, - 'dataPortal': 'ena' - } +RUN_DEFAULT_FIELDS = ",".join(["secondary_study_accession", "sample_accession"]) + +STUDY_RUN_DEFAULT_FIELDS = ",".join(["sample_accession", "run_accession", "instrument_model"]) + +ASSEMBLY_DEFAULT_FIELDS = "sample_accession" - def post_request(self, data, webin, password): - url = "https://www.ebi.ac.uk/ena/portal/api/search" - auth = (webin, password) - default_connection_headers = { + +SAMPLE_DEFAULT_FIELDS = ",".join(["sample_accession", "collection_date", "country", "location"]) + +STUDY_DEFAULT_FIELDS = "study_accession,study_description" + +RETRY_COUNT = 3 + + +def get_default_connection_headers(): + return { + "headers": { "Content-Type": "application/x-www-form-urlencoded", - "Accept": "*/*" + "Accept": "*/*", } - response = requests.post(url, data=data, auth=auth, headers=default_connection_headers) - - return response + } + + +def get_default_params(): + return {"format": "json", "includeMetagenomes": True, 
"dataPortal": "ena"} + + +def parse_accession(accession): + if accession.startswith("PRJ"): + return "study_accession" + elif "RP" in accession: + return "secondary_study_accession" + elif "RR" in accession: + return "run_accession" + elif "SAM" in accession: + return "sample_accession" + elif "RS" in accession: + return "secondary_sample_accession" + elif "RZ" in accession: + return "analysis_accession" + else: + logging.error(f"{accession} is not a valid accession") + sys.exit() + + +def configure_credentials(): + # Config file + user_config = Path.home() / ".genome_uploader.config.env" + if user_config.exists(): + logger.debug("Loading the env variables from ".format()) + load_dotenv(str(user_config)) + else: + cwd_config = Path.cwd() / ".genome_uploader.config.env" + if cwd_config.exists(): + logger.debug("Loading the variables from the current directory.") + load_dotenv(str(cwd_config)) + else: + logger.debug("Trying to load env variables from the .env file") + # from a local .env file + load_dotenv() - def get_run(self, run_accession, webin, password, attempt=0, search_params=None): - data = self.get_default_params() - data['result'] = 'read_run' - data['fields'] = RUN_DEFAULT_FIELDS - data['query'] = 'run_accession=\"{}\"'.format(run_accession) - - if search_params: - data.update(search_params) - - response = self.post_request(data, webin, password) - - if not response.ok and attempt > 2: - raise ValueError("Could not retrieve run with accession {}, returned " - "message: {}".format(run_accession, response.text)) - elif response.status_code == 204: - if attempt < 2: - attempt += 1 - sleep(1) - return self.get_run(run_accession, webin, password, attempt) - else: - raise ValueError("Could not find run {} in ENA after {}" - " attempts".format(run_accession, RETRY_COUNT)) - try: - run = json.loads(response.text)[0] - except (IndexError, TypeError, ValueError): - raise ValueError("Could not find run {} in ENA.".format(run_accession)) - except: - raise 
Exception("Could not query ENA API: {}".format(response.text)) + username = os.getenv("ENA_WEBIN") + password = os.getenv("ENA_WEBIN_PASSWORD") - return run + return username, password - def get_run_from_assembly(self, assembly_name): - manifestXml = minidom.parseString(requests.get("https://www.ebi.ac.uk" + - "/ena/browser/api/xml/" + assembly_name).text) - run_ref = manifestXml.getElementsByTagName("RUN_REF") - run = run_ref[0].attributes["accession"].value - - return run +def query_taxid(taxid): + url = "https://www.ebi.ac.uk/ena/taxonomy/rest/tax-id/{}".format(taxid) + response = requests.get(url) - def get_study(self, webin, password, study_accession): - data = self.get_default_params() - data['result'] = "study" - data['fields'] = STUDY_DEFAULT_FIELDS - data['query'] = 'study_accession="{}" OR secondary_study_accession="{}"' \ - .format(study_accession, study_accession) + try: + # Will raise exception if response status code is non-200 + response.raise_for_status() + except requests.exceptions.HTTPError as e: + print("Request failed {} with error {}".format(url, e)) + return False - data['dataPortal'] = "ena" + res = response.json() + + return res.get("scientificName", "") - try: - response = self.post_request(data, webin, password) - if response.status_code == 204: - raise NoDataException() - try: - studyList = response.json() - assert len(studyList) == 1 - study = studyList[0] - except (IndexError, TypeError, ValueError, KeyError) as e: - raise e - return study - except NoDataException: - print("No info found to fetch study {}".format(study_accession)) - except (IndexError, TypeError, ValueError, KeyError): - print("Failed to fetch study {}, returned error: {}".format(study_accession, response.text)) - raise ValueError('Could not find study {} in ENA.'.format(study_accession)) +def query_scientific_name(scientific_name, search_rank=False): + url = "https://www.ebi.ac.uk/ena/taxonomy/rest/scientific-name/{}".format(scientific_name) + response = 
requests.get(url) - def get_study_runs(self, study_acc, webin, password, fields=None, search_params=None): - data = self.get_default_params() - data['result'] = 'read_run' - data['fields'] = fields or RUN_DEFAULT_FIELDS - data['query'] = '(study_accession=\"{}\" OR secondary_study_accession=\"{}\")'.format(study_acc, study_acc) + try: + # Will raise exception if response status code is non-200 + response.raise_for_status() + except requests.exceptions.HTTPError: + if search_rank: + return False, "", "" + else: + return False, "" - if search_params: - data.update(search_params) + try: + res = response.json()[0] + except IndexError: + if search_rank: + return False, "", "" + else: + return False, "" + + submittable = res.get("submittable", "").lower() == "true" + taxid = res.get("taxId", "") + rank = res.get("rank", "") + + if search_rank: + return submittable, taxid, rank + else: + return submittable, taxid + + +class EnaQuery: + def __init__(self, accession, query_type, private=False): + self.private_url = "https://www.ebi.ac.uk/ena/submit/report" + self.public_url = "https://www.ebi.ac.uk/ena/portal/api/search" + self.browser_url = "https://www.ebi.ac.uk/ena/browser/api/xml" + self.accession = accession + self.acc_type = parse_accession(accession) + username, password = configure_credentials() + if username is None or password is None: + logging.error("ENA_WEBIN and ENA_WEBIN_PASSWORD are not set") + if username and password: + self.auth = (username, password) + else: + self.auth = None + self.private = private + self.query_type = query_type + + def post_request(self, data): + response = requests.post(self.public_url, data=data, **get_default_connection_headers()) + return response - response = self.post_request(data, webin, password) - - if not response.ok: - raise ValueError("Could not retrieve runs for study %s.", study_acc) - - if response.status_code == 204: - return [] + def get_request(self, url): + if self.private: + response = requests.get(url, 
auth=self.auth)
+        else:
+            response = requests.get(url)
+        return response
+
+    def get_data_or_handle_error(self, response, xml=False, full_json=False):
         try:
-            runs = response.json()
-        except:
-            raise ValueError("Query against ENA API did not work. Returned "
-                             "message: {}".format(response.text))
+            data_txt = response.text
+            if data_txt is None:
+                if self.private:
+                    logging.error(f"{self.accession} private data is not present in the specified Webin account")
+                else:
+                    logging.error(f"{self.accession} public data does not exist")
+                return None
+            else:
+                if xml:
+                    return minidom.parseString(data_txt)
+                elif full_json:
+                    # return the full json when more than one entry expected
+                    return response.json()
+                else:
+                    # only return the first element in the list as the default. In these cases there should only be one entry returned
+                    return json.loads(response.text)[0]
+        except (IndexError, TypeError, ValueError, KeyError):
+            logging.error(f"Failed to fetch {self.accession}, returned error: {response.text}")
+
+    def retry_or_handle_request_error(self, request, *args, **kwargs):
+        attempt = 0
+        while attempt < RETRY_COUNT:
+            try:
+                response = request(*args, **kwargs)
+                response.raise_for_status()
+                return response
+            # all other RequestExceptions are raised below
+            except (Timeout, ConnectionError) as retry_err:
+                attempt += 1
+                if attempt >= RETRY_COUNT:
+                    raise ValueError(f"Could not find {self.accession} in ENA after {RETRY_COUNT} attempts. Error: {retry_err}")
+                sleep(1)
+            except HTTPError as http_err:
+                print(f"HTTP response has an error status: {http_err}")
+                raise
+            except RequestException as req_err:
+                print(f"Network-related error status: {req_err}")
+                raise
+            # should hopefully encompass all other issues... 
+ except Exception as e: + print(f"An unexpected error occurred: {e}") + raise + + def _get_private_run(self): + url = f"{self.private_url}/runs/{self.accession}" + response = self.retry_or_handle_request_error(self.get_request, url) + run = self.get_data_or_handle_error(response) + run_data = run["report"] + reformatted_data = { + "run_accession": run_data["id"], + "secondary_study_accession": run_data["studyId"], + "sample_accession": run_data["sampleId"], + } + logging.info(f"{self.accession} private run returned from ENA") + return reformatted_data + + def _get_public_run(self): + data = get_default_params() + data.update( + { + "result": "read_run", + "query": f'run_accession="{self.accession}"', + "fields": "secondary_study_accession,sample_accession", + } + ) + response = self.retry_or_handle_request_error(self.post_request, data) + run = self.get_data_or_handle_error(response) + logging.info(f"{self.accession} public run returned from ENA") + return run + + def _get_private_study(self): + url = f"{self.private_url}/studies/xml/{self.accession}" + response = self.retry_or_handle_request_error(self.get_request, url) + study = self.get_data_or_handle_error(response, xml=True) + study_desc = study.getElementsByTagName("STUDY_DESCRIPTION")[0].firstChild.nodeValue + reformatted_data = {"study_accession": self.accession, "study_description": study_desc} + logging.info(f"{self.accession} private study returned from ENA") + return reformatted_data + + def _get_public_study(self): + data = get_default_params() + data.update( + { + "result": "study", + "query": f'{self.acc_type}="{self.accession}"', + "fields": STUDY_DEFAULT_FIELDS, + } + ) + response = self.retry_or_handle_request_error(self.post_request, data) + study = self.get_data_or_handle_error(response) + logging.info(f"{self.accession} public study returned from ENA") + return study + + def _get_private_run_from_assembly(self): + url = f"{self.private_url}/analyses/xml/{self.accession}" + reponse = 
self.retry_or_handle_request_error(self.get_request, url)
+        parsed_xml = self.get_data_or_handle_error(reponse, xml=True)
+        run_ref = parsed_xml.getElementsByTagName("RUN_REF")
+        run = run_ref[0].attributes["accession"].value
+        logging.info(f"private run from the assembly {self.accession} returned from ENA")
+        return run
+
+    def _get_public_run_from_assembly(self):
+        url = f"{self.browser_url}/{self.accession}"
+        reponse = self.retry_or_handle_request_error(self.get_request, url)
+        parsed_xml = self.get_data_or_handle_error(reponse, xml=True)
+        run_ref = parsed_xml.getElementsByTagName("RUN_REF")
+        run = run_ref[0].attributes["accession"].value
+        logging.info(f"public run from the assembly {self.accession} returned from ENA")
+        return run
+
+    def _get_private_study_runs(self):
+        # Pagination documentation unclear - offset not working. Using hardcoded 2000 default. TO MODIFY
+        url = f"{self.private_url}/runs/{self.accession}?format=json&max-results=2000"
+        response = self.retry_or_handle_request_error(self.get_request, url)
+        runs = self.get_data_or_handle_error(response, full_json=True)
+        reformatted_data = []
+        for run in runs:
+            run_data = run["report"]
+            if "sampleId" in run_data:
+                run_data["sample_accession"] = run_data.pop("sampleId")
+            if "id" in run_data:
+                run_data["run_accession"] = run_data.pop("id")
+            if "instrumentModel" in run_data:
+                run_data["instrument_model"] = run_data.pop("instrumentModel")
+            reformatted_data.append(run_data)
+        logging.info(f"found {len(reformatted_data)} runs for study {self.accession}")
+        logging.info(f"private runs from study {self.accession}, returned from ENA")
+        return reformatted_data
+
+    def _get_public_study_runs(self):
+        data = get_default_params()
+        data.update({"result": "read_run", "fields": STUDY_RUN_DEFAULT_FIELDS, "query": f"{self.acc_type}={self.accession}"})
+        response = self.retry_or_handle_request_error(self.post_request, data)
+        runs = self.get_data_or_handle_error(response, full_json=True)
+        
logging.info(f"public runs from study {self.accession}, returned from ENA") return runs - def get_sample(self, sample_accession, webin, password, fields=None, search_params=None, attempt=0): - data = self.get_default_params() - data['result'] = 'sample' - data['fields'] = fields or SAMPLE_DEFAULT_FIELDS - data['query'] = ('(sample_accession=\"{acc}\" OR secondary_sample_accession' - '=\"{acc}\") ').format(acc=sample_accession) - - if search_params: - data.update(search_params) - - response = self.post_request(data, webin, password) - - if response.status_code == 200: - sample = response.json() - assert len(sample) == 1 - return sample[0] - - if response.status_code == 204: - if attempt < 2: - new_params = {'dataPortal': 'metagenome' if data['dataPortal'] == 'ena' else 'ena'} - attempt += 1 - return self.get_sample(sample_accession, webin, password, fields=fields, - search_params=new_params, attempt=attempt) - else: - raise ValueError("Could not find sample {} in ENA after " - "{} attempts.".format(sample_accession, RETRY_COUNT)) - else: - raise ValueError("Could not retrieve sample with accession {}. 
" - "Returned message: {}".format(sample_accession, response.text)) + def _get_private_sample(self): + url = f"{self.private_url}/samples/xml/{self.accession}" + reponse = self.retry_or_handle_request_error(self.get_request, url) + reformatted_data = {} + reformatted_data["sample_accession"] = self.accession + parsed_xml = self.get_data_or_handle_error(reponse, xml=True) + sample_attributes = parsed_xml.getElementsByTagName("SAMPLE_ATTRIBUTE") + for attribute in sample_attributes: + tag = attribute.getElementsByTagName("TAG")[0].firstChild.nodeValue + if tag == "geographic location (country and/or sea)": + reformatted_data["country"] = attribute.getElementsByTagName("VALUE")[0].firstChild.nodeValue + if tag == "geographic location (latitude)": + reformatted_data["latitude"] = attribute.getElementsByTagName("VALUE")[0].firstChild.nodeValue + if tag == "geographic location (longitude)": + reformatted_data["longitude"] = attribute.getElementsByTagName("VALUE")[0].firstChild.nodeValue + if tag == "collection date": + reformatted_data["collection_date"] = attribute.getElementsByTagName("VALUE")[0].firstChild.nodeValue + logging.info(f"{self.accession} private sample returned from ENA") + return reformatted_data + + def _get_public_sample(self): + data = get_default_params() + data.update({"result": "sample", "fields": SAMPLE_DEFAULT_FIELDS, "query": f"{self.acc_type}={self.accession}"}) + response = self.retry_or_handle_request_error(self.post_request, data) + sample = self.get_data_or_handle_error(response) + logging.info(f"{self.accession} public sample returned from ENA") + return sample + + def build_query(self): + """If the private flag is given, assume private data and try private APIs. + ENA also has cases where a run may be private but the sample may be public etc. 
Hence always try + public if private fails""" + api_map = { + "study": (self._get_private_study, self._get_public_study), + "run": (self._get_private_run, self._get_public_run), + "run_assembly": (self._get_private_run_from_assembly, self._get_public_run_from_assembly), + "study_runs": (self._get_private_study_runs, self._get_public_study_runs), + "sample": (self._get_private_sample, self._get_public_sample), + } - def query_taxid(self, taxid): - url = "https://www.ebi.ac.uk/ena/taxonomy/rest/tax-id/{}".format(taxid) - response = requests.get(url) + private_api, public_api = api_map.get(self.query_type, (None, None)) - try: - # Will raise exception if response status code is non-200 - response.raise_for_status() - except requests.exceptions.HTTPError as e: - print("Request failed {} with error {}".format(url, e)) - return False - - res = response.json() - - return res.get("scientificName", "") - - def query_scientific_name(self, scientificName, searchRank=False): - url = "https://www.ebi.ac.uk/ena/taxonomy/rest/scientific-name/{}".format(scientificName) - response = requests.get(url) - - try: - # Will raise exception if response status code is non-200 - response.raise_for_status() - except requests.exceptions.HTTPError as e: - if searchRank: - return False, "", "" - else: - return False, "" - - try: - res = response.json()[0] - except IndexError: - if searchRank: - return False, "", "" - else: - return False, "" + if self.private: + try: + ena_response = private_api() + except Exception: + logging.info(f"Private API for {self.query_type} failed, trying public.") + ena_response = public_api() + else: + ena_response = public_api() + + return ena_response - submittable = res.get("submittable", "").lower() == "true" - taxid = res.get("taxId", "") - rank = res.get("rank", "") - if searchRank: - return submittable, taxid, rank +class EnaSubmit: + def __init__(self, sample_xml, submission_xml, live=False): + self.sample_xml = sample_xml + self.submission_xml = 
submission_xml + self.live = live + username, password = configure_credentials() + if username is None or password is None: + logging.error("ENA_WEBIN and ENA_WEBIN_PASSWORD are not set") + if username and password: + self.auth = (username, password) else: - return submittable, taxid + self.auth = None - def handle_genomes_registration(self, sample_xml, submission_xml, webin, password, live=False): - liveSub, mode = "", "live" + def handle_genomes_registration(self): + live_sub, mode = "", "live" - if not live: - liveSub = "dev" + if not self.live: + live_sub = "dev" mode = "test" - url = "https://www{}.ebi.ac.uk/ena/submit/drop-box/submit/".format(liveSub) + url = "https://www{}.ebi.ac.uk/ena/submit/drop-box/submit/".format(live_sub) - logger.info('Registering sample xml in {} mode.'.format(mode)) + logger.info("Registering sample xml in {} mode.".format(mode)) - f = { - 'SUBMISSION': open(submission_xml, 'r'), - 'SAMPLE': open(sample_xml, 'r') - } + f = {"SUBMISSION": open(self.submission_xml, "r"), "SAMPLE": open(self.sample_xml, "r")} - submissionResponse = requests.post(url, files = f, auth = (webin, password)) + submission_response = requests.post(url, files=f, auth=self.auth) - if submissionResponse.status_code != 200: - if str(submissionResponse.status_code).startswith('5'): - raise Exception("Genomes could not be submitted to ENA as the server " + - "does not respond. Please again try later.") + if submission_response.status_code != 200: + if str(submission_response.status_code).startswith("5"): + raise Exception("Genomes could not be submitted to ENA as the server " + "does not respond. Please again try later.") else: - raise Exception("Genomes could not be submitted to ENA. HTTP response: " + - submissionResponse.reason) + raise Exception("Genomes could not be submitted to ENA. 
HTTP response: " + submission_response.reason) - receiptXml = minidom.parseString((submissionResponse.content).decode("utf-8")) - receipt = receiptXml.getElementsByTagName("RECEIPT") + receipt_xml = minidom.parseString((submission_response.content).decode("utf-8")) + receipt = receipt_xml.getElementsByTagName("RECEIPT") success = receipt[0].attributes["success"].value if success == "true": - aliasDict = {} - samples = receiptXml.getElementsByTagName("SAMPLE") + alias_dict = {} + samples = receipt_xml.getElementsByTagName("SAMPLE") for s in samples: - sraAcc = s.attributes["accession"].value + sra_acc = s.attributes["accession"].value alias = s.attributes["alias"].value - aliasDict[alias] = sraAcc + alias_dict[alias] = sra_acc elif success == "false": - errors = receiptXml.getElementsByTagName("ERROR") - finalError = "\tSome genomes could not be submitted to ENA. Please, check the errors below." + errors = receipt_xml.getElementsByTagName("ERROR") + final_error = "\tSome genomes could not be submitted to ENA. Please, check the errors below." for error in errors: - finalError += "\n\t" + error.firstChild.data - finalError += "\n\tIf you wish to validate again your data and metadata, " - finalError += "please use the --force option." - raise Exception(finalError) - - logger.info('{} genome samples successfully registered.'.format(str(len(aliasDict)))) - - return aliasDict + final_error += "\n\t" + error.firstChild.data + final_error += "\n\tIf you wish to validate again your data and metadata, " + final_error += "please use the --force option." 
+ raise Exception(final_error) + + logger.info("{} genome samples successfully registered.".format(str(len(alias_dict)))) + + return alias_dict diff --git a/genomeuploader/genome_upload.py b/genomeuploader/genome_upload.py index 99cf3fb..4436195 100755 --- a/genomeuploader/genome_upload.py +++ b/genomeuploader/genome_upload.py @@ -14,34 +14,72 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import sys -import logging import argparse -import re import json -import pandas as pd -from datetime import date, datetime as dt -from dotenv import load_dotenv -from pathlib import Path - -import xml.etree.ElementTree as ET +import logging +import os +import re +import sys import xml.dom.minidom as minidom -import requests - -from ena import ENA +import xml.etree.ElementTree as ET +from datetime import date +from datetime import datetime as dt -from constants import METAGENOMES, GEOGRAPHIC_LOCATIONS, MQ, HQ +import ena +import pandas as pd +from constants import ( + BIN_MANDATORY_FIELDS, + GEOGRAPHIC_LOCATIONS, + GEOGRAPHY_DIGIT_COORDS, + HQ, + MAG_MANDATORY_FIELDS, + METAGENOMES, + MQ, +) +from ena import EnaQuery, EnaSubmit logging.basicConfig(level=logging.DEBUG) - logger = logging.getLogger(__name__) -ena = ENA() -GEOGRAPHY_DIGIT_COORDS = 8 +def parse_args(argv): + parser = argparse.ArgumentParser( + formatter_class=argparse.ArgumentDefaultsHelpFormatter, + description="Create xmls and manifest files for genome upload and upload to ENA", + ) + + parser.add_argument("-u", "--upload_study", type=str, required=True, help="Study accession for genomes upload") + parser.add_argument("--genome_info", type=str, required=True, help="Genomes metadata file") + + genomeType = parser.add_mutually_exclusive_group(required=True) + genomeType.add_argument("-m", "--mags", action="store_true", help="Select for MAG upload") + genomeType.add_argument("-b", "--bins", action="store_true", help="Select for bin upload") + + 
parser.add_argument("--out", type=str, help="Output folder. Default: working directory") + parser.add_argument("--force", action="store_true", required=False, default=False, help="Forces reset of sample xml's backups") + parser.add_argument( + "--live", + action="store_true", + required=False, + default=False, + help="Uploads on ENA. Omitting this " + "option allows to validate samples beforehand", + ) + parser.add_argument("--tpa", action="store_true", required=False, default=False, help="Select if uploading TPA-generated genomes") + + # Users can provide their credentials in environment variables or using a config file + parser.add_argument("--centre_name", required=True, help="Name of the centre uploading genomes") + parser.add_argument( + "--private", + required=False, + action="store_true", + default=False, + help="if data is private", + ) + + return parser.parse_args(argv) + -''' +""" Input table: expects the following parameters: genome_name: genome file name accessions: run(s) or assembly(ies) the genome was generated from @@ -60,190 +98,95 @@ co-assembly: True/False, whether the genome was generated from a co-assembly genome_coverage : genome coverage genome_path: path to genome to upload -''' -def read_and_cleanse_metadata_tsv(inputFile, genomeType, live): - logger.info('Retrieving info for genomes to submit...') - - binMandatoryFields = ["genome_name", "accessions", - "assembly_software", "binning_software", - "binning_parameters", "stats_generation_software", "NCBI_lineage", - "broad_environment", "local_environment", "environmental_medium", "metagenome", - "co-assembly", "genome_coverage", "genome_path"] - MAGMandatoryFields = ["rRNA_presence", "completeness", "contamination"] - - allFields = MAGMandatoryFields + binMandatoryFields - metadata = pd.read_csv(inputFile, sep='\t', usecols=allFields) - - # make sure there are no empty cells - cleanColumns = list(metadata.dropna(axis=1)) - if genomeType == "MAGs": - missingValues = [item for item in 
allFields if item not in cleanColumns] - else: - missingValues = [item for item in binMandatoryFields if item not in cleanColumns] - - if missingValues: - raise ValueError("The following mandatory fields have missing values in " + - "the input file: {}".format(", ".join(missingValues))) - - # check amount of genomes to register at the same time - if len(metadata) >= 5000: - raise ValueError("Genomes need to be registered in batches of 5000 genomes or smaller.") - - # check whether accessions follow the right format - accessions_regExp = re.compile(r"([E|S|D]R[R|Z]\d{6,})") - - accessionComparison = pd.DataFrame(columns=["genome_name", "attemptive_accessions", - "correct", "mismatching", "co-assembly"]) - accessionComparison["genome_name"] = metadata["genome_name"] - - accessionComparison["attemptive_accessions"] = metadata["accessions"].map( - lambda a: len(a.split(','))) - - accessionComparison["correct"] = metadata["accessions"].map( - lambda a: len(accessions_regExp.findall(a))) - - accessionComparison["mismatching"] = accessionComparison.apply(lambda row: - True if row["attemptive_accessions"] == row["correct"] - else None, axis=1).isna() - - mismatchingAccessions = accessionComparison[accessionComparison["mismatching"]]["genome_name"] - if not mismatchingAccessions.empty: - raise ValueError("Run accessions are not correctly formatted for the following " + - "genomes: " + ','.join(mismatchingAccessions.values)) - - # check whether completeness and contamination are floats - try: - pd.to_numeric(metadata["completeness"]) - pd.to_numeric(metadata["contamination"]) - pd.to_numeric(metadata["genome_coverage"]) - except: - raise ValueError("Completeness, contamination or coverage values should be formatted as floats") - - # check whether all co-assemblies have more than one run associated and viceversa - accessionComparison["co-assembly"] = metadata["co-assembly"] - coassemblyDiscrepancy = metadata[( - (accessionComparison["correct"] < 2) & 
(accessionComparison["co-assembly"])) | - ((accessionComparison["correct"] > 1) & (~accessionComparison["co-assembly"]) - )]["genome_name"] - if not coassemblyDiscrepancy.empty: - raise ValueError("The following genomes show discrepancy between number of runs " - "involved and co-assembly status: " + ','.join(coassemblyDiscrepancy.values)) - - # are provided metagenomes part of the accepted metagenome list? - if False in metadata.apply(lambda row: - True if row["metagenome"] in METAGENOMES - else False, axis=1).unique(): - raise ValueError("Metagenomes associated with each genome need to belong to ENA's " + - "approved metagenomes list.") - - # do provided file paths exist? - if False in metadata.apply(lambda row: - True if os.path.exists(row["genome_path"]) - else False, axis =1).unique(): - raise FileNotFoundError("Some genome paths do not exist.") - - # check genome name lengths - #if not (metadata["genome_name"].map(lambda a: len(a) < 20).all()): - # raise ValueError("Genome names must be shorter than 20 characters.") - - # create dictionary while checking genome name uniqueness - uniqueness = metadata["genome_name"].nunique() == metadata["genome_name"].size - if uniqueness: - if not live: - timestamp = str(int(dt.timestamp(dt.now()))) - timestamp_names = [row["genome_name"] + '_' + timestamp for index, row in metadata.iterrows()] - metadata["unique_genome_name"] = timestamp_names - genomeInfo = metadata.set_index("unique_genome_name").transpose().to_dict() - else: - genomeInfo = metadata.set_index("genome_name").transpose().to_dict() - else: - raise ValueError("Duplicate names found in genome names") +""" - return genomeInfo def round_stats(stats): - newStat = round(float(stats), 2) - if newStat == 100.0: - newStat = 100 - if newStat == 0: - newStat = 0.0 + new_stat = round(float(stats), 2) + if new_stat == 100.0: + new_stat = 100 + if new_stat == 0: + new_stat = 0.0 - return newStat + return new_stat -def compute_MAG_quality(completeness, contamination, 
RNApresence): - RNApresent = str(RNApresence).lower() in ["true", "yes", "y"] + +def compute_mag_quality(completeness, contamination, rna_presence): + rna_presence = str(rna_presence).lower() in ["true", "yes", "y"] quality = MQ - if float(completeness) >= 90 and float(contamination) <= 5 and RNApresent: + if float(completeness) >= 90 and float(contamination) <= 5 and rna_presence: quality = HQ - + return quality, completeness, contamination -def extract_tax_info(taxInfo): + +def extract_tax_info(tax_info): # if unclassified, block the execution - lineage, kingdomPositionInLineage, digitAnnotation = taxInfo.split(';'), 0, False - lineageFirst = lineage[0] - if "Unclassified " in lineageFirst: - if "Archaea" in lineageFirst: - scientificName = "uncultured archaeon" - elif "Bacteria" in lineageFirst: - scientificName = "uncultured bacterium" - elif "Eukaryota" in lineageFirst: - scientificName = "uncultured eukaryote" - submittable, taxid, rank = ena.query_scientific_name(scientificName, searchRank=True) - return taxid, scientificName + lineage, kingdom_position_lineage, digit_annotation = tax_info.split(";"), 0, False + lineage_first = lineage[0] + if "Unclassified " in lineage_first: + if "Archaea" in lineage_first: + scientific_name = "uncultured archaeon" + elif "Bacteria" in lineage_first: + scientific_name = "uncultured bacterium" + elif "Eukaryota" in lineage_first: + scientific_name = "uncultured eukaryote" + submittable, taxid, rank = ena.query_scientific_name(scientific_name, search_rank=True) + return taxid, scientific_name kingdoms = ["Archaea", "Bacteria", "Eukaryota"] - kingdomTaxa = ["2157", "2", "2759"] + kingdom_taxa = ["2157", "2", "2759"] - selectedKingdom, finalKingdom = kingdoms, "" + selected_kingdom, final_kingdom = kingdoms, "" if lineage[1].isdigit(): - selectedKingdom = kingdomTaxa - kingdomPositionInLineage = 2 - digitAnnotation = True - for index, k in enumerate(selectedKingdom): - if digitAnnotation: - if k == 
lineage[kingdomPositionInLineage]: - finalKingdom = selectedKingdom[index] + selected_kingdom = kingdom_taxa + kingdom_position_lineage = 2 + digit_annotation = True + for index, k in enumerate(selected_kingdom): + if digit_annotation: + if k == lineage[kingdom_position_lineage]: + final_kingdom = selected_kingdom[index] break else: - if k in lineage[kingdomPositionInLineage]: - finalKingdom = selectedKingdom[index] + if k in lineage[kingdom_position_lineage]: + final_kingdom = selected_kingdom[index] break - iterator = len(lineage)-1 + iterator = len(lineage) - 1 submittable = False rank = "" while iterator != -1 and not submittable: - scientificName = lineage[iterator].strip() - if digitAnnotation: - if not '*' in scientificName: - scientificName = ena.query_taxid(scientificName) + scientific_name = lineage[iterator].strip() + if digit_annotation: + if "*" not in scientific_name: + scientific_name = ena.query_taxid(scientific_name) else: iterator -= 1 continue - elif "__" in scientificName: - scientificName = scientificName.split("__")[1] + elif "__" in scientific_name: + scientific_name = scientific_name.split("__")[1] else: - raise ValueError("Unrecognised taxonomy format: " + scientificName) - submittable, taxid, rank = ena.query_scientific_name(scientificName, searchRank=True) + raise ValueError("Unrecognised taxonomy format: " + scientific_name) + submittable, taxid, rank = ena.query_scientific_name(scientific_name, search_rank=True) if not submittable: - if finalKingdom == "Archaea" or finalKingdom == "2157": - submittable, scientificName, taxid = extract_Archaea_info(scientificName, rank) - elif finalKingdom == "Bacteria" or finalKingdom == "2": - submittable, scientificName, taxid = extract_Bacteria_info(scientificName, rank) - elif finalKingdom == "Eukaryota" or finalKingdom == "2759": - submittable, scientificName, taxid = extract_Eukaryota_info(scientificName, rank) + if final_kingdom == "Archaea" or final_kingdom == "2157": + submittable, 
scientific_name, taxid = extract_archaea_info(scientific_name, rank) + elif final_kingdom == "Bacteria" or final_kingdom == "2": + submittable, scientific_name, taxid = extract_bacteria_info(scientific_name, rank) + elif final_kingdom == "Eukaryota" or final_kingdom == "2759": + submittable, scientific_name, taxid = extract_eukaryota_info(scientific_name, rank) iterator -= 1 - return taxid, scientificName + return taxid, scientific_name -def extract_Eukaryota_info(name, rank): - nonSubmittable = (False, "", 0) + +def extract_eukaryota_info(name, rank): + non_submittable = (False, "", 0) # Asterisks in given taxonomy suggest the classification might be not confident enough. - if '*' in name: - return nonSubmittable + if "*" in name: + return non_submittable if rank == "super kingdom": name = "uncultured eukaryote" @@ -260,24 +203,25 @@ def extract_Eukaryota_info(name, rank): if submittable: return submittable, name, taxid else: - name = name.replace(" sp.", '') + name = name.replace(" sp.", "") submittable, taxid = ena.query_scientific_name(name) if submittable: return submittable, name, taxid else: - return nonSubmittable + return non_submittable + -def extract_Bacteria_info(name, rank): +def extract_bacteria_info(name, rank): if rank == "species": name = name elif rank == "superkingdom": - name = "uncultured bacterium".format(name) + name = "uncultured bacterium".format() elif rank in ["family", "order", "class", "phylum"]: name = "uncultured {} bacterium".format(name) elif rank == "genus": name = "uncultured {} sp.".format(name) - - submittable, taxid, rank = ena.query_scientific_name(name, searchRank=True) + + submittable, taxid, rank = ena.query_scientific_name(name, search_rank=True) if not submittable: if rank in ["species", "genus"] and name.lower().endswith("bacteria"): name = "uncultured {}".format(name.lower().replace("bacteria", "bacterium")) @@ -288,7 +232,8 @@ def extract_Bacteria_info(name, rank): return submittable, name, taxid -def 
extract_Archaea_info(name, rank): + +def extract_archaea_info(name, rank): if rank == "species": name = name elif rank == "superkingdom": @@ -304,326 +249,204 @@ def extract_Archaea_info(name, rank): name = "uncultured {} archaeon".format(name) elif rank == "genus": name = "uncultured {} sp.".format(name) - - submittable, taxid, rank = ena.query_scientific_name(name, searchRank=True) + + submittable, taxid, rank = ena.query_scientific_name(name, search_rank=True) if not submittable: if "Candidatus" in name: if rank == "phylum": - name = name.replace("Candidatus ", '') + name = name.replace("Candidatus ", "") elif rank == "family": - name = name.replace("uncultured ", '') + name = name.replace("uncultured ", "") submittable, taxid = ena.query_scientific_name(name) - + return submittable, name, taxid -def extract_genomes_info(inputFile, genomeType, live): - genomeInfo = read_and_cleanse_metadata_tsv(inputFile, genomeType, live) - for gen in genomeInfo: - genomeInfo[gen]["accessions"] = genomeInfo[gen]["accessions"].split(',') - accessionType = "run" - assembly_regExp = re.compile(r"([E|S|D]RZ\d{6,})") - if assembly_regExp.findall(genomeInfo[gen]["accessions"][0]): - accessionType = "assembly" - genomeInfo[gen]["accessionType"] = accessionType - - genomeInfo[gen]["isolationSource"] = genomeInfo[gen]["metagenome"] - - try: - (genomeInfo[gen]["MAG_quality"], - genomeInfo[gen]["completeness"], - genomeInfo[gen]["contamination"]) = compute_MAG_quality( - str(round_stats(genomeInfo[gen]["completeness"])), - str(round_stats(genomeInfo[gen]["contamination"])), - genomeInfo[gen]["rRNA_presence"]) - except IndexError: - pass - - if str(genomeInfo[gen]["co-assembly"]).lower() in ["yes", "y", "true"]: - genomeInfo[gen]["co-assembly"] = True - else: - genomeInfo[gen]["co-assembly"] = False - - genomeInfo[gen]["alias"] = gen - - taxID, scientificName = extract_tax_info(genomeInfo[gen]["NCBI_lineage"]) - genomeInfo[gen]["taxID"] = taxID - genomeInfo[gen]["scientific_name"] = 
scientificName - - return genomeInfo - -def extract_ENA_info(genomeInfo, uploadDir, webin, password): - logger.info('Retrieving project and run info from ENA (this might take a while)...') - - # retrieving metadata from runs (and runs from assembly accessions if provided) - allRuns = [] - for g in genomeInfo: - if genomeInfo[g]["accessionType"] == "assembly": - derivedRuns = [] - for acc in genomeInfo[g]["accessions"]: - derivedRuns.append(ena.get_run_from_assembly(acc)) - genomeInfo[g]["accessions"] = derivedRuns - allRuns.extend(genomeInfo[g]["accessions"]) - - runsSet, studySet, samplesDict, tempDict = set(allRuns), set(), {}, {} - for r in runsSet: - run_info = ena.get_run(r, webin, password) - studySet.add(run_info["secondary_study_accession"]) - samplesDict[r] = run_info["sample_accession"] - - if not studySet: - raise ValueError("No study corresponding to runs found.") - - backupFile = os.path.join(uploadDir, "ENA_backup.json") - counter = 0 - if not os.path.exists(backupFile): - with open(backupFile, 'w') as file: - pass - with open(backupFile, "r+") as file: - try: - backupDict = json.load(file) - tempDict = dict(backupDict) - logger.info(f"A backup file {backupFile} for ENA sample metadata has been found.") - except json.decoder.JSONDecodeError: - backupDict = {} - for s in studySet: - studyInfo = ena.get_study(webin, password, s) - projectDescription = studyInfo["study_description"] - - ENA_info = ena.get_study_runs(s, webin, password) - if ENA_info == []: - raise IOError("No runs found on ENA for project {}.".format(s)) - - for run, item in enumerate(ENA_info): - runAccession = ENA_info[run]["run_accession"] - if runAccession not in backupDict: - if runAccession in runsSet: - sampleAccession = ENA_info[run]["sample_accession"] - sampleInfo = ena.get_sample(sampleAccession, webin, password) - - location = sampleInfo["location"] - latitude, longitude = None, None - if 'N' in location: - latitude = location.split('N')[0].strip() - longitude = 
location.split('N')[1].strip() - elif 'S' in location: - latitude = '-' + location.split('S')[0].strip() - longitude = location.split('S')[1].strip() - - if 'W' in longitude: - longitude = '-' + longitude.split('W')[0].strip() - elif longitude.endswith('E'): - longitude = longitude.split('E')[0].strip() - - if latitude: - latitude = "{:.{}f}".format(round(float(latitude), GEOGRAPHY_DIGIT_COORDS), GEOGRAPHY_DIGIT_COORDS) - else: - latitude = "not provided" - - if longitude: - longitude = "{:.{}f}".format(round(float(longitude), GEOGRAPHY_DIGIT_COORDS), GEOGRAPHY_DIGIT_COORDS) - else: - longitude = "not provided" - - country = sampleInfo["country"].split(':')[0] - if not country in GEOGRAPHIC_LOCATIONS: - country = "not provided" - - collectionDate = sampleInfo["collection_date"] - if collectionDate == "" or collectionDate == "missing": - collectionDate = "not provided" - - tempDict[runAccession] = { - "instrumentModel" : ENA_info[run]["instrument_model"], - "collectionDate" : collectionDate, - "country" : country, - "latitude" : latitude, - "longitude" : longitude, - "projectDescription" : projectDescription, - "study" : s, - "sampleAccession" : samplesDict[runAccession] - } - counter += 1 - - if (counter%10 == 0) or (len(runsSet) - len(backupDict) == counter): - file.seek(0) - file.write(json.dumps(tempDict)) - file.truncate() - tempDict = {**tempDict, **backupDict} - combine_ENA_info(genomeInfo, tempDict) - -def multipleElementSet(metadataList): - return len(set(metadataList))>1 - -def combine_ENA_info(genomeInfo, ENADict): - for g in genomeInfo: + +def multiple_element_set(metadata_list): + return len(set(metadata_list)) > 1 + + +def combine_ena_info(genome_info, ena_dict): + for g in genome_info: # TODO: optimise all the part below - if genomeInfo[g]["co-assembly"]: - instrumentList, collectionList, countryList = [], [], [] - studyList, descriptionList, samplesList = [], [], [] - longList, latitList = [], [] - for run in genomeInfo[g]["accessions"]: - 
instrumentList.append(ENADict[run]["instrumentModel"]) - collectionList.append(ENADict[run]["collectionDate"]) - countryList.append(ENADict[run]["country"]) - studyList.append(ENADict[run]["study"]) - descriptionList.append(ENADict[run]["projectDescription"]) - samplesList.append(ENADict[run]["sampleAccession"]) - longList.append(ENADict[run]["longitude"]) - latitList.append(ENADict[run]["latitude"]) - - genomeInfo[g]["study"] = studyList[0] - genomeInfo[g]["description"] = descriptionList[0] - - instrument = instrumentList[0] - if multipleElementSet(instrumentList): - instrument = ','.join(instrumentList) - genomeInfo[g]["sequencingMethod"] = instrument - - collectionDate = collectionList[0] - if multipleElementSet(collectionList): - collectionDate = "not provided" - genomeInfo[g]["collectionDate"] = collectionDate - - country = countryList[0] - if multipleElementSet(countryList): + if genome_info[g]["co-assembly"]: + instrument_list, collection_list, country_list = [], [], [] + study_list, description_list, samples_list = [], [], [] + long_list, latit_list = [], [] + for run in genome_info[g]["accessions"]: + instrument_list.append(ena_dict[run]["instrumentModel"]) + collection_list.append(ena_dict[run]["collectionDate"]) + country_list.append(ena_dict[run]["country"]) + study_list.append(ena_dict[run]["study"]) + description_list.append(ena_dict[run]["projectDescription"]) + samples_list.append(ena_dict[run]["sampleAccession"]) + long_list.append(ena_dict[run]["longitude"]) + latit_list.append(ena_dict[run]["latitude"]) + + genome_info[g]["study"] = study_list[0] + genome_info[g]["description"] = description_list[0] + + instrument = instrument_list[0] + if multiple_element_set(instrument_list): + instrument = ",".join(instrument_list) + genome_info[g]["sequencingMethod"] = instrument + + collection_date = collection_list[0] + if multiple_element_set(collection_list): + collection_date = "not provided" + genome_info[g]["collectionDate"] = collection_date + + 
country = country_list[0] + if multiple_element_set(country_list): country = "not applicable" - genomeInfo[g]["country"] = country + genome_info[g]["country"] = country - latitude = latitList[0] - if multipleElementSet(latitList): + latitude = latit_list[0] + if multiple_element_set(latit_list): latitude = "not provided" - genomeInfo[g]["latitude"] = str(round(float(latitude), GEOGRAPHY_DIGIT_COORDS)) + genome_info[g]["latitude"] = str(round(float(latitude), GEOGRAPHY_DIGIT_COORDS)) - longitude = longList[0] - if multipleElementSet(longList): + longitude = long_list[0] + if multiple_element_set(long_list): longitude = "not provided" - genomeInfo[g]["longitude"] = str(round(float(longitude), GEOGRAPHY_DIGIT_COORDS)) + genome_info[g]["longitude"] = str(round(float(longitude), GEOGRAPHY_DIGIT_COORDS)) - samples = samplesList[0] - if multipleElementSet(samplesList): - samples = ','.join(samplesList) - genomeInfo[g]["sample_accessions"] = samples + samples = samples_list[0] + if multiple_element_set(samples_list): + samples = ",".join(samples_list) + genome_info[g]["sample_accessions"] = samples else: - run = genomeInfo[g]["accessions"][0] - genomeInfo[g]["sequencingMethod"] = ENADict[run]["instrumentModel"] - genomeInfo[g]["collectionDate"] = ENADict[run]["collectionDate"] - genomeInfo[g]["study"] = ENADict[run]["study"] - genomeInfo[g]["description"] = ENADict[run]["projectDescription"] - genomeInfo[g]["sample_accessions"] = ENADict[run]["sampleAccession"] - genomeInfo[g]["country"] = ENADict[run]["country"] - genomeInfo[g]["longitude"] = ENADict[run]["longitude"] - genomeInfo[g]["latitude"] = ENADict[run]["latitude"] - - genomeInfo[g]["accessions"] = ','.join(genomeInfo[g]["accessions"]) - - - -def getAccessions(accessionsFile): - accessionDict = {} - with open(accessionsFile, 'r') as f: + run = genome_info[g]["accessions"][0] + genome_info[g]["sequencingMethod"] = ena_dict[run]["instrumentModel"] + genome_info[g]["collectionDate"] = ena_dict[run]["collectionDate"] + 
genome_info[g]["study"] = ena_dict[run]["study"] + genome_info[g]["description"] = ena_dict[run]["projectDescription"] + genome_info[g]["sample_accessions"] = ena_dict[run]["sampleAccession"] + genome_info[g]["country"] = ena_dict[run]["country"] + genome_info[g]["longitude"] = ena_dict[run]["longitude"] + genome_info[g]["latitude"] = ena_dict[run]["latitude"] + + genome_info[g]["accessions"] = ",".join(genome_info[g]["accessions"]) + + +def get_accessions(accessions_file): + accession_dict = {} + with open(accessions_file, "r") as f: for line in f: - line = line.split('\t') + line = line.split("\t") alias = line[0] - accession = line[1].rstrip('\n') - accessionDict[alias] = accession - - return accessionDict - -def saveAccessions(aliasAccessionDict, accessionsFile, writeMode): - with open(accessionsFile, writeMode) as f: - for elem in aliasAccessionDict: - f.write("{}\t{}\n".format(elem, aliasAccessionDict[elem])) - -def create_manifest_dictionary(run, alias, assemblySoftware, sequencingMethod, - MAGpath, gen, study, coverage, isCoassembly): - manifestDict = { - "accessions" : run, - "alias" : alias, - "assembler" : assemblySoftware, - "sequencingMethod" : sequencingMethod, - "genome_path" : MAGpath, - "genome_name" : gen, - "study" : study, - "coverageDepth" : coverage, - "co-assembly" : isCoassembly + accession = line[1].rstrip("\n") + accession_dict[alias] = accession + + return accession_dict + + +def save_accessions(alias_accession_dict, accessions_file, write_mode): + with open(accessions_file, write_mode) as f: + for elem in alias_accession_dict: + f.write("{}\t{}\n".format(elem, alias_accession_dict[elem])) + + +def create_manifest_dictionary(run, alias, assembly_software, sequencing_method, mag_path, gen, study, coverage, is_coassembly): + manifest_dict = { + "accessions": run, + "alias": alias, + "assembler": assembly_software, + "sequencingMethod": sequencing_method, + "genome_path": mag_path, + "genome_name": gen, + "study": study, + "coverageDepth": 
coverage, + "co-assembly": is_coassembly, } - return manifestDict + return manifest_dict + def compute_manifests(genomes): - manifestInfo = {} + manifest_info = {} for g in genomes: - manifestInfo[g] = create_manifest_dictionary(genomes[g]["accessions"], - genomes[g]["alias"], genomes[g]["assembly_software"], - genomes[g]["sequencingMethod"], genomes[g]["genome_path"], g, - genomes[g]["study"], genomes[g]["genome_coverage"], genomes[g]["co-assembly"]) - - return manifestInfo + manifest_info[g] = create_manifest_dictionary( + genomes[g]["accessions"], + genomes[g]["alias"], + genomes[g]["assembly_software"], + genomes[g]["sequencingMethod"], + genomes[g]["genome_path"], + g, + genomes[g]["study"], + genomes[g]["genome_coverage"], + genomes[g]["co-assembly"], + ) + + return manifest_info + def get_study_from_xml(sample): description = sample.childNodes[5].childNodes[0].data - study = description.split(' ')[-1][:-1] + study = description.split(" ")[-1][:-1] return study -def recover_info_from_xml(genomeDict, sample_xml, live_mode): + +def recover_info_from_xml(genome_dict, sample_xml, live_mode): logger.info("Retrieving data for genome submission...") # extract list of genomes (samples) to be registered xml_structure = minidom.parse(sample_xml) samples = xml_structure.getElementsByTagName("SAMPLE") - + for s in samples: study = get_study_from_xml(s) # extract alias from xml and find a match with genomes the user is uploading - XMLalias = s.attributes["alias"].value - if not live_mode: # remove time stamp if test mode is selected - aliasSplit = XMLalias.split("_") - XMLalias = '_'.join(aliasSplit[:-1]) - for gen in genomeDict: + xml_alias = s.attributes["alias"].value + if not live_mode: # remove time stamp if test mode is selected + alias_split = xml_alias.split("_") + xml_alias = "_".join(alias_split[:-1]) + for gen in genome_dict: # if match is found, associate attributes listed in the xml file # with genomes to upload - if XMLalias == gen: + if xml_alias == gen: if 
not live_mode: - currentTimestamp = str(int(dt.timestamp(dt.now()))) - XMLalias = gen + '_' + currentTimestamp - s.attributes["alias"].value = XMLalias - sampleTitle = s.getElementsByTagName("TITLE")[0] - sampleTitleValue = sampleTitle.firstChild.nodeValue.split("_") - sampleTitleValue[-1] = currentTimestamp - newSampleTitle = '_'.join(sampleTitleValue) - s.getElementsByTagName("TITLE")[0].firstChild.replaceWholeText(newSampleTitle) + current_time_stamp = str(int(dt.timestamp(dt.now()))) + xml_alias = gen + "_" + current_time_stamp + s.attributes["alias"].value = xml_alias + sample_title = s.getElementsByTagName("TITLE")[0] + sample_title_value = sample_title.firstChild.nodeValue.split("_") + sample_title_value[-1] = current_time_stamp + new_sample_title = "_".join(sample_title_value) + s.getElementsByTagName("TITLE")[0].firstChild.replaceWholeText(new_sample_title) attributes = s.childNodes[7].getElementsByTagName("SAMPLE_ATTRIBUTE") - seqMethod, assSoftware = "", "" + seq_method, ass_software = "", "" for a in attributes: - tagElem = a.getElementsByTagName("TAG") - tag = tagElem[0].childNodes[0].nodeValue + tag_elem = a.getElementsByTagName("TAG") + tag = tag_elem[0].childNodes[0].nodeValue if tag == "sequencing method": - seqMethodElem = a.getElementsByTagName("VALUE") - seqMethod = seqMethodElem[0].childNodes[0].nodeValue + seq_method_elem = a.getElementsByTagName("VALUE") + seq_method = seq_method_elem[0].childNodes[0].nodeValue elif tag == "assembly software": - assSoftwareElem = a.getElementsByTagName("VALUE") - assSoftware = assSoftwareElem[0].childNodes[0].nodeValue - if not seqMethod == "" and not assSoftware == "": + ass_software_elem = a.getElementsByTagName("VALUE") + ass_software = ass_software_elem[0].childNodes[0].nodeValue + if not seq_method == "" and not ass_software == "": break - genomeDict[gen]["accessions"] = ','.join(genomeDict[gen]["accessions"]) - genomeDict[gen]["alias"] = XMLalias - genomeDict[gen]["assembly_software"] = assSoftware - 
genomeDict[gen]["sequencingMethod"] = seqMethod - genomeDict[gen]["study"] = study + genome_dict[gen]["accessions"] = ",".join(genome_dict[gen]["accessions"]) + genome_dict[gen]["alias"] = xml_alias + genome_dict[gen]["assembly_software"] = ass_software + genome_dict[gen]["sequencingMethod"] = seq_method + genome_dict[gen]["study"] = study break if not live_mode: for s in samples: xml_structure.firstChild.appendChild(s) - - with open(sample_xml, 'wb') as f: + + with open(sample_xml, "wb") as f: dom_string = xml_structure.toprettyxml().encode("utf-8") - dom_string = b'\n'.join([s for s in dom_string.splitlines() if s.strip()]) + dom_string = b"\n".join([s for s in dom_string.splitlines() if s.strip()]) f.write(dom_string) + def create_sample_attribute(sample_attributes, data_list, mag_data=None): tag = data_list[0] value = data_list[1] @@ -632,15 +455,16 @@ def create_sample_attribute(sample_attributes, data_list, mag_data=None): units = None if len(data_list) == 3: units = data_list[2] - + new_sample_attr = ET.SubElement(sample_attributes, "SAMPLE_ATTRIBUTE") - ET.SubElement(new_sample_attr, 'TAG').text = tag - ET.SubElement(new_sample_attr, 'VALUE').text = value + ET.SubElement(new_sample_attr, "TAG").text = tag + ET.SubElement(new_sample_attr, "VALUE").text = value if units: - ET.SubElement(new_sample_attr, 'UNITS').text = units + ET.SubElement(new_sample_attr, "UNITS").text = units -def write_genomes_xml(genomes, xml_path, genomeType, centreName, tpa): + +def write_genomes_xml(genomes, xml_path, genome_type, centre_name, tpa): map_sample_attributes = [ # tag - value - unit (optional) ["project name", "description"], @@ -664,10 +488,10 @@ def write_genomes_xml(genomes, xml_path, genomeType, centreName, tpa): ["metagenomic source", "metagenome"], ] - checklist, assemblyType = "ERC000047", "Metagenome-assembled genome" - if genomeType == "bins": + checklist, assembly_type = "ERC000047", "Metagenome-assembled genome" + if genome_type == "bins": checklist = 
"ERC000050" - assemblyType = "binned metagenome" + assembly_type = "binned metagenome" constant_sample_attributes = [ # tag - value @@ -676,27 +500,25 @@ def write_genomes_xml(genomes, xml_path, genomeType, centreName, tpa): ["ENA-CHECKLIST", checklist], ] - tpaDescription = "" + tpa_description = "" if tpa: - tpaDescription = "Third Party Annotation (TPA) " + tpa_description = "Third Party Annotation (TPA) " sample_set = ET.Element("SAMPLE_SET") for g in genomes: plural = "" if genomes[g]["co-assembly"]: - plural = 's' - description = ("This sample represents a {}{} assembled from the " - "metagenomic run{} {} of study {}.".format(tpaDescription, - assemblyType, plural, genomes[g]["accessions"], - genomes[g]["study"])) - + plural = "s" + description = "This sample represents a {}{} assembled from the " "metagenomic run{} {} of study {}.".format( + tpa_description, assembly_type, plural, genomes[g]["accessions"], genomes[g]["study"] + ) + sample = ET.SubElement(sample_set, "SAMPLE") sample.set("alias", genomes[g]["alias"]) - sample.set("center_name", centreName) + sample.set("center_name", centre_name) - ET.SubElement(sample, 'TITLE').text = ("{}: {}".format(assemblyType, - genomes[g]["alias"])) + ET.SubElement(sample, "TITLE").text = "{}: {}".format(assembly_type, genomes[g]["alias"]) sample_name = ET.SubElement(sample, "SAMPLE_NAME") ET.SubElement(sample_name, "TAXON_ID").text = genomes[g]["taxID"] ET.SubElement(sample_name, "SCIENTIFIC_NAME").text = genomes[g]["scientific_name"] @@ -712,230 +534,453 @@ def write_genomes_xml(genomes, xml_path, genomeType, centreName, tpa): for constant in constant_sample_attributes: create_sample_attribute(sample_attributes, constant) - with open(xml_path, 'wb') as f: - dom = minidom.parseString( - ET.tostring(sample_set, encoding="utf-8") - ) + with open(xml_path, "wb") as f: + dom = minidom.parseString(ET.tostring(sample_set, encoding="utf-8")) f.write(dom.toprettyxml().encode("utf-8")) + def write_submission_xml(upload_dir, 
centre_name, study=True): today = str(date.today()) - sub_xml = os.path.join(upload_dir, 'submission.xml') + sub_xml = os.path.join(upload_dir, "submission.xml") - submission = ET.Element('SUBMISSION') - submission.set('center_name', centre_name) + submission = ET.Element("SUBMISSION") + submission.set("center_name", centre_name) # template - actions = ET.SubElement(submission, 'ACTIONS') - action_sub = ET.SubElement(actions, 'ACTION') - ET.SubElement(action_sub, 'ADD') + actions = ET.SubElement(submission, "ACTIONS") + action_sub = ET.SubElement(actions, "ACTION") + ET.SubElement(action_sub, "ADD") # attributes: function and hold date if study: - action_hold = ET.SubElement(actions, 'ACTION') - hold = ET.SubElement(action_hold, 'HOLD') - hold.set('HoldUntilDate', today) + action_hold = ET.SubElement(actions, "ACTION") + hold = ET.SubElement(action_hold, "HOLD") + hold.set("HoldUntilDate", today) - with open(sub_xml, 'wb') as submission_file: - dom = minidom.parseString( - ET.tostring(submission, encoding="utf-8") - ) + with open(sub_xml, "wb") as submission_file: + dom = minidom.parseString(ET.tostring(submission, encoding="utf-8")) submission_file.write(dom.toprettyxml().encode("utf-8")) return sub_xml -def generate_genome_manifest(genomeInfo, study, manifestsRoot, aliasToSample, genomeType, tpa): - manifest_path = os.path.join(manifestsRoot, f'{genomeInfo["genome_name"]}.manifest') - - tpaAddition, multipleRuns = "", "" + +def generate_genome_manifest(genome_info, study, manifests_root, alias_to_sample, genome_type, tpa): + manifest_path = os.path.join(manifests_root, f'{genome_info["genome_name"]}.manifest') + + tpa_addition, multiple_runs = "", "" if tpa: - tpaAddition = "Third Party Annotation (TPA) " - if genomeInfo["co-assembly"]: - multipleRuns = "s" - assemblyType = "Metagenome-Assembled Genome (MAG)" - if genomeType == "bins": - assemblyType = "binned metagenome" + tpa_addition = "Third Party Annotation (TPA) " + if genome_info["co-assembly"]: + 
multiple_runs = "s" + assembly_type = "Metagenome-Assembled Genome (MAG)" + if genome_type == "bins": + assembly_type = "binned metagenome" values = ( - ('STUDY', study), - ('SAMPLE', aliasToSample[genomeInfo["alias"]]), - ('ASSEMBLYNAME', genomeInfo["alias"]), - ('ASSEMBLY_TYPE', assemblyType), - ('COVERAGE', genomeInfo["coverageDepth"]), - ('PROGRAM', genomeInfo["assembler"]), - ('PLATFORM', genomeInfo["sequencingMethod"]), - ('MOLECULETYPE', "genomic DNA"), - ('DESCRIPTION', ("This is a {}bin derived from the primary whole genome " - "shotgun (WGS) data set {}. This sample represents a {} from the " - "metagenomic run{} {}.".format(tpaAddition, genomeInfo["study"], - assemblyType, multipleRuns, genomeInfo["accessions"]))), - ('RUN_REF', genomeInfo["accessions"]), - ('FASTA', os.path.abspath(genomeInfo["genome_path"])) + ("STUDY", study), + ("SAMPLE", alias_to_sample[genome_info["alias"]]), + ("ASSEMBLYNAME", genome_info["alias"]), + ("ASSEMBLY_TYPE", assembly_type), + ("COVERAGE", genome_info["coverageDepth"]), + ("PROGRAM", genome_info["assembler"]), + ("PLATFORM", genome_info["sequencingMethod"]), + ("MOLECULETYPE", "genomic DNA"), + ( + "DESCRIPTION", + ( + "This is a {}bin derived from the primary whole genome " + "shotgun (WGS) data set {}. 
This sample represents a {} from the " + "metagenomic run{} {}.".format(tpa_addition, genome_info["study"], assembly_type, multiple_runs, genome_info["accessions"]) + ), + ), + ("RUN_REF", genome_info["accessions"]), + ("FASTA", os.path.abspath(genome_info["genome_path"])), ) - logger.info("Writing manifest file (.manifest) for {}.".format(genomeInfo["alias"])) + logger.info("Writing manifest file (.manifest) for {}.".format(genome_info["alias"])) with open(manifest_path, "w") as outfile: - for (k, v) in values: - manifest = f'{k}\t{v}\n' + for k, v in values: + manifest = f"{k}\t{v}\n" outfile.write(manifest) if tpa: outfile.write("TPA\ttrue\n") -def main(): - ENA_uploader = GenomeUpload() - - if not ENA_uploader.live: - logger.warning("Warning: genome submission is not in live mode, " + - "files will be validated, but not uploaded.") - - xmlGenomeFile, xmlSubFile = "genome_samples.xml", "submission.xml" - samples_xml = os.path.join(ENA_uploader.upload_dir, xmlGenomeFile) - submissionXmlPath = os.path.join(ENA_uploader.upload_dir, xmlSubFile) - submission_xml = submissionXmlPath - genomes, manifestInfo = {}, {} - - # submission xml existence - if not os.path.exists(submissionXmlPath): - submission_xml = write_submission_xml(ENA_uploader.upload_dir, ENA_uploader.centre_name, False) - - # sample xml generation or recovery - genomes = ENA_uploader.create_genome_dictionary(samples_xml) - - # manifests creation - manifestDir = os.path.join(ENA_uploader.upload_dir, "manifests") - os.makedirs(manifestDir, exist_ok=True) - - accessionsgen = "registered_MAGs.tsv" - if ENA_uploader.genomeType == "bins": - accessionsgen = accessionsgen.replace("MAG", "bin") - if not ENA_uploader.live: - accessionsgen = accessionsgen.replace(".tsv", "_test.tsv") - - accessionsFile = os.path.join(ENA_uploader.upload_dir, accessionsgen) - save = False - writeMode = 'a' - if os.path.exists(accessionsFile): - if not ENA_uploader.live: - save = True - if ENA_uploader.force: - writeMode = 'w' - if 
not save: - logger.info("Genome samples already registered, reading ERS accessions...") - aliasToNewSampleAccession = getAccessions(accessionsFile) - else: - save = True - - if save: - logger.info("Registering genome samples XMLs...") - aliasToNewSampleAccession = ena.handle_genomes_registration(samples_xml, - submission_xml, ENA_uploader.username, ENA_uploader.password, ENA_uploader.live) - saveAccessions(aliasToNewSampleAccession, accessionsFile, writeMode) - - logger.info("Generating manifest files...") - - manifestInfo = compute_manifests(genomes) - - for m in manifestInfo: - generate_genome_manifest(manifestInfo[m], ENA_uploader.upStudy, - manifestDir, aliasToNewSampleAccession, ENA_uploader.genomeType, ENA_uploader.tpa) class GenomeUpload: - def __init__(self, argv=sys.argv[1:]): - self.args = self.parse_args(argv) - self.upStudy = self.args.upload_study - self.genomeMetadata = self.args.genome_info - self.genomeType = "bins" if self.args.bins else "MAGs" - self.live = True if self.args.live else False - - if self.args.webin and self.args.password: - self.username = self.args.webin - self.password = self.args.password - else: - # Config file - user_config = Path.home() / ".genome_uploader.config.env" - if user_config.exists(): - logger.debug("Loading the env variables from ".format(user_config)) - load_dotenv(str(user_config)) - else: - cwd_config = Path.cwd() / ".genome_uploader.config.env" - if cwd_config.exists(): - logger.debug("Loading the variables from the current directory.") - load_dotenv(str(cwd_config)) - else: - logger.debug("Trying to load env variables from the .env file") - # from a local .env file - load_dotenv() - - self.username = os.getenv("ENA_WEBIN") - self.password = os.getenv("ENA_WEBIN_PASSWORD") - - if not self.username or not self.password: - logger.error("ENA Webin username or password are empty") - sys.exit(1) - - self.tpa = True if self.args.tpa else False - self.centre_name = self.args.centre_name - self.force = True if 
self.args.force else False - - workDir = self.args.out if self.args.out else os.getcwd() - self.upload_dir = self.generate_genomes_upload_dir(workDir, self.genomeType) - - def parse_args(self, argv): - parser = argparse.ArgumentParser(formatter_class = argparse.ArgumentDefaultsHelpFormatter, - description="Create xmls and manifest files for genome upload to ENA") - - parser.add_argument('-u', '--upload_study', type=str, help="Study accession for genomes upload") - parser.add_argument('--genome_info', type=str, required=True, help="Genomes metadata file") - - genomeType = parser.add_mutually_exclusive_group(required=True) - genomeType.add_argument('-m', '--mags', action='store_true', help="Select for MAG upload") - genomeType.add_argument('-b', '--bins', action='store_true', help="Select for bin upload") - - parser.add_argument('--out', type=str, help="Output folder. Default: working directory") - parser.add_argument('--force', action='store_true', help="Forces reset of sample xml's backups") - parser.add_argument('--live', action='store_true', help="Uploads on ENA. Omitting this " + - "option allows to validate samples beforehand") - parser.add_argument('--tpa', action='store_true', help="Select if uploading TPA-generated genomes") - - # Users can provide their credentials and centre name manually or using a config file - parser.add_argument('--webin', required=False, help="Webin id") - parser.add_argument('--password', required=False, help="Webin password") - parser.add_argument('--centre_name', required=False, help="Name of the centre uploading genomes") - - args = parser.parse_args(argv) - - if not args.upload_study: + def __init__( + self, + upload_study: str, + centre_name: str, + genome_info: str, + bins: bool = False, + live: bool = False, + private: bool = False, + tpa: bool = False, + force: bool = False, + out: str = None, + ): + """ + Submission of genomes. + + :param upload_study: Study accession for genomes upload. 
+ :param centre_name: Name of the centre uploading genomes. + :param genome_info: Genomes metadata file. + :param bins: Performs bin upload. + :params live: Live upload to ENA. + :param private: Is this a private study? + :param tpa: Is this a third-party assembly? + :param force: Resets sample XML backups. + :param out: Output folder. + """ + + self.genome_type = "bins" if bins else "MAGs" + self.live = live + self.private = private + + self.tpa = tpa + self.centre_name = centre_name + self.force = force + + self.work_dir = out if out is not None else os.getcwd() + self.upload_dir = self.generate_genomes_upload_dir() + + if not upload_study: raise ValueError("No project selected for genome upload [-u, --upload_study].") - - if not os.path.exists(args.genome_info): - raise FileNotFoundError('Genome metadata file "{}" does not exist'.format(args.genome_info)) + self.upload_study = upload_study + + if not os.path.exists(genome_info): + raise FileNotFoundError(f"Genome metadata file {genome_info} does not exist") + self.genome_metadata = genome_info + + def validate_metadata_tsv(self): + logger.info("Retrieving info for genomes to submit...") + + all_fields = MAG_MANDATORY_FIELDS + BIN_MANDATORY_FIELDS + metadata = pd.read_csv(self.genome_metadata, sep="\t", usecols=all_fields) - return args + # make sure there are no empty cells + clean_columns = list(metadata.dropna(axis=1)) + if self.genome_type == "MAGs": + missing_values = [item for item in all_fields if item not in clean_columns] + else: + missing_values = [item for item in BIN_MANDATORY_FIELDS if item not in clean_columns] + + if missing_values: + raise ValueError( + "The following mandatory fields have missing values in " + "the input file: {}".format(", ".join(missing_values)) + ) + + # check amount of genomes to register at the same time + if len(metadata) >= 5000: + raise ValueError("Genomes need to be registered in batches of 5000 genomes or smaller.") + + # check whether accessions follow the right format 
+ accessions_reg_exp = re.compile(r"([E|S|D]R[R|Z]\d{6,})") + + accession_comparison = pd.DataFrame(columns=["genome_name", "attemptive_accessions", "correct", "mismatching", "co-assembly"]) + accession_comparison["genome_name"] = metadata["genome_name"] + + accession_comparison["attemptive_accessions"] = metadata["accessions"].map(lambda a: len(a.split(","))) - def generate_genomes_upload_dir(self, dir, genomeType): - uploadName = "MAG_upload" - if genomeType == "bins": - uploadName = uploadName.replace("MAG", "bin") - upload_dir = os.path.join(dir, uploadName) + accession_comparison["correct"] = metadata["accessions"].map(lambda a: len(accessions_reg_exp.findall(a))) + + accession_comparison["mismatching"] = accession_comparison.apply( + lambda row: True if row["attemptive_accessions"] == row["correct"] else None, axis=1 + ).isna() + + mismatching_accessions = accession_comparison[accession_comparison["mismatching"]]["genome_name"] + if not mismatching_accessions.empty: + raise ValueError( + "Run accessions are not correctly formatted for the following " + "genomes: " + ",".join(mismatching_accessions.values) + ) + + # check whether completeness and contamination are floats + try: + pd.to_numeric(metadata["completeness"]) + pd.to_numeric(metadata["contamination"]) + pd.to_numeric(metadata["genome_coverage"]) + except ValueError: + raise ValueError("Completeness, contamination or coverage values should be formatted as floats") + + # check whether all co-assemblies have more than one run associated and viceversa + accession_comparison["co-assembly"] = metadata["co-assembly"] + coassembly_discrepancy = metadata[ + ((accession_comparison["correct"] < 2) & (accession_comparison["co-assembly"])) + | ((accession_comparison["correct"] > 1) & (~accession_comparison["co-assembly"])) + ]["genome_name"] + if not coassembly_discrepancy.empty: + raise ValueError( + "The following genomes show discrepancy between number of runs " + "involved and co-assembly status: " + 
",".join(coassembly_discrepancy.values) + ) + + # are provided metagenomes part of the accepted metagenome list? + if False in metadata.apply(lambda row: True if row["metagenome"] in METAGENOMES else False, axis=1).unique(): + raise ValueError("Metagenomes associated with each genome need to belong to ENA's " + "approved metagenomes list.") + + # do provided file paths exist? + if False in metadata.apply(lambda row: True if os.path.exists(row["genome_path"]) else False, axis=1).unique(): + raise FileNotFoundError("Some genome paths do not exist.") + + # check genome name lengths + # if not (metadata["genome_name"].map(lambda a: len(a) < 20).all()): + # raise ValueError("Genome names must be shorter than 20 characters.") + + # create dictionary while checking genome name uniqueness + uniqueness = metadata["genome_name"].nunique() == metadata["genome_name"].size + if uniqueness: + if not self.live: + timestamp = str(int(dt.timestamp(dt.now()))) + timestamp_names = [row["genome_name"] + "_" + timestamp for index, row in metadata.iterrows()] + metadata["unique_genome_name"] = timestamp_names + genome_info = metadata.set_index("unique_genome_name").transpose().to_dict() + else: + genome_info = metadata.set_index("genome_name").transpose().to_dict() + else: + raise ValueError("Duplicate names found in genome names") + + return genome_info + + def extract_genomes_info(self): + genome_info = self.validate_metadata_tsv() + for gen in genome_info: + genome_info[gen]["accessions"] = genome_info[gen]["accessions"].split(",") + accession_type = "run" + assembly_reg_exp = re.compile(r"([E|S|D]RZ\d{6,})") + if assembly_reg_exp.findall(genome_info[gen]["accessions"][0]): + accession_type = "assembly" + genome_info[gen]["accessionType"] = accession_type + + genome_info[gen]["isolationSource"] = genome_info[gen]["metagenome"] + + try: + ( + genome_info[gen]["MAG_quality"], + genome_info[gen]["completeness"], + genome_info[gen]["contamination"], + ) = compute_mag_quality( + 
str(round_stats(genome_info[gen]["completeness"])), + str(round_stats(genome_info[gen]["contamination"])), + genome_info[gen]["rRNA_presence"], + ) + except IndexError: + pass + + if str(genome_info[gen]["co-assembly"]).lower() in ["yes", "y", "true"]: + genome_info[gen]["co-assembly"] = True + else: + genome_info[gen]["co-assembly"] = False + + genome_info[gen]["alias"] = gen + + tax_id, scientific_name = extract_tax_info(genome_info[gen]["NCBI_lineage"]) + genome_info[gen]["taxID"] = tax_id + genome_info[gen]["scientific_name"] = scientific_name + + return genome_info + + def extract_ena_info(self, genome_info): + logger.info("Retrieving project and run info from ENA (this might take a while)...") + + # retrieving metadata from runs (and runs from assembly accessions if provided) + all_runs = [] + for g in genome_info: + if genome_info[g]["accessionType"] == "assembly": + derived_runs = [] + for acc in genome_info[g]["accessions"]: + ena_query = EnaQuery(acc, "run_assembly", self.private) + derived_runs.append(ena_query.build_query()) + genome_info[g]["accessions"] = derived_runs + all_runs.extend(genome_info[g]["accessions"]) + + runs_set, study_set, samples_dict, temp_dict = set(all_runs), set(), {}, {} + for r in runs_set: + ena_query = EnaQuery(r, "run", self.private) + run_info = ena_query.build_query() + study_set.add(run_info["secondary_study_accession"]) + samples_dict[r] = run_info["sample_accession"] + + if not study_set: + raise ValueError("No study corresponding to runs found.") + + backup_file = os.path.join(self.upload_dir, "ENA_backup.json") + counter = 0 + if not os.path.exists(backup_file): + with open(backup_file, "w") as file: + pass + with open(backup_file, "r+") as file: + try: + backup_dict = json.load(file) + temp_dict = dict(backup_dict) + logger.info(f"A backup file {backup_file} for ENA sample metadata has been found.") + except json.decoder.JSONDecodeError: + backup_dict = {} + for s in study_set: + ena_query = EnaQuery(s, "study", 
self.private) + study_info = ena_query.build_query() + project_description = study_info["study_description"] + + ena_query = EnaQuery(s, "study_runs", self.private) + ena_info = ena_query.build_query() + if ena_info == []: + raise IOError("No runs found on ENA for project {}.".format(s)) + + for run, item in enumerate(ena_info): + run_accession = ena_info[run]["run_accession"] + if run_accession not in backup_dict: + if run_accession in runs_set: + sample_accession = ena_info[run]["sample_accession"] + ena_query = EnaQuery(sample_accession, "sample", self.private) + sample_info = ena_query.build_query() + + if "latitude" in sample_info and "longitude" in sample_info: + latitude = sample_info["latitude"] + longitude = sample_info["longitude"] + else: + location = sample_info["location"] + latitude, longitude = None, None + if "N" in location: + latitude = location.split("N")[0].strip() + longitude = location.split("N")[1].strip() + elif "S" in location: + latitude = "-" + location.split("S")[0].strip() + longitude = location.split("S")[1].strip() + + if "W" in longitude: + longitude = "-" + longitude.split("W")[0].strip() + elif longitude.endswith("E"): + longitude = longitude.split("E")[0].strip() + + if latitude: + latitude = "{:.{}f}".format(round(float(latitude), GEOGRAPHY_DIGIT_COORDS), GEOGRAPHY_DIGIT_COORDS) + else: + latitude = "not provided" + + if longitude: + longitude = "{:.{}f}".format(round(float(longitude), GEOGRAPHY_DIGIT_COORDS), GEOGRAPHY_DIGIT_COORDS) + else: + longitude = "not provided" + + country = sample_info["country"].split(":")[0] + if country not in GEOGRAPHIC_LOCATIONS: + country = "not provided" + + collectionDate = sample_info["collection_date"] + if collectionDate == "" or collectionDate == "missing": + collectionDate = "not provided" + + temp_dict[run_accession] = { + "instrumentModel": ena_info[run]["instrument_model"], + "collectionDate": collectionDate, + "country": country, + "latitude": latitude, + "longitude": longitude, + 
"projectDescription": project_description, + "study": s, + "sampleAccession": samples_dict[run_accession], + } + counter += 1 + + if (counter % 10 == 0) or (len(runs_set) - len(backup_dict) == counter): + file.seek(0) + file.write(json.dumps(temp_dict)) + file.truncate() + temp_dict = {**temp_dict, **backup_dict} + combine_ena_info(genome_info, temp_dict) + + def generate_genomes_upload_dir(self): + upload_name = "MAG_upload" + if self.genome_type == "bins": + upload_name = upload_name.replace("MAG", "bin") + upload_dir = os.path.join(self.work_dir, upload_name) os.makedirs(upload_dir, exist_ok=True) return upload_dir def create_genome_dictionary(self, samples_xml): - logger.info('Retrieving data for MAG submission...') + logger.info("Retrieving data for MAG submission...") - genomeInfo = extract_genomes_info(self.genomeMetadata, self.genomeType, self.live) + genome_info = self.extract_genomes_info() if not os.path.exists(samples_xml) or self.force: - extract_ENA_info(genomeInfo, self.upload_dir, self.username, self.password) + self.extract_ena_info(genome_info) logger.info("Writing genome registration XML...") - write_genomes_xml(genomeInfo, samples_xml, self.genomeType, - self.centre_name, self.tpa) + write_genomes_xml(genome_info, samples_xml, self.genome_type, self.centre_name, self.tpa) logger.info("All files have been written to " + self.upload_dir) else: - recover_info_from_xml(genomeInfo, samples_xml, self.live) + recover_info_from_xml(genome_info, samples_xml, self.live) + + return genome_info + + def genome_upload(self): + if not self.live: + logger.warning("Warning: genome submission is not in live mode, " + "files will be validated, but not uploaded.") + samples_xml = os.path.join(self.upload_dir, "genome_samples.xml") + submission_xml = os.path.join(self.upload_dir, "submission.xml") + genomes, manifest_info = {}, {} + + # submission xml existence + if not os.path.exists(submission_xml): + submission_xml = write_submission_xml(self.upload_dir, 
self.centre_name, False) + + # sample xml generation or recovery + genomes = self.create_genome_dictionary(samples_xml) + + # manifests creation + manifest_dir = os.path.join(self.upload_dir, "manifests") + os.makedirs(manifest_dir, exist_ok=True) + + accessionsgen = "registered_MAGs.tsv" + if self.genome_type == "bins": + accessionsgen = accessionsgen.replace("MAG", "bin") + if not self.live: + accessionsgen = accessionsgen.replace(".tsv", "_test.tsv") + + accessions_file = os.path.join(self.upload_dir, accessionsgen) + save = False + write_mode = "a" + if os.path.exists(accessions_file): + if not self.live: + save = True + if self.force: + write_mode = "w" + if not save: + logger.info("Genome samples already registered, reading ERS accessions...") + alias_to_new_sample_accession = get_accessions(accessions_file) + else: + save = True + + if save: + logger.info("Registering genome samples XMLs...") + ena_submit = EnaSubmit(samples_xml, submission_xml, self.live) + alias_to_new_sample_accession = ena_submit.handle_genomes_registration() + save_accessions(alias_to_new_sample_accession, accessions_file, write_mode) + + logger.info("Generating manifest files...") + + manifest_info = compute_manifests(genomes) + + for m in manifest_info: + generate_genome_manifest( + manifest_info[m], self.upload_study, manifest_dir, alias_to_new_sample_accession, self.genome_type, self.tpa + ) + + +def main(): + args = parse_args(sys.argv[1:]) + ena_upload = GenomeUpload( + upload_study=args.upload_study, + centre_name=args.centre_name, + genome_info=args.genome_info, + bins=args.bins, + live=args.live, + private=args.private, + tpa=args.tpa, + force=args.force, + out=args.out, + ) + ena_upload.genome_upload() - return genomeInfo if __name__ == "__main__": main() - logger.info('Completed') + logger.info("Completed") diff --git a/pyproject.toml b/pyproject.toml index 82527f3..968ffa2 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,6 +52,7 @@ test = [ "pytest-md==0.2.0", 
"pytest-workflow==2.0.1", "pytest-cov==3.0.0", + "pytest-responses==0.5.1", ] [build-system] diff --git a/pytest.ini b/pytest.ini index 8e78522..654f24a 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,3 +1,4 @@ [pytest] python_files = tests/*.py testpaths = tests +required_plugins = pytest-workflow diff --git a/requirements-test.yml b/requirements-test.yml index 333f06c..914434e 100644 --- a/requirements-test.yml +++ b/requirements-test.yml @@ -1 +1 @@ --r requirements.yml \ No newline at end of file +-r requirements.yml diff --git a/tests/conftest.py b/tests/conftest.py new file mode 100644 index 0000000..032779d --- /dev/null +++ b/tests/conftest.py @@ -0,0 +1,103 @@ +from pathlib import Path + +import pytest + + +@pytest.fixture(scope="module") +def test_file_dir(): + return Path(__file__).resolve().parent / Path("fixtures/responses") + + +@pytest.fixture(scope="module") +def public_study_json(test_file_dir): + return test_file_dir / Path("public_study.json") + + +@pytest.fixture(scope="module") +def private_study_xml(test_file_dir): + return test_file_dir / Path("private_study.xml") + + +@pytest.fixture(scope="module") +def public_run_json(test_file_dir): + return test_file_dir / Path("public_run.json") + + +@pytest.fixture(scope="module") +def private_run_json(test_file_dir): + return test_file_dir / Path("private_run.json") + + +@pytest.fixture(scope="module") +def public_run_from_assembly_xml(test_file_dir): + return test_file_dir / Path("public_run_from_assembly.xml") + + +@pytest.fixture(scope="module") +def private_run_from_assembly_xml(test_file_dir): + return test_file_dir / Path("private_run_from_assembly.xml") + + +@pytest.fixture(scope="module") +def public_study_runs_json(test_file_dir): + return test_file_dir / Path("public_study_runs.json") + + +@pytest.fixture(scope="module") +def private_study_runs_json(test_file_dir): + return test_file_dir / Path("private_study_runs.json") + + +@pytest.fixture(scope="module") +def 
public_sample_json(test_file_dir): + return test_file_dir / Path("public_sample.json") + + +@pytest.fixture(scope="module") +def private_sample_xml(test_file_dir): + return test_file_dir / Path("private_sample.xml") + + +@pytest.fixture +def public_study_data(): + return { + "study_accession": "PRJEB41657", + "study_description": "Metagenomic raw reads, assemblies, and bins derived from HoloFood salmon gut samples from trial A and B. The samples in this project contributed to the salmon MAG catalogue (project: PRJEB55376 [ERP140265]).", + } + + +# private XML has no PRJ study ID +@pytest.fixture +def private_study_data(): + return { + "study_accession": "ERP125469", + "study_description": "Metagenomic raw reads, assemblies, and bins derived from HoloFood salmon gut samples from trial A and B. The samples in this project contributed to the salmon MAG catalogue (project: PRJEB55376 [ERP140265]).", + } + + +@pytest.fixture +def public_run_data(): + return {"run_accession": "ERR4918394", "sample_accession": "SAMEA7687881", "secondary_study_accession": "ERP125469"} + + +# private json only has ERS sample IDs +@pytest.fixture +def private_run_data(): + return {"run_accession": "ERR4918394", "sample_accession": "ERS5444411", "secondary_study_accession": "ERP125469"} + + +@pytest.fixture +def public_sample_data(): + return {"sample_accession": "SAMEA7687881", "collection_date": "2019-06-11", "country": "Norway", "location": "66.079905 N 12.587848 E"} + + +# private xml can only query with RS accession and lat and long are split +@pytest.fixture +def private_sample_data(): + return { + "sample_accession": "ERS5444411", + "collection_date": "2019-06-11", + "country": "Norway", + "latitude": "66.079905", + "longitude": "12.587848", + } diff --git a/tests/fixtures/responses/private_run.json b/tests/fixtures/responses/private_run.json new file mode 100644 index 0000000..09c0a03 --- /dev/null +++ b/tests/fixtures/responses/private_run.json @@ -0,0 +1,17 @@ +[ + { + "report": { + 
"id": "ERR4918394", + "alias": "webin-reads-SA01_02C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:14:11", + "firstPublic": "2022-08-02T17:24:21", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783303", + "sampleId": "ERS5444411" + }, + "links": [] + } +] diff --git a/tests/fixtures/responses/private_run_from_assembly.xml b/tests/fixtures/responses/private_run_from_assembly.xml new file mode 100644 index 0000000..a387933 --- /dev/null +++ b/tests/fixtures/responses/private_run_from_assembly.xml @@ -0,0 +1,40 @@ + + + + + ERZ2626953 + webin-genome-ERR4918394_62952e961c26f0859b4ebdd83d93e4e7 + + Genome assembly: ERR4918394_62952e961c26f0859b4ebdd83d93e4e7 + + + ERP125469 + PRJEB41657 + + + + + ERS5444411 + + + + + ERR4918394 + + + + + ERR4918394_62952e961c26f0859b4ebdd83d93e4e7 + primary metagenome + false + 17.0 + metaSPAdes v3.14.1 + DNBSEQ-G400 + genomic DNA + + + + + + + diff --git a/tests/fixtures/responses/private_sample.xml b/tests/fixtures/responses/private_sample.xml new file mode 100644 index 0000000..e0a50bd --- /dev/null +++ b/tests/fixtures/responses/private_sample.xml @@ -0,0 +1,159 @@ + + + + + ERS5444411 + SA01_02C1a_metaG + + SA01.02C1a + + 1602388 + fish gut metagenome + + Salmon metagenomic DNA data from Distal gut content; animal SA01.02 from tank SA01 with treatment Tiger + + + adapters + AAGTCGGAGGCCAAGCGGTCTTAGGAAGACAA;GAACGACATGGCTACGATCCGACTT + + + collection date + 2019-06-11 + + + geographic location (country and/or sea) + Norway + + + geographic location (latitude) + 66.079905 + DD + + + geographic location (longitude) + 12.587848 + DD + + + geographic location (region and locality) + Dønna; Nordland county + + + host body site + Distal gut content + + + host common name + Atlantic salmon + + + host diet + Fermented algae meal (added in in oil coating) + + + host diet treatment + Tiger: SA Control + + + host disease status + Wounded:- + 
+ + host scientific name + Salmo salar + + + host subject id + SA01.02 + + + host taxid + 8030 + + + host total mass + 290.4 + g + + + nucleic acid extraction + D-rex protocol + + + pcr primers + N/A + + + project name + HoloFood Salmon - Metagenomic DNA + + + reference host genome for decontamination + GCA_000000000.0 + + + sample storage buffer + Shield + + + sample storage container + 2ml E-matrix + + + sample storage location + UCPH + + + sample storage temperature + -20 + °C + + + sample volume or weight for DNA extraction + 0.2 + mL + + + sequencing method + MGISEQ-2000 + + + trial timepoint + 0 + days + + + host storage container temperature + 12.76 + °C + + + host storage container pH + 7.05 + + + host gutted mass + 246.6 + g + + + host length + 29.5 + cm + + + host diet treatment concentration + 0 + % mass + + + host storage container + LetSea Tank: Tiger + + + ENA-CHECKLIST + ERC000052 + + + + diff --git a/tests/fixtures/responses/private_study.xml b/tests/fixtures/responses/private_study.xml new file mode 100644 index 0000000..32ef2c1 --- /dev/null +++ b/tests/fixtures/responses/private_study.xml @@ -0,0 +1,16 @@ + + + + + ERP125469 + ena-STUDY-UNIVERSITY OF COPENHAGEN-02-12-2020-08:09:52:762-111 + + + HoloFood Salmon Trial A+B Gut Metagenome + Metagenomic raw reads, assemblies, and bins derived from HoloFood salmon gut samples from trial A and B. The samples in this project contributed to the salmon MAG catalogue (project: PRJEB55376 [ERP140265]). + Metagenomic raw reads, assemblies, and bins derived from HoloFood salmon gut samples from trial A and B. The samples in this project contributed to the salmon MAG catalogue (project: PRJEB55376 [ERP140265]). 
+ HoloFood Salmon Trial A+B Gut Metagenome + + + + \ No newline at end of file diff --git a/tests/fixtures/responses/private_study_runs.json b/tests/fixtures/responses/private_study_runs.json new file mode 100644 index 0000000..b3f7149 --- /dev/null +++ b/tests/fixtures/responses/private_study_runs.json @@ -0,0 +1,152 @@ +[ + { + "report": { + "id": "ERR4918752", + "alias": "webin-reads-SB06_18C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:32:19", + "firstPublic": "2022-08-02T17:24:21", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783661", + "sampleId": "ERS5444665" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918751", + "alias": "webin-reads-SB06_13C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:31:46", + "firstPublic": "2022-08-02T17:24:21", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783660", + "sampleId": "ERS5444660" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918750", + "alias": "webin-reads-SB10_16C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:31:17", + "firstPublic": "2022-08-02T17:25:05", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783659", + "sampleId": "ERS5444722" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918749", + "alias": "webin-reads-SB10_30C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:30:42", + "firstPublic": "2022-08-02T17:24:20", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783658", + "sampleId": "ERS5444736" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918748", + "alias": "webin-reads-SB11_25C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:30:20", + 
"firstPublic": "2022-08-02T17:25:03", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783657", + "sampleId": "ERS5444756" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918747", + "alias": "webin-reads-SB11_13C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:30:12", + "firstPublic": "2022-08-02T17:25:04", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783656", + "sampleId": "ERS5444744" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918746", + "alias": "webin-reads-SB11_29C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:29:52", + "firstPublic": "2022-08-02T17:24:21", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783655", + "sampleId": "ERS5444760" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918745", + "alias": "webin-reads-SA09_02C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:29:29", + "firstPublic": "2022-08-02T17:24:20", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783654", + "sampleId": "ERS5444529" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918744", + "alias": "webin-reads-SB08_05C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:29:27", + "firstPublic": "2022-08-02T17:24:21", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + "studyId": "ERP125469", + "experimentId": "ERX4783653", + "sampleId": "ERS5444707" + }, + "links": [] + }, + { + "report": { + "id": "ERR4918743", + "alias": "webin-reads-SB11_28C1a_metaG", + "instrumentModel": "DNBSEQ-G400", + "firstCreated": "2020-12-04T12:29:25", + "firstPublic": "2022-08-02T17:25:05", + "releaseStatus": "PUBLIC", + "submissionAccountId": "Webin-51990", + 
"studyId": "ERP125469", + "experimentId": "ERX4783652", + "sampleId": "ERS5444759" + }, + "links": [] + } +] diff --git a/tests/fixtures/responses/public_run.json b/tests/fixtures/responses/public_run.json new file mode 100644 index 0000000..cc790e8 --- /dev/null +++ b/tests/fixtures/responses/public_run.json @@ -0,0 +1,7 @@ +[ + { + "run_accession": "ERR4918394", + "sample_accession": "SAMEA7687881", + "secondary_study_accession": "ERP125469" + } +] diff --git a/tests/fixtures/responses/public_run_from_assembly.xml b/tests/fixtures/responses/public_run_from_assembly.xml new file mode 100644 index 0000000..7d5b620 --- /dev/null +++ b/tests/fixtures/responses/public_run_from_assembly.xml @@ -0,0 +1,25 @@ + + + + ERZ2626953 + webin-genome-ERR4918394_62952e961c26f0859b4ebdd83d93e4e7 + + Genome assembly: ERR4918394_62952e961c26f0859b4ebdd83d93e4e7 + + + ERP125469 + PRJEB41657 + + + + + ERS5444411 + SAMEA7687881 + + + + + ERR4918394 + + + diff --git a/tests/fixtures/responses/public_sample.json b/tests/fixtures/responses/public_sample.json new file mode 100644 index 0000000..d59db96 --- /dev/null +++ b/tests/fixtures/responses/public_sample.json @@ -0,0 +1,8 @@ +[ + { + "sample_accession": "SAMEA7687881", + "collection_date": "2019-06-11", + "country": "Norway", + "location": "66.079905 N 12.587848 E" + } +] diff --git a/tests/fixtures/responses/public_study.json b/tests/fixtures/responses/public_study.json new file mode 100644 index 0000000..eb360c6 --- /dev/null +++ b/tests/fixtures/responses/public_study.json @@ -0,0 +1,6 @@ +[ + { + "study_accession": "PRJEB41657", + "study_description": "Metagenomic raw reads, assemblies, and bins derived from HoloFood salmon gut samples from trial A and B. The samples in this project contributed to the salmon MAG catalogue (project: PRJEB55376 [ERP140265])." 
+ } + ] diff --git a/tests/fixtures/responses/public_study_runs.json b/tests/fixtures/responses/public_study_runs.json new file mode 100644 index 0000000..ae8c790 --- /dev/null +++ b/tests/fixtures/responses/public_study_runs.json @@ -0,0 +1,52 @@ +[ + { + "run_accession": "ERR4918394", + "sample_accession": "SAMEA7687881", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918397", + "sample_accession": "SAMEA7687882", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918398", + "sample_accession": "SAMEA7678309", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918404", + "sample_accession": "SAMEA7687910", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918405", + "sample_accession": "SAMEA7687902", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918406", + "sample_accession": "SAMEA7687895", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918408", + "sample_accession": "SAMEA7687899", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918413", + "sample_accession": "SAMEA7687903", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918414", + "sample_accession": "SAMEA7687912", + "instrument_model": "DNBSEQ-G400" + }, + { + "run_accession": "ERR4918416", + "sample_accession": "SAMEA7687892", + "instrument_model": "DNBSEQ-G400" + } +] diff --git a/tests/test_dummy.py b/tests/test_dummy.py deleted file mode 100644 index 9095527..0000000 --- a/tests/test_dummy.py +++ /dev/null @@ -1,5 +0,0 @@ - -class TestDummy: - - def test_dummy(self): - assert 1 == 1 \ No newline at end of file diff --git a/tests/unit/test_ena.py b/tests/unit/test_ena.py new file mode 100644 index 0000000..902cf13 --- /dev/null +++ b/tests/unit/test_ena.py @@ -0,0 +1,156 @@ +import json + +import pytest +import responses +from requests.exceptions import ConnectionError, HTTPError + +from genomeuploader.ena import EnaQuery + +ENA_WEBIN = 
"ENA_WEBIN" +ENA_WEBIN_PASSWORD = "ENA_WEBIN_PASSWORD" + + +def read_json(path): + with open(path, "r") as jpath: + return json.load(jpath) + + +def read_xml(path): + with open(path, "r") as xpath: + return xpath.read() + + +def test_ena_study(public_study_data, private_study_data, public_study_json, private_study_xml): + responses.add(responses.POST, "https://www.ebi.ac.uk/ena/portal/api/search", json=read_json(public_study_json)) + + responses.add( + responses.GET, + "https://www.ebi.ac.uk/ena/submit/report/studies/xml/ERP125469", + body=read_xml(private_study_xml), + content_type="application/xml", + ) + + ena_study_public = EnaQuery(accession="ERP125469", query_type="study", private=False) + + ena_study_private = EnaQuery( + accession="ERP125469", + query_type="study", + private=True, + ) + assert ena_study_public.build_query() == public_study_data + assert ena_study_private.build_query() == private_study_data + + +def test_ena_run(public_run_data, private_run_data, public_run_json, private_run_json): + responses.add(responses.POST, "https://www.ebi.ac.uk/ena/portal/api/search", json=read_json(public_run_json)) + + responses.add(responses.GET, "https://www.ebi.ac.uk/ena/submit/report/runs/ERR4918394", json=read_json(private_run_json)) + + ena_run_public = EnaQuery(accession="ERR4918394", query_type="run", private=False) + + ena_run_private = EnaQuery(accession="ERR4918394", query_type="run", private=True) + assert ena_run_public.build_query() == public_run_data + assert ena_run_private.build_query() == private_run_data + + +def test_ena_run_from_assembly(public_run_from_assembly_xml, private_run_from_assembly_xml): + responses.add( + responses.GET, + "https://www.ebi.ac.uk/ena/browser/api/xml/ERZ2626953", + body=read_xml(public_run_from_assembly_xml), + content_type="application/xml", + ) + + responses.add( + responses.GET, + "https://www.ebi.ac.uk/ena/submit/report/analyses/xml/ERZ2626953", + body=read_xml(private_run_from_assembly_xml), + 
content_type="application/xml", + ) + + ena_run_from_assembly_public = EnaQuery(accession="ERZ2626953", query_type="run_assembly", private=False) + + ena_run_from_assembly_private = EnaQuery(accession="ERZ2626953", query_type="run_assembly", private=True) + + assert ena_run_from_assembly_public.build_query() == "ERR4918394" and ena_run_from_assembly_private.build_query() == "ERR4918394" + + + def test_ena_study_runs(public_study_runs_json, private_study_runs_json): + """response mock limited to 10 runs for the study, need to check if there is page limit""" + responses.add(responses.POST, "https://www.ebi.ac.uk/ena/portal/api/search", json=read_json(public_study_runs_json)) + + responses.add(responses.GET, "https://www.ebi.ac.uk/ena/submit/report/runs/ERP125469", json=read_json(private_study_runs_json)) + + ena_study_runs_public = EnaQuery(accession="ERP125469", query_type="study_runs", private=False) + + ena_study_runs_private = EnaQuery(accession="ERP125469", query_type="study_runs", private=True) + + assert len(ena_study_runs_public.build_query()) == 10 and len(ena_study_runs_private.build_query()) == 10 + + + def test_ena_sample(public_sample_data, private_sample_data, public_sample_json, private_sample_xml): + responses.add(responses.POST, "https://www.ebi.ac.uk/ena/portal/api/search", json=read_json(public_sample_json)) + + responses.add(responses.GET, "https://www.ebi.ac.uk/ena/submit/report/samples/xml/ERS5444411", body=read_xml(private_sample_xml)) + + ena_sample_public = EnaQuery(accession="SAMEA7687881", query_type="sample", private=False) + + ena_sample_private = EnaQuery(accession="ERS5444411", query_type="sample", private=True) + + assert ena_sample_public.build_query() == public_sample_data + assert ena_sample_private.build_query() == private_sample_data + + + def test_ena_exceptions(monkeypatch): + monkeypatch.setenv(ENA_WEBIN, "fake-webin-999") + monkeypatch.setenv(ENA_WEBIN_PASSWORD, "fakewebinpw") + + ena_query = EnaQuery( + accession="ERP125469", + query_type="study", + 
private=True, + ) + + public_api_data = { + "format": "json", + "includeMetagenomes": True, + "dataPortal": "ena", + "result": "study", + "query": "secondary_study_accession=ERP125469", + "fields": "study_accession,study_description", + } + + responses.add( + responses.GET, + "https://www.ebi.ac.uk/ena/submit/report/studies/ERP125469", + body=ConnectionError("Test retry private error"), + ) + + responses.add(responses.POST, "https://www.ebi.ac.uk/ena/portal/api/search", body=ConnectionError("Test retry public error")) + + with pytest.raises( + ValueError, + match="Could not find ERP125469 in ENA after 3 attempts. Error: Test retry private error", + ): + ena_query.retry_or_handle_request_error( + request=ena_query.get_request, + url="https://www.ebi.ac.uk/ena/submit/report/studies/ERP125469", + ) + assert responses.assert_call_count("https://www.ebi.ac.uk/ena/submit/report/studies/ERP125469", 3) + + with pytest.raises(ValueError, match="Could not find ERP125469 in ENA after 3 attempts. Error: Test retry public error"): + ena_query.retry_or_handle_request_error(request=ena_query.post_request, data=public_api_data) + assert responses.assert_call_count("https://www.ebi.ac.uk/ena/portal/api/search", 3) + + responses.add( + responses.GET, + "https://www.ebi.ac.uk/ena/submit/report/studies/ERPXYZ", + body=HTTPError("Test failure error"), + ) + + with pytest.raises(HTTPError): + ena_query.retry_or_handle_request_error( + request=ena_query.get_request, + url="https://www.ebi.ac.uk/ena/submit/report/studies/ERPXYZ", + ) + assert responses.assert_call_count("https://www.ebi.ac.uk/ena/submit/report/studies/ERPXYZ", 1)