Skip to content

Commit 8d7dcde

Browse files
authored
Merge pull request #60 from apriltuesday/ftp-upload
Add FTP upload
2 parents ab7c5f7 + 74a5404 commit 8d7dcde

File tree

6 files changed

+157
-1
lines changed

6 files changed

+157
-1
lines changed

mars-cli/generate_config.py

+2
Original file line numberDiff line numberDiff line change
@@ -32,8 +32,10 @@ def create_settings_file(settings_dir):
3232
config["ena"] = {
3333
"development-url": "https://wwwdev.ebi.ac.uk/ena/submit/webin-v2/",
3434
"development-submission-url": "https://wwwdev.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA",
35+
"development-data-submission-url": "webin2.ebi.ac.uk",
3536
"production-url": "https://www.ebi.ac.uk/ena/submit/webin-v2/",
3637
"production-submission-url": "https://www.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA",
38+
"production-data-submission-url": "webin2.ebi.ac.uk",
3739
}
3840

3941
config["biosamples"] = {

mars-cli/mars_cli.py

+34
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,11 @@
6363
"development-submission-url",
6464
fallback="https://wwwdev.ebi.ac.uk/biosamples/samples/submit",
6565
),
66+
"DATA-SUBMISSION": config.get(
67+
"ena",
68+
"development-data-submission-url",
69+
fallback="webin2.ebi.ac.uk",
70+
),
6671
},
6772
"WEBIN": {
6873
"SERVICE": config.get(
@@ -101,6 +106,11 @@
101106
"production-submission-url",
102107
fallback="https://www.ebi.ac.uk/ena/submit/drop-box/submit/?auth=ENA",
103108
),
109+
"DATA-SUBMISSION": config.get(
110+
"ena",
111+
"development-data-submission-url",
112+
fallback="webin2.ebi.ac.uk",
113+
),
104114
},
105115
"WEBIN": {
106116
"SERVICE": config.get(
@@ -167,6 +177,23 @@ def cli(ctx, development):
167177
)
168178
@click.argument("isa_json_file", type=click.File("r"))
169179
@click.option("--submit-to-ena", type=click.BOOL, default=True, help="Submit to ENA.")
180+
@click.option(
181+
"--file-transfer",
182+
type=click.STRING,
183+
help="provide the name of a file transfer solution, like ftp or aspera",
184+
)
185+
@click.option(
186+
"--data-files",
187+
type=click.File("r"),
188+
multiple=True,
189+
help="Path of files to upload",
190+
)
191+
# @click.option(
192+
# "--data-submit-to-ena",
193+
# type=click.BOOL,
194+
# default=False,
195+
# help="Submit data files to ENA.",
196+
# )
170197
@click.option(
171198
"--submit-to-metabolights",
172199
type=click.BOOL,
@@ -189,6 +216,8 @@ def submit(
189216
submit_to_ena,
190217
submit_to_metabolights,
191218
investigation_is_root,
219+
file_transfer,
220+
data_files,
192221
):
193222
"""Start a submission to the target repositories."""
194223
target_repositories = [TargetRepository.BIOSAMPLES]
@@ -209,6 +238,9 @@ def submit(
209238
)
210239

211240
urls_dict = ctx.obj["FILTERED_URLS"]
241+
242+
data_file_paths = [f.name for f in data_files] if file_transfer else []
243+
212244
try:
213245
submission(
214246
credential_service_name,
@@ -218,6 +250,8 @@ def submit(
218250
target_repositories,
219251
investigation_is_root,
220252
urls_dict,
253+
file_transfer,
254+
data_file_paths,
221255
)
222256
except requests.RequestException as err:
223257
tb = sys.exc_info()[2] # Traceback value

mars-cli/mars_lib/ftp_upload.py

+59
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
import ftplib
2+
import os
3+
from pathlib import Path
4+
from typing import List
5+
6+
from retry import retry
7+
from mars_lib.logging import print_and_log
8+
9+
10+
class PatchFTP_TLS(ftplib.FTP_TLS):
    """
    Explicit FTPS client whose data connections share the control connection's TLS session.

    Adapted from https://stackoverflow.com/questions/14659154/ftpes-session-reuse-required
    as a workaround for the standard-library issue https://bugs.python.org/issue19500
    """

    def ntransfercmd(self, cmd, rest=None):
        """Open the data connection for `cmd`; wrap it in TLS when data protection is on."""
        # Deliberately call the plain FTP implementation so the socket is only
        # wrapped by the code below.
        data_sock, expected_size = ftplib.FTP.ntransfercmd(self, cmd, rest)
        if not self._prot_p:
            return data_sock, expected_size

        secured_sock = self.context.wrap_socket(
            data_sock,
            server_hostname=self.host,
            # The actual workaround: reuse the TLS session of the control socket.
            session=self.sock.session,
        )
        return secured_sock, expected_size
24+
25+
26+
class FTPUploader:
    """Upload local files to an FTP server over explicit FTPS (FTP with TLS)."""

    def __init__(self, ftp_host: str, username: str, password: str):
        """
        Store the connection details; no connection is opened here.

        Args:
            ftp_host: Host name of the FTP server.
            username: Login name used for the FTP session.
            password: Password used for the FTP session.
        """
        self.ftp_host = ftp_host
        self.username = username
        self.password = password

    @retry(exceptions=ftplib.all_errors, tries=3, delay=2, backoff=1.2, jitter=(1, 3))
    def upload(self, file_paths: List[Path], target_location: str = "/") -> bool:
        """
        Upload the given files into `target_location` on the FTP server.

        A file is skipped when a file with the same name and the same size is
        already present in the target directory. The whole operation is retried
        (up to 3 tries) on any error in `ftplib.all_errors`.

        Args:
            file_paths: Local paths of the files to upload.
            target_location: Remote directory to upload into.

        Returns:
            True once every file has been uploaded or skipped.
        """
        if not file_paths:
            # Nothing to transfer; also avoids max() raising ValueError on an
            # empty sequence below.
            return True

        # Read each local size once; it is needed for both the timeout
        # heuristic and the "already uploaded" comparison.
        local_sizes = {path: os.path.getsize(path) for path in file_paths}

        # Heuristic for the expected timeout: assume an upload speed of
        # 10 MB/s (10^7 bytes per second), but no less than 30 seconds and no
        # more than an hour.
        max_file_size = max(local_sizes.values())
        timeout = min(max(max_file_size // 10_000_000, 30), 3600)

        with PatchFTP_TLS() as ftps:
            ftps.context.set_ciphers("HIGH:!DH:!aNULL")
            ftps.connect(self.ftp_host, port=21, timeout=timeout)
            ftps.login(self.username, self.password)
            ftps.prot_p()  # switch the data connection to TLS as well

            ftps.cwd(target_location)
            previous_content = ftps.nlst()
            for file_to_upload in file_paths:
                file_name = os.path.basename(file_to_upload)
                if (
                    file_name in previous_content
                    and ftps.size(file_name) == local_sizes[file_to_upload]
                ):
                    print_and_log(
                        f"{file_name} already exists and has the same size on the FTP, skipping"
                    )
                    continue
                print_and_log(f"Uploading {file_name} to FTP")
                with open(file_to_upload, "rb") as open_file:
                    ftps.storbinary(f"STOR {file_name}", open_file)

        return True

mars-cli/mars_lib/submit.py

+39-1
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,10 @@
1717
from mars_lib.logging import print_and_log
1818
from pydantic import ValidationError
1919

20+
from mars_lib.ftp_upload import FTPUploader
21+
from pathlib import Path
22+
from typing import List
23+
2024

2125
def submission(
2226
credential_service_name: str,
@@ -26,6 +30,8 @@ def submission(
2630
target_repositories: list[str],
2731
investigation_is_root: bool,
2832
urls: dict[str, Any],
33+
file_transfer: str,
34+
data_file_paths=None,
2935
):
3036
# If credential manager info found:
3137
# Get password from the credential manager
@@ -53,7 +59,18 @@ def submission(
5359
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
5460
)
5561

56-
if TargetRepository.ENA in target_repositories:
62+
if (
63+
TargetRepository.ENA in target_repositories
64+
and data_file_paths
65+
and file_transfer
66+
):
67+
upload_to_ena(
68+
file_paths=data_file_paths,
69+
user_credentials=user_credentials,
70+
submission_url=urls["ENA"]["DATA-SUBMISSION"],
71+
file_transfer=file_transfer,
72+
)
73+
elif TargetRepository.ENA in target_repositories:
5774
# TODO: Filter out other assays
5875
ena_result = submit_to_ena(
5976
isa_json=isa_json,
@@ -64,6 +81,7 @@ def submission(
6481
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
6582
)
6683
# TODO: Update `isa_json`, based on the receipt returned
84+
6785
elif TargetRepository.BIOSAMPLES in target_repositories:
6886
# Submit to Biosamples
6987
biosamples_result = submit_to_biosamples(
@@ -158,6 +176,26 @@ def submit_to_ena(
158176
return result
159177

160178

179+
def upload_to_ena(
    file_paths: List[Path],
    user_credentials: dict[str, str],
    submission_url: str,
    file_transfer: str,
):
    """
    Upload data files to ENA with the requested file transfer solution.

    Args:
        file_paths: Local paths of the data files to upload.
        user_credentials: Mapping providing the "username" and "password" keys.
        submission_url: Host the files are uploaded to.
        file_transfer: Name of the transfer solution (case-insensitive).

    Raises:
        ValueError: If `file_transfer` is not a recognised transfer solution.
        NotImplementedError: If `file_transfer` is recognised but has no
            implementation yet (currently "aspera").
    """
    ALLOWED_FILE_TRANSFER_SOLUTIONS = {"ftp", "aspera"}
    file_transfer = file_transfer.lower()

    if file_transfer not in ALLOWED_FILE_TRANSFER_SOLUTIONS:
        raise ValueError(f"Unsupported transfer protocol: {file_transfer}")

    if file_transfer == "ftp":
        uploader = FTPUploader(
            submission_url,
            user_credentials["username"],
            user_credentials["password"],
        )
        uploader.upload(file_paths)
        return

    # Recognised but unimplemented: fail loudly instead of returning as if the
    # files had been uploaded.
    raise NotImplementedError(
        f"File transfer via '{file_transfer}' is not implemented yet"
    )
197+
198+
161199
def create_external_references(
162200
biosamples_credentials: dict[str, str],
163201
biosamples_externalReferences: dict[str, Any],

mars-cli/requirements.txt

+1
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@ jsonschema
33
keyring
44
pydantic
55
click
6+
retry

mars-cli/tests/test_ftp_upload.py

+22
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
import json
2+
3+
import pytest
4+
from pathlib import Path
5+
import ftplib
6+
7+
from mars_lib.ftp_upload import FTPUploader
8+
9+
10+
def test_upload_login_failure():
    """Uploading with invalid credentials must raise the FTP 530 login error."""
    fixture_file = Path("./tests/fixtures/not_a_json_file.txt")
    bad_login_uploader = FTPUploader("webin2.ebi.ac.uk", "junk", "more junk")
    with pytest.raises(ftplib.error_perm, match="530 Login incorrect."):
        bad_login_uploader.upload([fixture_file])
14+
15+
16+
@pytest.mark.skip(reason="Relies on real ENA credentials in test_credentials_example.json")
def test_upload_success():
    """Upload two fixture files using the credentials stored in the example credentials file."""
    # For local testing, add ENA username/password to test_credentials_example.json
    credentials_text = Path("./tests/test_credentials_example.json").read_text()
    creds = json.loads(credentials_text)

    files_to_send = [
        Path("../test-data/ENA_TEST2.R1.fastq.gz"),
        Path("./tests/fixtures/not_a_json_file.txt"),
    ]
    uploader = FTPUploader("webin2.ebi.ac.uk", creds["username"], creds["password"])
    uploader.upload(files_to_send)

0 commit comments

Comments
 (0)