Skip to content

Commit b907a74

Browse files
authored
Merge pull request #64 from elixir-europe/process-isa-json-after-biosamples
Process isa json after biosamples
2 parents a180bab + ba58c6c commit b907a74

File tree

7 files changed

+169
-90
lines changed

7 files changed

+169
-90
lines changed

mars-cli/mars_cli.py

+18-6
Original file line numberDiff line numberDiff line change
@@ -176,6 +176,12 @@ def cli(ctx, development):
176176
help="Name of a credentials file",
177177
)
178178
@click.argument("isa_json_file", type=click.File("r"))
179+
@click.option(
180+
"--submit-to-biosamples",
181+
type=click.BOOL,
182+
default=True,
183+
help="Submit to BioSamples.",
184+
)
179185
@click.option("--submit-to-ena", type=click.BOOL, default=True, help="Submit to ENA.")
180186
@click.option(
181187
"--file-transfer",
@@ -200,29 +206,34 @@ def cli(ctx, development):
200206
type=click.BOOL,
201207
help="Boolean indicating if the investigation is the root of the ISA JSON. Set this to True if the ISA-JSON does not contain a 'investigation' field.",
202208
)
209+
@click.option(
210+
"--output",
211+
type=click.STRING,
212+
default=f"output_{datetime.now().strftime('%Y-%m-%dT%H:%M:%S')}",
213+
)
203214
@click.pass_context
204215
def submit(
205216
ctx,
206217
credential_service_name,
207218
username_credentials,
208219
credentials_file,
209220
isa_json_file,
221+
submit_to_biosamples,
210222
submit_to_ena,
211223
submit_to_metabolights,
212224
investigation_is_root,
213225
file_transfer,
226+
output,
214227
data_files,
215228
):
216229
"""Start a submission to the target repositories."""
217-
target_repositories = [TargetRepository.BIOSAMPLES]
230+
target_repositories = []
231+
232+
if submit_to_biosamples:
233+
target_repositories.append(TargetRepository.BIOSAMPLES)
218234

219235
if submit_to_ena:
220236
target_repositories.append(TargetRepository.ENA)
221-
target_repositories.remove(TargetRepository.BIOSAMPLES)
222-
print_and_log(
223-
f"Skipping {TargetRepository.BIOSAMPLES} repository due to {TargetRepository.ENA} being present in the list of repositories",
224-
level="debug",
225-
)
226237

227238
if submit_to_metabolights:
228239
target_repositories.append(TargetRepository.METABOLIGHTS)
@@ -245,6 +256,7 @@ def submit(
245256
investigation_is_root,
246257
urls_dict,
247258
file_transfer,
259+
output,
248260
data_file_paths,
249261
)
250262
except requests.RequestException as err:

mars-cli/mars_lib/isa_json.py

+30-32
Original file line numberDiff line numberDiff line change
@@ -22,21 +22,21 @@
2222

2323

2424
def reduce_isa_json_for_target_repo(
25-
input_isa_json: Investigation, target_repo: str
26-
) -> Investigation:
25+
input_isa_json: IsaJson, target_repo: str
26+
) -> IsaJson:
2727
"""
2828
Filters out assays that are not meant to be sent to the specified target repository.
2929
3030
Args:
31-
input_isa_json (Investigation): Input ISA JSON that contains the original information.
31+
input_isa_json (IsaJson): Input ISA JSON that contains the original information.
3232
target_repo (TargetRepository): Target repository as a constant.
3333
3434
Returns:
35-
Investigation: Filtered ISA JSON.
35+
IsaJson: Filtered ISA JSON.
3636
"""
3737
filtered_isa_json = input_isa_json.model_copy(deep=True)
3838
new_studies = []
39-
studies = filtered_isa_json.studies
39+
studies = filtered_isa_json.investigation.studies
4040
for study in studies:
4141
if target_repo == TargetRepository.BIOSAMPLES:
4242
filtered_assays = []
@@ -51,7 +51,7 @@ def reduce_isa_json_for_target_repo(
5151
study.assays = filtered_assays
5252
new_studies.append(study)
5353

54-
filtered_isa_json.studies = new_studies
54+
filtered_isa_json.investigation.studies = new_studies
5555
return filtered_isa_json
5656

5757

@@ -64,6 +64,8 @@ def detect_target_repo_comment(comments: List[Comment]) -> Comment:
6464
Returns:
6565
Comment: The comment where the name corresponds with the name of the provided target repo.
6666
"""
67+
if len(comments) < 1:
68+
raise ValueError("No comments found! Not able to detect the target repository!")
6769
return next(comment for comment in comments if comment.name == TARGET_REPO_KEY)
6870

6971

@@ -188,13 +190,15 @@ def accession_characteristic_present(
188190
f"'where' atribute is missing in path {material_type_path.key}."
189191
)
190192

191-
accession_characteristics = [
192-
char
193-
for char in material.characteristics
194-
if char.category
195-
and char.category.characteristicType
196-
and char.category.characteristicType.annotationValue == "accession"
197-
]
193+
accession_characteristics = []
194+
for char in material.characteristics:
195+
if char.category and char.category.characteristicType:
196+
if char.category.characteristicType.annotationValue:
197+
if char.category.characteristicType.annotationValue == "accession":
198+
accession_characteristics.append(char)
199+
else:
200+
if char.category.characteristicType == "accession":
201+
accession_characteristics.append(char)
198202

199203
if len(accession_characteristics) > 1:
200204
raise AttributeError(
@@ -255,17 +259,12 @@ def add_accession_to_node(
255259
if not updated_material_accession_characteristic:
256260
raise ValueError("Accession characteristic is not present.")
257261

258-
if updated_material_accession_characteristic.value and hasattr(
259-
updated_material_accession_characteristic.value, "annotationValue"
260-
):
261-
accession_ontology_annotation = OntologyAnnotation()
262-
accession_ontology_annotation.id = (
263-
f"#ontology_annotation/accession_{updated_material.id}"
264-
)
265-
accession_ontology_annotation.annotationValue = accession_number
266-
updated_material_accession_characteristic.value = accession_ontology_annotation
267-
else:
268-
updated_material_accession_characteristic.value = accession_number
262+
accession_ontology_annotation = OntologyAnnotation()
263+
accession_ontology_annotation.id = (
264+
f"#ontology_annotation/accession_{updated_material.id}"
265+
)
266+
accession_ontology_annotation.annotationValue = accession_number
267+
updated_material_accession_characteristic.value = accession_ontology_annotation
269268

270269
updated_material.characteristics.append(updated_material_accession_characteristic)
271270
print(f"{updated_material.id}: {updated_material_accession_characteristic.value}.")
@@ -352,20 +351,18 @@ def create_accession_characteristic(
352351
updated_material.characteristics.append(new_material_attribute_value)
353352

354353

355-
def update_investigation(
356-
investigation: Investigation, repo_response: RepositoryResponse
357-
) -> Investigation:
354+
def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> IsaJson:
358355
"""
359356
Adds the accession to the ISA JSON.
360357
361358
Args:
362-
isa_json (Investigation): The ISA JSON to be updated.
359+
isa_json (IsaJson): The ISA JSON to be updated.
363360
repo_response (RepositoryResponse): The response from the repository.
364361
365362
Returns:
366-
Investigation: The updated ISA JSON.
363+
IsaJson: The updated ISA JSON.
367364
"""
368-
updated_investigation = investigation.model_copy(deep=True)
365+
investigation = isa_json.investigation
369366
for accession in repo_response.accessions:
370367

371368
has_assay_in_path = [p for p in accession.path if p.key == "assays"]
@@ -380,7 +377,7 @@ def update_investigation(
380377
if not study_filter:
381378
raise ValueError(f"Study filter is not present in {accession.path}.")
382379

383-
updated_node = apply_filter(study_filter, updated_investigation.studies)
380+
updated_node = apply_filter(study_filter, investigation.studies)
384381

385382
if target_level == "assay":
386383
assay_filter = get_filter_for_accession_key(accession, "assays")
@@ -407,4 +404,5 @@ def update_investigation(
407404

408405
add_accession_to_node(updated_node, accession.value, material_type_path)
409406

410-
return updated_investigation
407+
isa_json.investigation = investigation
408+
return isa_json

mars-cli/mars_lib/submit.py

+70-24
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import os
2+
from datetime import datetime
13
from io import TextIOWrapper
24
import requests
35
import json
@@ -11,8 +13,13 @@
1113
input_json_schema_filepath,
1214
)
1315
from mars_lib.credential import CredentialManager
14-
from mars_lib.isa_json import load_isa_json
16+
from mars_lib.isa_json import (
17+
load_isa_json,
18+
reduce_isa_json_for_target_repo,
19+
update_isa_json,
20+
)
1521
from mars_lib.models.isa_json import IsaJson
22+
from mars_lib.models.repository_response import RepositoryResponse
1623
from mars_lib.target_repo import TargetRepository
1724
from mars_lib.logging import print_and_log
1825
from pydantic import ValidationError
@@ -22,6 +29,17 @@
2229
from typing import List
2330

2431

32+
def save_step_to_file(time_stamp: float, filename: str, isa_json: "IsaJson") -> None:
    """
    Write an intermediate ISA JSON snapshot to `tmp/<run time>/<filename>.json`.

    Args:
        time_stamp (float): POSIX timestamp identifying the current run. It names the
            directory, so every step saved with the same value lands in the same folder.
        filename (str): Name of the file to write, without the `.json` extension.
        isa_json (IsaJson): The ISA JSON model to serialise.
    """
    # Derive the folder from the caller's timestamp rather than the current time;
    # otherwise steps of one run that are seconds apart end up in different folders.
    run_dir = os.path.join(
        "tmp", datetime.fromtimestamp(time_stamp).strftime("%Y-%m-%dT%H:%M:%S")
    )
    os.makedirs(run_dir, exist_ok=True)

    with open(os.path.join(run_dir, f"{filename}.json"), "w", encoding="utf-8") as f:
        f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True))
38+
39+
40+
# Debug snapshots are enabled only when MARS_DEBUG is exactly "1".
# os.getenv returns a str (or None), so comparing against the integer 1 was dead code.
DEBUG = os.getenv("MARS_DEBUG") == "1"
41+
42+
2543
def submission(
2644
credential_service_name: str,
2745
username_credentials: str,
@@ -31,8 +49,9 @@ def submission(
3149
investigation_is_root: bool,
3250
urls: dict[str, Any],
3351
file_transfer: str,
52+
output: str,
3453
data_file_paths=None,
35-
):
54+
) -> None:
3655
# If credential manager info found:
3756
# Get password from the credential manager
3857
# Else:
@@ -59,6 +78,37 @@ def submission(
5978
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
6079
)
6180

81+
time_stamp = datetime.timestamp(datetime.now())
82+
83+
if DEBUG:
84+
save_step_to_file(time_stamp, "0_Initial_ISA_JSON_in_model", isa_json)
85+
86+
if all(
87+
repo not in TargetRepository.available_repositories()
88+
for repo in target_repositories
89+
):
90+
raise ValueError("No target repository selected.")
91+
92+
if TargetRepository.BIOSAMPLES in target_repositories:
93+
# Submit to Biosamples
94+
biosamples_result = submit_to_biosamples(
95+
isa_json=isa_json,
96+
biosamples_credentials=user_credentials,
97+
biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"],
98+
webin_token_url=urls["WEBIN"]["TOKEN"],
99+
)
100+
print_and_log(
101+
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
102+
level="info",
103+
)
104+
# Update `isa_json`, based on the receipt returned
105+
bs_mars_receipt = RepositoryResponse.model_validate(
106+
json.loads(biosamples_result.content)
107+
)
108+
isa_json = update_isa_json(isa_json, bs_mars_receipt)
109+
if DEBUG:
110+
save_step_to_file(time_stamp, "1_after_biosamples", isa_json)
111+
62112
if TargetRepository.ENA in target_repositories:
63113
# Step 1 : upload data if file paths are provided
64114
if data_file_paths and file_transfer:
@@ -68,8 +118,8 @@ def submission(
68118
submission_url=urls["ENA"]["DATA-SUBMISSION"],
69119
file_transfer=file_transfer,
70120
)
121+
71122
# Step 2 : submit isa-json to ena
72-
# TODO: Filter out other assays
73123
ena_result = submit_to_ena(
74124
isa_json=isa_json,
75125
user_credentials=user_credentials,
@@ -78,40 +128,32 @@ def submission(
78128
print_and_log(
79129
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
80130
)
81-
# TODO: Update `isa_json`, based on the receipt returned
131+
# Update `isa_json`, based on the receipt returned
132+
ena_mars_receipt = RepositoryResponse.from_json(str(ena_result.content))
133+
isa_json = update_isa_json(isa_json, ena_mars_receipt)
134+
if DEBUG:
135+
save_step_to_file(time_stamp, "2_after_ena", isa_json)
82136

83-
elif TargetRepository.BIOSAMPLES in target_repositories:
84-
# Submit to Biosamples
85-
biosamples_result = submit_to_biosamples(
86-
isa_json=isa_json,
87-
biosamples_credentials=user_credentials,
88-
biosamples_url=urls["BIOSAMPLES"]["SUBMISSION"],
89-
webin_token_url=urls["WEBIN"]["TOKEN"],
90-
)
91-
print_and_log(
92-
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
93-
level="info",
94-
)
95-
# TODO: Update `isa_json`, based on the receipt returned
96-
elif TargetRepository.METABOLIGHTS in target_repositories:
137+
if TargetRepository.METABOLIGHTS in target_repositories:
97138
# Submit to MetaboLights
98139
# TODO: Filter out other assays
99140
print_and_log(
100141
f"Submission to {TargetRepository.METABOLIGHTS} was successful",
101142
level="info",
102143
)
103144
# TODO: Update `isa_json`, based on the receipt returned
104-
elif TargetRepository.EVA in target_repositories:
145+
146+
if TargetRepository.EVA in target_repositories:
105147
# Submit to EVA
106148
# TODO: Filter out other assays
107149
print_and_log(
108150
f"Submission to {TargetRepository.EVA} was successful", level="info"
109151
)
110152
# TODO: Update `isa_json`, based on the receipt returned
111-
else:
112-
raise ValueError("No target repository selected.")
113153

114-
# TODO: Return the updated ISA JSON
154+
# Return the updated ISA JSON
155+
with open(f"{output}.json", "w") as f:
156+
f.write(isa_json.model_dump_json(by_alias=True, exclude_none=True))
115157

116158

117159
def submit_to_biosamples(
@@ -130,7 +172,9 @@ def submit_to_biosamples(
130172
biosamples_url,
131173
headers=headers,
132174
params=params,
133-
json=isa_json.model_dump(by_alias=True, exclude_none=True),
175+
json=reduce_isa_json_for_target_repo(
176+
isa_json, TargetRepository.BIOSAMPLES
177+
).model_dump(by_alias=True, exclude_none=True),
134178
)
135179

136180
if result.status_code != 200:
@@ -158,7 +202,9 @@ def submit_to_ena(
158202
submission_url,
159203
headers=headers,
160204
params=params,
161-
json=isa_json.model_dump(by_alias=True, exclude_none=True),
205+
json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump(
206+
by_alias=True, exclude_none=True
207+
),
162208
)
163209

164210
if result.status_code != 200:

mars-cli/mars_lib/target_repo.py

+4
Original file line numberDiff line numberDiff line change
@@ -13,3 +13,7 @@ class TargetRepository(str, Enum):
1313
METABOLIGHTS = "metabolights"
1414
BIOSAMPLES = "biosamples"
1515
EVA = "eva"
16+
17+
@classmethod
def available_repositories(cls) -> set[str]:
    """Return the set of string values of every member defined on this enum."""
    return set(member.value for member in cls)

mars-cli/tests/test_ftp_upload.py

+9-2
Original file line numberDiff line numberDiff line change
@@ -13,10 +13,17 @@ def test_upload_login_failure():
1313
uploader.upload([Path("./tests/fixtures/not_a_json_file.txt")])
1414

1515

16-
@pytest.mark.skip(reason="Relies on real ENA credentials in test_credentials_example.json")
16+
@pytest.mark.skip(
17+
reason="Relies on real ENA credentials in test_credentials_example.json"
18+
)
1719
def test_upload_success():
1820
# For local testing, add ENA username/password to test_credentials_example.json
1921
with open("./tests/test_credentials_example.json") as f:
2022
creds = json.load(f)
2123
uploader = FTPUploader("webin2.ebi.ac.uk", creds["username"], creds["password"])
22-
uploader.upload([Path("../test-data/ENA_TEST2.R1.fastq.gz"), Path("./tests/fixtures/not_a_json_file.txt")])
24+
uploader.upload(
25+
[
26+
Path("../test-data/ENA_TEST2.R1.fastq.gz"),
27+
Path("./tests/fixtures/not_a_json_file.txt"),
28+
]
29+
)

0 commit comments

Comments
 (0)