Skip to content

Commit 98c8b1e

Browse files
authored
Merge pull request #87 from elixir-europe/map-data-files-to-repository
Map data files to repository
2 parents 7390763 + 5bd6cb4 commit 98c8b1e

6 files changed

+169
-32
lines changed

mars-cli/mars_cli.py

+3-4
Original file line numberDiff line numberDiff line change
@@ -10,7 +10,6 @@
1010
from mars_lib.logging import print_and_log
1111
from mars_lib.validation import validate, CustomValidationException
1212
from logging.handlers import RotatingFileHandler
13-
from pydantic import ValidationError
1413
import requests
1514
import sys
1615
import os
@@ -264,13 +263,13 @@ def submit(
264263
target_repositories = []
265264

266265
if submit_to_biosamples:
267-
target_repositories.append(TargetRepository.BIOSAMPLES)
266+
target_repositories.append(TargetRepository.BIOSAMPLES.value)
268267

269268
if submit_to_ena:
270-
target_repositories.append(TargetRepository.ENA)
269+
target_repositories.append(TargetRepository.ENA.value)
271270

272271
if submit_to_metabolights:
273-
target_repositories.append(TargetRepository.METABOLIGHTS)
272+
target_repositories.append(TargetRepository.METABOLIGHTS.value)
274273

275274
print_and_log(
276275
f"Starting submission of the ISA JSON to the target repositories: {', '.join(target_repositories)}."

mars-cli/mars_lib/isa_json.py

+57-2
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
import json
2-
from typing import Union, List, Any, Tuple, Optional
2+
from typing import Union, List, Any, Tuple, Optional, Dict
3+
4+
from mars_lib.logging import print_and_log
35
from mars_lib.models.isa_json import (
46
Investigation,
57
Assay,
@@ -38,7 +40,7 @@ def reduce_isa_json_for_target_repo(
3840
new_studies = []
3941
studies = filtered_isa_json.investigation.studies
4042
for study in studies:
41-
if target_repo == TargetRepository.BIOSAMPLES:
43+
if target_repo == TargetRepository.BIOSAMPLES.value:
4244
filtered_assays = []
4345
else:
4446
assays = study.assays
@@ -420,3 +422,56 @@ def update_isa_json(isa_json: IsaJson, repo_response: RepositoryResponse) -> Isa
420422

421423
isa_json.investigation = investigation
422424
return isa_json
425+
426+
427+
def map_data_files_to_repositories(
    files: List[str], isa_json: IsaJson
) -> Dict[str, List[str]]:
    """Group the data files passed on the command line by target repository.

    Every assay in the ISA-JSON is inspected for its target repository
    comment. The data files the assay declares are matched against ``files``
    by base name only (the part after the last ``/``), so files with the same
    base name in different directories are all mapped to the assay.

    Files from several assays that share the same target repository are
    accumulated under one key instead of overwriting each other. A path that
    was already recorded for a repository is not added a second time.

    Args:
        files: Paths of the data files passed in the command.
        isa_json: The ISA-JSON whose assays declare the expected data files.

    Returns:
        A mapping from target repository name to the list of full file paths
        (in command order) that belong to assays of that repository. A
        repository whose assays declare no data files maps to an empty list.

    Raises:
        ValueError: If an assay has no target repository comment value, or if
            an assay declares a data file that is not among ``files``.
    """
    df_map: Dict[str, List[str]] = {}
    assays: List[Assay] = [
        assay for study in isa_json.investigation.studies for assay in study.assays
    ]

    # Pair every full path with its base name once, keeping command order.
    named_files = [(full_name, full_name.split("/")[-1]) for full_name in files]
    known_short_names = {short_name for _, short_name in named_files}
    # Base names claimed by at least one assay; the rest is reported below.
    mapped_short_names = set()

    for assay in assays:
        target_repo_comment: Comment = detect_target_repo_comment(assay.comments)
        # This is an effect of everything being optional in the Comment model.
        # Should we decide to make the value mandatory, this guard clause would
        # not be necessary anymore.
        if target_repo_comment.value is None:
            raise ValueError(
                f"At least one assay in the ISA-JSON has no '{TARGET_REPO_KEY}' comment. Mapping not possible. Make sure all assays in the ISA-JSON have this comment!"
            )
        assay_data_files = [df.name for df in assay.dataFiles]

        # Every data file declared in the ISA-JSON must be present in the
        # command; otherwise the submission would be incomplete.
        for adf in assay_data_files:
            if adf not in known_short_names:
                raise ValueError(
                    f"Assay for repository '{target_repo_comment.value}' has encountered a mismatch while mapping the data files to the ISA-JSON.\n"
                    f"Data File '{adf}' is missing in the data files passed in the command:\n"
                    f"{files}\n"
                    "Please correct the mismatch!"
                )

        wanted_short_names = set(assay_data_files)
        mapped_short_names.update(wanted_short_names)

        # Accumulate per repository: several assays may share a target
        # repository, and a plain assignment would drop the earlier ones.
        repo_files = df_map.setdefault(target_repo_comment.value, [])
        for full_name, short_name in named_files:
            if short_name in wanted_short_names and full_name not in repo_files:
                repo_files.append(full_name)

    # Warn about files given in the command that no assay refers to.
    for _, short_name in named_files:
        if short_name not in mapped_short_names:
            print_and_log(
                msg=f"File '{short_name}' could not be mapped to any data file in the ISA-JSON. For this reason, it will be skipped during submission!",
                level="warning",
            )

    return df_map

mars-cli/mars_lib/submit.py

+27-17
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
load_isa_json,
2020
reduce_isa_json_for_target_repo,
2121
update_isa_json,
22+
map_data_files_to_repositories,
2223
)
2324
from mars_lib.models.isa_json import Comment, IsaJson
2425
from mars_lib.models.repository_response import RepositoryResponse
@@ -52,7 +53,7 @@ def submission(
5253
urls: dict[str, Any],
5354
file_transfer: str,
5455
output: str,
55-
data_file_paths=None,
56+
data_file_paths: List[TextIOWrapper] = [],
5657
) -> None:
5758
# If credential manager info found:
5859
# Get password from the credential manager
@@ -80,6 +81,11 @@ def submission(
8081
f"ISA JSON with investigation '{isa_json.investigation.title}' is valid."
8182
)
8283

84+
# create data file map
85+
data_file_map = map_data_files_to_repositories(
86+
files=[str(dfp) for dfp in data_file_paths], isa_json=isa_json
87+
)
88+
8389
time_stamp = datetime.timestamp(datetime.now())
8490

8591
if DEBUG:
@@ -91,7 +97,7 @@ def submission(
9197
):
9298
raise ValueError("No target repository selected.")
9399

94-
if TargetRepository.BIOSAMPLES in target_repositories:
100+
if TargetRepository.BIOSAMPLES.value in target_repositories:
95101
# Submit to Biosamples
96102
biosamples_result = submit_to_biosamples(
97103
isa_json=isa_json,
@@ -100,7 +106,7 @@ def submission(
100106
webin_token_url=urls["WEBIN"]["TOKEN"],
101107
)
102108
print_and_log(
103-
f"Submission to {TargetRepository.BIOSAMPLES} was successful. Result:\n{biosamples_result.json()}",
109+
f"Submission to {TargetRepository.BIOSAMPLES.value} was successful. Result:\n{biosamples_result.json()}",
104110
level="info",
105111
)
106112
# Update `isa_json`, based on the receipt returned
@@ -111,16 +117,20 @@ def submission(
111117
if DEBUG:
112118
save_step_to_file(time_stamp, "1_after_biosamples", isa_json)
113119

114-
if TargetRepository.ENA in target_repositories:
120+
if TargetRepository.ENA.value in target_repositories:
115121
# Step 1 : upload data if file paths are provided
116122
if data_file_paths and file_transfer:
117123
upload_to_ena(
118-
file_paths=data_file_paths,
124+
file_paths=[
125+
Path(df) for df in data_file_map[TargetRepository.ENA.value]
126+
],
119127
user_credentials=user_credentials,
120128
submission_url=urls["ENA"]["DATA-SUBMISSION"],
121129
file_transfer=file_transfer,
122130
)
123-
print_and_log(f"Start submitting to {TargetRepository.ENA}.", level="debug")
131+
print_and_log(
132+
f"Start submitting to {TargetRepository.ENA.value}.", level="debug"
133+
)
124134

125135
# Step 2 : submit isa-json to ena
126136
ena_result = submit_to_ena(
@@ -129,11 +139,11 @@ def submission(
129139
submission_url=urls["ENA"]["SUBMISSION"],
130140
)
131141
print_and_log(
132-
f"Submission to {TargetRepository.ENA} was successful. Result:\n{ena_result.json()}"
142+
f"Submission to {TargetRepository.ENA.value} was successful. Result:\n{ena_result.json()}"
133143
)
134144

135145
print_and_log(
136-
f"Update ISA-JSON based on receipt from {TargetRepository.ENA}.",
146+
f"Update ISA-JSON based on receipt from {TargetRepository.ENA.value}.",
137147
level="debug",
138148
)
139149
ena_mars_receipt = RepositoryResponse.model_validate(
@@ -143,10 +153,10 @@ def submission(
143153
if DEBUG:
144154
save_step_to_file(time_stamp, "2_after_ena", isa_json)
145155

146-
if TargetRepository.METABOLIGHTS in target_repositories:
156+
if TargetRepository.METABOLIGHTS.value in target_repositories:
147157
# Submit to MetaboLights
148158
metabolights_result = upload_to_metabolights(
149-
file_paths=data_file_paths,
159+
file_paths=data_file_map[TargetRepository.METABOLIGHTS.value],
150160
file_transfer=file_transfer,
151161
isa_json=isa_json,
152162
metabolights_credentials=user_credentials,
@@ -155,7 +165,7 @@ def submission(
155165
)
156166
metabolights_receipt_obj = metabolights_result.json()
157167
print_and_log(
158-
f"Submission to {TargetRepository.METABOLIGHTS} was successful. Result:\n{metabolights_receipt_obj}",
168+
f"Submission to {TargetRepository.METABOLIGHTS.value} was successful. Result:\n{metabolights_receipt_obj}",
159169
level="info",
160170
)
161171
metabolights_receipt = RepositoryResponse.model_validate(
@@ -171,11 +181,11 @@ def submission(
171181
if DEBUG:
172182
save_step_to_file(time_stamp, "3_after_metabolights", isa_json)
173183

174-
if TargetRepository.EVA in target_repositories:
184+
if TargetRepository.EVA.value in target_repositories:
175185
# Submit to EVA
176186
# TODO: Filter out other assays
177187
print_and_log(
178-
f"Submission to {TargetRepository.EVA} was successful.", level="info"
188+
f"Submission to {TargetRepository.EVA.value} was successful.", level="info"
179189
)
180190
# TODO: Update `isa_json`, based on the receipt returned
181191

@@ -201,7 +211,7 @@ def submit_to_biosamples(
201211
headers=headers,
202212
params=params,
203213
json=reduce_isa_json_for_target_repo(
204-
isa_json, TargetRepository.BIOSAMPLES
214+
isa_json, TargetRepository.BIOSAMPLES.value
205215
).model_dump(by_alias=True, exclude_none=True),
206216
)
207217

@@ -338,9 +348,9 @@ def submit_to_ena(
338348
submission_url,
339349
headers=headers,
340350
params=params,
341-
json=reduce_isa_json_for_target_repo(isa_json, TargetRepository.ENA).model_dump(
342-
by_alias=True, exclude_none=True
343-
),
351+
json=reduce_isa_json_for_target_repo(
352+
isa_json, TargetRepository.ENA.value
353+
).model_dump(by_alias=True, exclude_none=True),
344354
)
345355

346356
if result.status_code != 200:

mars-cli/mars_lib/target_repo.py

+1
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ class TargetRepository(str, Enum):
1313
METABOLIGHTS = "metabolights"
1414
BIOSAMPLES = "biosamples"
1515
EVA = "eva"
16+
ARRAYEXPRESS = "arrayexpress"
1617

1718
@classmethod
1819
def available_repositories(cls):

mars-cli/tests/test_biosample_external_references.py

+1-1
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def test_validate_bs_accession():
6767
validate_bs_accession(invalid_accession)
6868

6969
valid_accession = "SAMEA112654119"
70-
assert validate_bs_accession(valid_accession) != ValueError
70+
validate_bs_accession(valid_accession)
7171

7272

7373
def test_validate_json_against_schema():

0 commit comments

Comments
 (0)