Skip to content

Commit 859814e

Browse files
committed
Add validator for filename
1 parent eb8881b commit 859814e

6 files changed

+73
-15
lines changed

mars-cli/mars_lib/models/isa_json.py

+33-3
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,7 @@
11
from __future__ import annotations
2+
3+
import re
4+
25
from enum import Enum
36
from typing import List, Optional, Union
47
from pydantic import BaseModel, Field, field_validator, ConfigDict
@@ -189,6 +192,15 @@ class Assay(CommentedIsaBase):
189192
technologyType: Optional[OntologyAnnotation] = None
190193
unitCategories: List[OntologyAnnotation] = []
191194

195+
@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that an assay filename, when provided, starts with 'a_'.

    Args:
        v: The value supplied for ``filename``; may be ``None``.

    Returns:
        The value unchanged when it is ``None`` or starts with 'a_'.

    Raises:
        ValueError: If a filename is given that does not start with 'a_'.
    """
    # A missing filename is passed through untouched; only provided
    # values are checked against the prefix.
    if v is None or v.startswith("a_"):
        return v
    raise ValueError("'filename' should start with 'a_'")
203+
192204
@field_validator("comments")
193205
def detect_target_repo_comments(cls, v: List[Comment]) -> Optional[List[Comment]]:
194206
target_repo_comments = [
@@ -242,14 +254,14 @@ class MaterialAttribute(IsaBase):
242254

243255

244256
class Study(CommentedIsaBase):
245-
id: Optional[str] = Field(alias="@id", default=None)
257+
id: str = Field(alias="@id", default=None)
246258
assays: List[Assay] = []
247259
characteristicCategories: List[MaterialAttribute] = []
248260
description: Optional[str] = None
249261
factors: List[Factor] = []
250262
filename: Optional[str] = None
251263
identifier: Optional[str] = None
252-
materials: Optional[StudyMaterialType]
264+
materials: Optional[StudyMaterialType] = None
253265
people: List[Person] = []
254266
processSequence: List[Process] = []
255267
protocols: List[Protocol] = []
@@ -260,9 +272,18 @@ class Study(CommentedIsaBase):
260272
title: Optional[str] = None
261273
unitCategories: List[OntologyAnnotation] = []
262274

275+
@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that a study filename, when provided, starts with 's_'.

    Args:
        v: The value supplied for ``filename``; may be ``None``.

    Returns:
        The value unchanged when it is ``None`` or starts with 's_'.

    Raises:
        ValueError: If a filename is given that does not start with 's_'.
    """
    # A missing filename is passed through untouched; only provided
    # values are checked against the prefix.
    if v is None or v.startswith("s_"):
        return v
    raise ValueError("'filename' should start with 's_'")
283+
263284

264285
class Investigation(CommentedIsaBase):
265-
id: Optional[str] = Field(alias="@id", default=None)
286+
id: str = Field(alias="@id", default=None)
266287
description: Optional[str] = None
267288
filename: Optional[str] = None
268289
identifier: Optional[str] = None
@@ -274,6 +295,15 @@ class Investigation(CommentedIsaBase):
274295
submissionDate: Optional[str] = None
275296
title: Optional[str] = None
276297

298+
@field_validator("filename")
@classmethod
def validate_filename(cls, v: Optional[str]) -> Optional[str]:
    """Check that an investigation filename, when provided, starts with 'i_'.

    Args:
        v: The value supplied for ``filename``; may be ``None``.

    Returns:
        The value unchanged when it is ``None`` or starts with 'i_'.

    Raises:
        ValueError: If a filename is given that does not start with 'i_'.
    """
    # A missing filename is passed through untouched; only provided
    # values are checked against the prefix.
    if v is None or v.startswith("i_"):
        return v
    raise ValueError("'filename' should start with 'i_'")
306+
277307

278308
class IsaJson(IsaBase):
279309
investigation: Investigation

mars-cli/tests/test_isa_json.py

+29-1
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
import re
2+
13
from mars_lib.isa_json import (
24
reduce_isa_json_for_target_repo,
35
load_isa_json,
@@ -6,7 +8,15 @@
68
from mars_lib.target_repo import TargetRepository, TARGET_REPO_KEY
79
import pytest
810
from pydantic import ValidationError
9-
from mars_lib.models.isa_json import Data, Material, Assay, Person, IsaJson
11+
from mars_lib.models.isa_json import (
12+
Data,
13+
Material,
14+
Assay,
15+
Person,
16+
IsaJson,
17+
Investigation,
18+
Study,
19+
)
1020
from mars_lib.models.repository_response import RepositoryResponse
1121
import json
1222

@@ -225,3 +235,21 @@ def test_update_study_materials_with_accession_categories():
225235
updated_investigation.studies[0].materials.samples[0].characteristics[-1].value
226236
== repo_response.accessions[1].value
227237
)
238+
239+
240+
def test_filename_validation():
    """Exercise the filename prefix validators of Investigation, Study and Assay.

    Each model must reject a filename lacking its prefix ('i_', 's_' and
    'a_' respectively) with a ValidationError carrying the validator's
    message, and must accept a correctly prefixed filename unchanged.
    """
    prefixed_models = (
        (Investigation, "i_"),
        (Study, "s_"),
        (Assay, "a_"),
    )

    for model, prefix in prefixed_models:
        # `match` is interpreted as a regular expression, so escape the
        # literal message before handing it to pytest.
        expected_error = re.escape(f"'filename' should start with '{prefix}'")
        with pytest.raises(ValidationError, match=expected_error):
            model.model_validate({"@id": "1", "filename": "bad filename"})

        # Assert on the stored value rather than on the truthiness of the
        # returned model, which would pass for any instance.
        good_filename = f"{prefix}Good_File_Name"
        validated = model.model_validate({"@id": "2", "filename": good_filename})
        assert validated.filename == good_filename

test-data/biosamples-input-isa.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"submissionDate": "",
77
"publicReleaseDate": "",
88
"ontologySourceReferences": [],
9-
"filename": "Bob's investigation.txt",
9+
"filename": "i_Bob's investigation.txt",
1010
"comments": [
1111
{
1212
"name": "ISAjson export time",
@@ -60,7 +60,7 @@
6060
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
6161
"submissionDate": "",
6262
"publicReleaseDate": "",
63-
"filename": "Arabidopsis thaliana.txt",
63+
"filename": "s_Arabidopsis thaliana.txt",
6464
"comments": [
6565
{
6666
"name": "SEEK Study ID",
@@ -1008,4 +1008,4 @@
10081008
}
10091009
]
10101010
}
1011-
}
1011+
}

test-data/biosamples-modified-isa.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"submissionDate": "",
77
"publicReleaseDate": "",
88
"ontologySourceReferences": [],
9-
"filename": "Bob's investigation.txt",
9+
"filename": "i_Bob's investigation.txt",
1010
"comments": [
1111
{
1212
"name": "ISAjson export time",
@@ -60,7 +60,7 @@
6060
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
6161
"submissionDate": "",
6262
"publicReleaseDate": "",
63-
"filename": "Arabidopsis thaliana.txt",
63+
"filename": "s_Arabidopsis thaliana.txt",
6464
"comments": [
6565
{
6666
"name": "SEEK Study ID",
@@ -1019,4 +1019,4 @@
10191019
}
10201020
]
10211021
}
1022-
}
1022+
}

test-data/biosamples-original-isa-no-accesion-char.json

+2-2
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"submissionDate": "",
77
"publicReleaseDate": "",
88
"ontologySourceReferences": [],
9-
"filename": "Bob's investigation.txt",
9+
"filename": "i_Bob's investigation.txt",
1010
"comments": [
1111
{
1212
"name": "ISAjson export time",
@@ -60,7 +60,7 @@
6060
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
6161
"submissionDate": "",
6262
"publicReleaseDate": "",
63-
"filename": "Arabidopsis thaliana.txt",
63+
"filename": "s_Arabidopsis thaliana.txt",
6464
"comments": [
6565
{
6666
"name": "SEEK Study ID",

test-data/biosamples-original-isa.json

+3-3
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
"submissionDate": "",
77
"publicReleaseDate": "",
88
"ontologySourceReferences": [],
9-
"filename": "Bob's investigation.txt",
9+
"filename": "i_Bob's investigation.txt",
1010
"comments": [
1111
{
1212
"name": "ISAjson export time",
@@ -60,7 +60,7 @@
6060
"description": "Nucleic acid sequencing and metabolomics and proteomics of Arabidopsis thaliana in specific experimental conditions to test a specific hypothesis.\r\n",
6161
"submissionDate": "",
6262
"publicReleaseDate": "",
63-
"filename": "Arabidopsis thaliana.txt",
63+
"filename": "s_Arabidopsis thaliana.txt",
6464
"comments": [
6565
{
6666
"name": "SEEK Study ID",
@@ -1002,4 +1002,4 @@
10021002
}
10031003
]
10041004
}
1005-
}
1005+
}

0 commit comments

Comments
 (0)