-
Notifications
You must be signed in to change notification settings - Fork 0
/
dataset_schema.json
186 lines (186 loc) · 8.32 KB
/
dataset_schema.json
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
{
"id": "http://biocaddie.github.io/dats/dataset_schema.json#",
"$schema": "http://json-schema.org/draft-04/schema",
"title": "DATS Dataset Schema",
"description": "A set of dimensions about an entity being observed. A collection of data, published or curated by a single agent, and available for access or download in one or more formats (from DCAT: http://www.w3.org/TR/vocab-dcat/#Class:_Dataset). A body of structured information describing some topic(s) of interest (from: http://schema.org/Dataset).",
"type": "object",
"properties": {
"@context": {
"anyOf": [
{
"type": "string"
},
{
"type": "object"
}
]
},
"@type": { "type": "string", "enum": [ "Dataset"] },
"identifier": {
"$ref": "http://biocaddie.github.io/dats/identifier_info_schema.json#"
},
"alternateIdentifiers": {
"description": "Alternate identifiers for the dataset.",
"type": "array",
"items": {
"$ref": "http://biocaddie.github.io/dats/alternate_identifier_info_schema.json#"
}
},
"relatedIdentifiers": {
"description": "Related identifiers for the dataset.",
"type": "array",
"items": {
"$ref": "http://biocaddie.github.io/dats/related_identifier_info_schema.json#"
}
},
"title" : {
"description" : "The name of the dataset, usually one sentence or short description of the dataset.",
"type" : "string"
},
"description" : {
"description": "A textual narrative comprised of one or more statements describing the dataset.",
"type" : "string"
},
"dates" : {
"description": "Relevant dates for the dataset. At least one date must be added, e.g. the creation date or the last modification date.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/date_info_schema.json#"
}
},
"storedIn" : {
"description": "The data repository hosting the dataset.",
"$ref": "http://biocaddie.github.io/dats/data_repository_schema.json#"
},
"spatialCoverage": {
"description": "The geographical extension and span covered by the dataset and its measured dimensions/variables.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/place_schema.json#"
}
},
"types" : {
"description": "A term, ideally from a controlled terminology, identifying the dataset type or nature of the data, placing it in a typology.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/data_type_schema.json#"
},
"minItems" : 1
},
"availability": {
"description": "A qualifier indicating the different types of availability for a dataset (available, unavailable, embargoed, available with restriction, information not available).",
"type": "string"
},
"refinement": {
"description": "A qualifier to describe the level of data processing of the dataset and its distributions.",
"type": "string"
},
"aggregation": {
"description": "A qualifier indicating if the entity represents an 'instance of dataset' or a 'collection of datasets'.",
"type": "string"
},
"privacy": {
"description": "A qualifier to describe the data protection applied to the dataset. This is relevant for clinical data.",
"type": "string"
},
"distributions" : {
"description": "The distribution(s) by which datasets are made available (for example: MySQL dump).",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/dataset_distribution_schema.json#"
}
},
"dimensions" : {
"description": "The different dimensions (granular components) making up a dataset.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/dimension_schema.json#"
}
},
"primaryPublications" : {
"description": "The primary publication(s) associated with the dataset, usually describing how the dataset was produced.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/publication_schema.json#"
}
},
"citations" : {
"description": "The publication(s) that cite this dataset.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/publication_schema.json#"
}
},
"citationCount": {
"description": "The number of publications that cite this dataset (enumerated in the citations property).",
"type": "integer"
},
"producedBy" : {
"description": "A study process which generated a given dataset, if any.",
"$ref" : "http://biocaddie.github.io/dats/study_schema.json#"
},
"creators" : {
"description": "The person(s) or organization(s) which contributed to the creation of the dataset.",
"type" : "array",
"items" : {
"oneOf": [
{"$ref" : "http://biocaddie.github.io/dats/person_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/organization_schema.json#"}
]
},
"minItems" : 1
},
"licenses": {
"description": "The terms of use of the dataset.",
"type": "array",
"items": {
"$ref": "http://biocaddie.github.io/dats/license_schema.json#"
}
},
"isAbout": {
"description" : "Different entities (biological entity, taxonomic information, disease, molecular entity, anatomical part, treatment) associated with this dataset.",
"type": "array",
"items": {
"anyOf": [
{"$ref" : "http://biocaddie.github.io/dats/biological_entity_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/taxonomic_info_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/disease_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/molecular_entity_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/anatomical_part_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/treatment_schema.json#"},
{"$ref" : "http://biocaddie.github.io/dats/annotation_schema.json#"}
]
}
},
"hasPart" : {
"description": "A Dataset that is a subset of this Dataset; Datasets declaring the 'hasPart' relationship are considered a collection of Datasets, the aggregation criteria could be included in the 'description' field.",
"type": "array",
"items": {
"$ref" : "http://biocaddie.github.io/dats/dataset_schema.json#"
}
},
"acknowledges" : {
"description": "The grant(s) which funded and supported the work reported by the dataset.",
"type" : "array",
"items" : {
"$ref" : "http://biocaddie.github.io/dats/grant_schema.json#"
}
},
"keywords" : {
"description": "Tags associated with the dataset, which will help in its discovery.",
"type": "array",
"items": {
"$ref" : "http://biocaddie.github.io/dats/annotation_schema.json#"
}
},
"extraProperties": {
"description": "Extra properties that do not fit in the previously specified attributes.",
"type": "array",
"items": {
"$ref" : "http://biocaddie.github.io/dats/category_values_pair_schema.json#"
}
}
},
"additionalProperties": false,
"required" : [ "title", "types", "creators" ]
}