Skip to content

Commit 7f562f3

Browse files
committed
add some demo schema structure
1 parent 11e31bb commit 7f562f3

File tree

6 files changed

+176
-35
lines changed

6 files changed

+176
-35
lines changed

docs/schemas/README.md

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# Refget schemas
2+
3+
The Refget specifications use schemas to describe data for several purposes. Eventually we intend to provide these via a [GA4GH schema registry](https://ga4gh.github.io/schema-registry/), once it exists. Until then, the schemas are stored here, and you can access them directly using the links in the table below:
4+
5+
6+
| Schema name and link | Description |
7+
| ---------------------------- | ---------------------------- |
8+
| [seqcol_minimal_v1.0.0.json](seqcol_minimal_v1.0.0.json) | Minimal schema, describes only the 3 primary attributes (names, lengths, and sequences) |
9+
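| [seqcol_extended_v1.0.0.json](seqcol_extended_v1.0.0.json) | Extended schema, adds definitions of the additional attributes (name_length_pairs, sorted_name_length_pairs, and sorted_sequences) to the minimal schema |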
10+
| [seqcol_refs_v1.0.0.json](seqcol_refs_v1.0.0.json) | Minimal schema, with each attribute defined via a `$ref` reference instead of inline |
11+
12+
Lines changed: 96 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,96 @@
1+
{
2+
"description": "A collection of biological sequences.",
3+
"type": "object",
4+
"properties": {
5+
"lengths": {
6+
"type": "array",
7+
"collated": true,
8+
"description": "Number of elements, such as nucleotides or amino acids, in each sequence.",
9+
"items": {
10+
"type": "integer"
11+
}
12+
},
13+
"names": {
14+
"type": "array",
15+
"collated": true,
16+
"description": "Human-readable labels of each sequence (chromosome names).",
17+
"items": {
18+
"type": "string"
19+
}
20+
},
21+
"sequences": {
22+
"type": "array",
23+
"collated": true,
24+
"items": {
25+
"type": "string",
26+
"description": "Refget sequences v2 identifiers for sequences."
27+
}
28+
},
29+
"name_length_pairs": {
30+
"type": "array",
31+
"collated": true,
32+
"description": "Human-readable labels paired with number of elements in each sequence.",
33+
"items": {
34+
"type": "object",
35+
"properties": {
36+
"name": {
37+
"type": "string",
38+
"description": "Human-readable label of the sequence (chromosome name)."
39+
},
40+
"length": {
41+
"type": "integer",
42+
"description": "Number of elements, such as nucleotides or amino acids, in the sequence."
43+
}
44+
},
45+
"required": [
46+
"name",
47+
"length"
48+
]
49+
}
50+
},
51+
"sorted_name_length_pairs": {
52+
"type": "array",
53+
"collated": true,
54+
"description": "Human-readable labels paired with number of elements in each sequence, sorted lexicographically.",
55+
"items": {
56+
"type": "object",
57+
"properties": {
58+
"name": {
59+
"type": "string",
60+
"description": "Human-readable label of the sequence (chromosome name)."
61+
},
62+
"length": {
63+
"type": "integer",
64+
"description": "Number of elements, such as nucleotides or amino acids, in the sequence."
65+
}
66+
},
67+
"required": [
68+
"name",
69+
"length"
70+
]
71+
}
72+
},
73+
"sorted_sequences": {
74+
"type": "array",
75+
"collated": true,
76+
"items": {
77+
"type": "string",
78+
"description": "Refget sequences v2 identifiers for sequences."
79+
}
80+
}
81+
},
82+
"required": [
83+
"names",
84+
"lengths",
85+
"sequences"
86+
],
87+
"ga4gh": {
88+
"inherent": [
89+
"names",
90+
"sequences"
91+
],
92+
"transient": [
93+
"sorted_name_length_pairs"
94+
]
95+
}
96+
}
Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
{
2+
"description": "A collection of biological sequences.",
3+
"type": "object",
4+
"properties": {
5+
"lengths": {
6+
"type": "array",
7+
"collated": true,
8+
"description": "Number of elements, such as nucleotides or amino acids, in each sequence.",
9+
"items": {
10+
"type": "integer"
11+
}
12+
},
13+
"names": {
14+
"type": "array",
15+
"collated": true,
16+
"description": "Human-readable labels of each sequence (chromosome names).",
17+
"items": {
18+
"type": "string"
19+
}
20+
},
21+
"sequences": {
22+
"type": "array",
23+
"collated": true,
24+
"items": {
25+
"type": "string",
26+
"description": "Refget sequences v2 identifiers for sequences."
27+
}
28+
}
29+
},
30+
"required": [
31+
"names",
32+
"lengths",
33+
"sequences"
34+
],
35+
"ga4gh": {
36+
"inherent": [
37+
"names",
38+
"sequences"
39+
]
40+
}
41+
}

docs/schemas/seqcol_minimal_v1.0.0.json

Lines changed: 0 additions & 35 deletions
This file was deleted.
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"description": "A collection of biological sequences.",
3+
"type": "object",
4+
"$id": "https://ga4gh.github.io/refget/schemas/seqcol_minimal_v1.0.0.json",
5+
"properties": {
6+
"lengths": {
7+
"$ref": "/lengths"
8+
},
9+
"names": {
10+
"$ref": "/names"
11+
},
12+
"sequences": {
13+
"$ref": "/sequences"
14+
}
15+
},
16+
"required": [
17+
"names",
18+
"sequences"
19+
],
20+
"ga4gh": {
21+
"inherent": [
22+
"names",
23+
"sequences"
24+
]
25+
}
26+
}

mkdocs.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@ nav:
1616
- API: implementation_api.md
1717
- Reference:
1818
- How to cite: citation.md
19+
- Schemas: schemas/README.md
1920
- Decision record: decision_record.md
2021
- Contributing: contributing.md
2122

0 commit comments

Comments
 (0)