Skip to content

Commit

Permalink
Allow possibility to add version to schema metadata (#2600)
Browse files Browse the repository at this point in the history
  • Loading branch information
geoffreyaldebert authored May 25, 2021
1 parent 32d906a commit dd7bc85
Show file tree
Hide file tree
Showing 10 changed files with 204 additions and 32 deletions.
3 changes: 2 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@

## Current (in progress)

- Nothing yet
- Modify the `schema` field of resources. This field is now a nested field containing two sub-properties, `name` and `version` [#2600](https://github.com/opendatateam/udata/pull/2600).
- Add a `schema_version` facet to the dataset search (datasets need to be reindexed for it to appear in results) [#2600](https://github.com/opendatateam/udata/pull/2600).

## 2.6.5 (2021-05-19)

Expand Down
2 changes: 1 addition & 1 deletion js/components/dataset/resource/form.vue
Original file line number Diff line number Diff line change
Expand Up @@ -255,7 +255,7 @@ export default {
if (this.hasSchemas) {
const values = [{id: '', label: ''}].concat(schemas.data);
return [{
id: 'schema',
id: 'schema.name',
label: this._('Schema'),
widget: 'select-input',
values,
Expand Down
1 change: 0 additions & 1 deletion udata/core/dataset/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -574,7 +574,6 @@ def get(self):
return [{'id': id, 'label': label}
for id, label in RESOURCE_TYPES.items()]


@ns.route('/schemas/', endpoint='schemas')
class SchemasAPI(API):
@api.doc('schemas')
Expand Down
8 changes: 4 additions & 4 deletions udata/core/dataset/api_fields.py
Original file line number Diff line number Diff line change
Expand Up @@ -76,9 +76,7 @@
'loaded as a standalone page (ie. iframe or '
'new page)',
readonly=True),
'schema': fields.String(
description='The schema slug the resource adheres to',
allow_null=True),
'schema': fields.Raw(description='Reference to the associated schema', readonly=True),
})

upload_fields = api.inherit('UploadedResource', resource_fields, {
Expand Down Expand Up @@ -214,7 +212,9 @@
'label': fields.String(description='The resource type display name')
})


schema_fields = api.model('Schema', {
'id': fields.String(description='The schema identifier'),
'label': fields.String(description='The schema display name')
'label': fields.String(description='The schema display name'),
'versions': fields.List(fields.String, description='The available versions of the schema'),
})
38 changes: 29 additions & 9 deletions udata/core/dataset/forms.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,13 +33,33 @@ def normalize_format(data):

def enforce_allowed_schemas(form, field):
    '''
    Validate the `schema` mapping submitted for a resource.

    An empty value is accepted without any check. Otherwise:

    - `name` must be the `id` of an entry returned by `ResourceSchema.objects()`
    - `version`, when present, must be one of that entry's versions
      or the literal `latest`
    - no sub-property other than `name` and `version` may be present

    :param form: the form being validated (unused)
    :param field: the field whose `data` holds the schema mapping
    :raises validators.ValidationError: on the first violated rule
    '''
    schema = field.data
    if not schema:
        return

    # Fetch the catalog once so both checks run against the same snapshot
    catalog = ResourceSchema.objects()
    name = schema.get('name')

    allowed_schemas = [s['id'] for s in catalog]
    if name not in allowed_schemas:
        message = _('Schema name "{schema}" is not an allowed value. Allowed values: {values}')
        raise validators.ValidationError(message.format(
            schema=name,
            values=', '.join(allowed_schemas)
        ))

    if 'version' in schema:
        # `.get` tolerates catalog entries without a `versions` key
        # (presumably possible for content cached before versions existed).
        catalog_versions = next(
            (s.get('versions') or [] for s in catalog if s['id'] == name),
            []
        )
        # Build a new list: appending to the catalog's own list would add
        # 'latest' to the data returned by `ResourceSchema.objects()`.
        allowed_versions = list(catalog_versions) + ['latest']
        if schema.get('version') not in allowed_versions:
            message = _('Version "{version}" is not an allowed value. Allowed values: {values}')
            raise validators.ValidationError(message.format(
                version=schema.get('version'),
                values=', '.join(allowed_versions)
            ))

    properties = ['name', 'version']
    for prop in schema:
        if prop not in properties:
            message = _('Sub-property "{prop}" is not allowed value in schema field. Allowed values is : {properties}')
            raise validators.ValidationError(message.format(
                prop=prop,
                properties=', '.join(properties),
            ))


class BaseResourceForm(ModelForm):
Expand Down Expand Up @@ -74,9 +94,9 @@ class BaseResourceForm(ModelForm):
_('Publication date'),
description=_('The publication date of the resource'))
extras = fields.ExtrasField()
schema = fields.StringField(
schema = fields.DictField(
_('Schema'),
default=None,
default={},
validators=[validators.optional(), enforce_allowed_schemas],
description=_('The schema slug the resource adheres to'))

Expand Down
8 changes: 6 additions & 2 deletions udata/core/dataset/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -239,7 +239,7 @@ class ResourceMixin(object):
filesize = db.IntField() # `size` is a reserved keyword for mongoengine.
fs_filename = db.StringField()
extras = db.ExtrasField()
schema = db.StringField()
schema = db.DictField()

created_at = db.DateTimeField(default=datetime.now, required=True)
modified = db.DateTimeField(default=datetime.now, required=True)
Expand Down Expand Up @@ -799,7 +799,11 @@ def objects():
else:
schemas = response.json().get('schemas', [])
content = [
{'id': s['name'], 'label': s['title']} for s in schemas
{
'id': s['name'],
'label': s['title'],
'versions': [d['version_name'] for d in s['versions']],
} for s in schemas
]
cache.set(cache_key, content)
# no cached version or no content
Expand Down
6 changes: 4 additions & 2 deletions udata/core/dataset/search.py
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,7 @@ class Meta:
'description': String(),
'format': String(index='not_analyzed'),
'schema': String(index='not_analyzed'),
'schema_version': String(index='not_analyzed'),
})
format_suggest = Completion(analyzer=simple,
search_analyzer=simple,
Expand Down Expand Up @@ -136,6 +137,7 @@ class Meta:
labelizer=granularity_labelizer),
'format': TermsFacet(field='resources.format'),
'schema': TermsFacet(field='resources.schema'),
'schema_version': TermsFacet(field='resources.schema_version'),
'resource_type': TermsFacet(field='resources.type',
labelizer=resource_type_labelizer),
'reuses': RangeFacet(field='metrics.reuses',
Expand Down Expand Up @@ -190,7 +192,6 @@ def serialize(cls, dataset):
image_url = owner.avatar(40, external=True)

certified = organization and organization.certified

document = {
'title': dataset.title,
'description': dataset.description,
Expand All @@ -204,7 +205,8 @@ def serialize(cls, dataset):
'description': r.description,
'format': r.format,
'type': r.type,
'schema': r.schema,
'schema': dict(r.schema).get('name', {}),
'schema_version': dict(r.schema).get('version', {}),
}
for r in dataset.resources],
'format_suggest': [r.format.lower()
Expand Down
32 changes: 32 additions & 0 deletions udata/migrations/2021-04-08-update-schema-with-new-structure.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
'''
The purpose here is to update every resource's metadata 'schema'
with a new format (string to object)
'''
import logging

from udata.models import Dataset

log = logging.getLogger(__name__)


def migrate(db):
    '''
    Convert every resource's `schema` from a plain string to a nested object.

    A string value becomes `{'name': <string>}`; a missing or empty value
    becomes `{'name': None}`. A value that is already a non-empty dict is
    left untouched so that running the migration again (or on resources
    already stored in the new format) does not erase their name/version.

    :param db: the database handle passed to the migration (unused here)
    '''
    log.info('Processing resources.')

    datasets = Dataset.objects().no_cache().timeout(False)
    for dataset in datasets:
        save_res = False
        for resource in dataset.resources:
            if not hasattr(resource, 'schema'):
                continue
            schema = resource.schema
            if isinstance(schema, dict) and schema:
                # Already in the nested format: keep it as-is
                continue
            resource.schema = {'name': schema if isinstance(schema, str) else None}
            save_res = True
        if save_res:
            try:
                dataset.save()
            except Exception as e:
                # Best effort: one failing dataset must not abort the migration,
                # but say which one failed so it can be fixed by hand.
                log.warning('Unable to save dataset %s: %s', dataset.id, e)

    log.info('Completed.')
90 changes: 81 additions & 9 deletions udata/tests/api/test_datasets_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -1331,7 +1331,7 @@ def test_community_resource_api_update_with_file(self):
'new description')
self.assertTrue(
CommunityResource.objects.first().url.endswith('test.txt'))

def test_community_resource_file_update_old_file_deletion(self):
'''It should update a community resource's file and delete the old one'''
dataset = VisibleDatasetFactory()
Expand Down Expand Up @@ -1454,6 +1454,7 @@ def test_community_resource_api_delete(self):
self.assertEqual(CommunityResource.objects.count(), 0)
self.assertEqual(list(storages.resources.list_files()), [])


class ResourcesTypesAPITest(APITestCase):

def test_resource_types_list(self):
Expand All @@ -1473,13 +1474,39 @@ def test_dataset_schemas_api_list(self, api, rmock, app):
app.config['SCHEMA_CATALOG_URL'] = 'https://example.com/schemas'

rmock.get('https://example.com/schemas', json={
'schemas': [{"name": "etalab/schema-irve", "title": "Schéma IRVE"}]
"schemas": [
{
"name": "etalab/schema-irve",
"title": "Schéma IRVE",
"versions": [
{
"version_name": "1.0.0"
},
{
"version_name": "1.0.1"
},
{
"version_name": "1.0.2"
}
]
}
]
})

response = api.get(url_for('api.schemas'))

print(response.json)
assert200(response)
assert response.json == [{"id": "etalab/schema-irve", "label": "Schéma IRVE"}]
assert response.json == [
{
"id": "etalab/schema-irve",
"label": "Schéma IRVE",
"versions": [
"1.0.0",
"1.0.1",
"1.0.2"
]
}
]

@pytest.mark.options(SCHEMA_CATALOG_URL=None)
def test_dataset_schemas_api_list_no_catalog_url(self, api):
Expand All @@ -1503,15 +1530,51 @@ def test_dataset_schemas_api_list_error_no_cache(self, api, rmock):
@pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
def test_dataset_schemas_api_list_error_w_cache(self, api, rmock, mocker):
cache_mock_set = mocker.patch.object(cache, 'set')
mocker.patch.object(cache, 'get', return_value=[{"id": "etalab/schema-irve", "label": "Schéma IRVE"}])
mocker.patch.object(cache, 'get', return_value=[
{
"id": "etalab/schema-irve",
"label": "Schéma IRVE",
"versions": [
"1.0.0",
"1.0.1",
"1.0.2"
]
}
])

# Fill cache
rmock.get('https://example.com/schemas', json={
'schemas': [{"name": "etalab/schema-irve", "title": "Schéma IRVE"}]
"schemas": [
{
"name": "etalab/schema-irve",
"title": "Schéma IRVE",
"versions": [
{
"version_name": "1.0.0"
},
{
"version_name": "1.0.1"
},
{
"version_name": "1.0.2"
}
]
}
]
})
response = api.get(url_for('api.schemas'))
assert200(response)
assert response.json == [{"id": "etalab/schema-irve", "label": "Schéma IRVE"}]
assert response.json == [
{
"id": "etalab/schema-irve",
"label": "Schéma IRVE",
"versions": [
"1.0.0",
"1.0.1",
"1.0.2"
]
}
]
assert cache_mock_set.called

# Endpoint becomes unavailable
Expand All @@ -1520,5 +1583,14 @@ def test_dataset_schemas_api_list_error_w_cache(self, api, rmock, mocker):
# Long term cache is used
response = api.get(url_for('api.schemas'))
assert200(response)
assert response.json == [{"id": "etalab/schema-irve", "label": "Schéma IRVE"}]

assert response.json == [
{
"id": "etalab/schema-irve",
"label": "Schéma IRVE",
"versions": [
"1.0.0",
"1.0.1",
"1.0.2"
]
}
]
48 changes: 45 additions & 3 deletions udata/tests/dataset/test_dataset_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -437,10 +437,36 @@ def test_resource_schema_objects_timeout_no_cache(self, client, rmock):
@pytest.mark.options(SCHEMA_CATALOG_URL='https://example.com/schemas')
def test_resource_schema_objects(self, app, rmock):
    '''It should expose each catalog schema with its id, label and version names'''
    version_names = ["1.0.0", "1.0.1", "1.0.2"]
    catalog = {
        "schemas": [
            {
                "name": "etalab/schema-irve",
                "title": "Schéma IRVE",
                "versions": [{"version_name": v} for v in version_names],
            }
        ]
    }
    rmock.get('https://example.com/schemas', json=catalog)

    expected = [
        {
            "id": "etalab/schema-irve",
            "label": "Schéma IRVE",
            "versions": version_names,
        }
    ]
    assert ResourceSchema.objects() == expected

@pytest.mark.options(SCHEMA_CATALOG_URL=None)
def test_resource_schema_objects_no_catalog_url(self):
Expand All @@ -453,7 +479,23 @@ def test_resource_schema_objects_w_cache(self, rmock, mocker):

# fill cache
rmock.get('https://example.com/schemas', json={
'schemas': [{"name": "etalab/schema-irve", "title": "Schéma IRVE"}]
"schemas": [
{
"name": "etalab/schema-irve",
"title": "Schéma IRVE",
"versions": [
{
"version_name": "1.0.0"
},
{
"version_name": "1.0.1"
},
{
"version_name": "1.0.2"
}
]
}
]
})
ResourceSchema.objects()
assert cache_mock_set.called
Expand Down

0 comments on commit dd7bc85

Please sign in to comment.