Skip to content

Commit

Permalink
Rewrite load/normalize/lookup logic for INSPIRE vocabularies
Browse files Browse the repository at this point in the history
This should resolve #138. It also addresses #137 but tests have to be added.
  • Loading branch information
drmalex07 committed Jun 7, 2015
1 parent 20310c5 commit d533046
Show file tree
Hide file tree
Showing 5 changed files with 54 additions and 34 deletions.
11 changes: 11 additions & 0 deletions ckanext/publicamundi/lib/metadata/types/_common.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,17 @@ class FreeKeyword(Object):
reference_date = None
date_type = None

@classmethod
def normalize_keyword(cls, s):
    '''Coerce a keyword to unicode and return its normalized form.

    The unicode text is first passed through inflection.underscore and the
    result is then passed through inflection.dasherize.
    '''
    # Imported lazily, so inflection is only needed when a keyword is normalized
    import inflection
    underscored = inflection.underscore(unicode(s))
    return inflection.dasherize(underscored)

def __init__(self, **kwargs):
    '''Build a FreeKeyword from keyword arguments.

    A truthy 'value' argument is replaced by its normalized form (see
    normalize_keyword) before all arguments are handed to the parent
    constructor. A missing or falsy 'value' is passed through untouched.
    '''
    if kwargs.get('value'):
        kwargs['value'] = self.normalize_keyword(kwargs['value'])
    super(FreeKeyword, self).__init__(**kwargs)

@object_null_adapter()
class GeographicBoundingBox(Object):

Expand Down
4 changes: 1 addition & 3 deletions ckanext/publicamundi/lib/metadata/types/inspire_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -192,10 +192,9 @@ def to_responsible_party(alist):
thes_version = None
else:
thes_version = re.sub(r'^[ ]*version[ ]+(\d\.\d)$', r'\1', thes_version)
thes_name = 'keywords-' + vocabularies.munge(thes_title)
# Note thes_version can be used to enforce a specific thesaurus version
try:
thes = Thesaurus.lookup(name=thes_name)
thes = Thesaurus.lookup(title=thes_title, for_keywords=True)
except ValueError:
thes = None
# Treat present keywords depending on if they belong to a thesaurus
Expand All @@ -214,7 +213,6 @@ def to_responsible_party(alist):
vocab_date = to_date(it['thesaurus']['date'])
vocab_datetype = it['thesaurus']['datetype']
for keyword in it['keywords']:
# Todo Maybe convert keyword to a canonical form (e.g. munge)
free_keywords.append(FreeKeyword(
value = keyword,
reference_date = vocab_date,
Expand Down
39 changes: 24 additions & 15 deletions ckanext/publicamundi/lib/metadata/types/thesaurus.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,31 +22,40 @@ class Thesaurus(Object):

@property
def vocabulary(self):
spec = vocabularies.get_by_name(self.name)
return spec.get('vocabulary') if spec else None
vocab = vocabularies.get_by_name(self.name)
return vocab.get('vocabulary') if vocab else None

# Factory for Thesaurus

@classmethod
def lookup(cls, name):
'''Lookup a thesaurus by it's name and return a Thesaurus instance.
The metadata for a newly created thesaurus are queried from vocabularies
module.
def lookup(cls, name=None, title=None, for_keywords=False):
'''Lookup by name or title and return a Thesaurus instance.
This is a factory method that tries to instantiate a Thesaurus object
from a collection of well-known (mostly related to INSPIRE) vocabularies.
'''

vocab = None

if (name is None) and title:
name = vocabularies.normalize_thesaurus_title(title, for_keywords)

if name:
vocab = vocabularies.get_by_name(name)
else:
raise ValueError('Expected a name/title lookup')

spec = vocabularies.get_by_name(name)
if spec:
if vocab:
kwargs = {
'title': spec.get('title'),
'name': spec.get('name'),
'reference_date': spec.get('reference_date'),
'version' : spec.get('version'),
'date_type': spec.get('date_type'),
'title': vocab.get('title'),
'name': vocab.get('name'),
'reference_date': vocab.get('reference_date'),
'version' : vocab.get('version'),
'date_type': vocab.get('date_type'),
}
return cls(**kwargs)
else:
raise ValueError(
'Cannot find an INSPIRE thesaurus named "%s"' %(name))
raise ValueError('Cannot find a thesaurus named "%s"' %(name))

@object_null_adapter()
class ThesaurusTerms(Object):
Expand Down
8 changes: 3 additions & 5 deletions ckanext/publicamundi/lib/metadata/vocabularies/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,13 @@

# Import loader

from ckanext.publicamundi.lib.metadata.vocabularies import json_loader

munge = json_loader.munge
from ckanext.publicamundi.lib.metadata.vocabularies.json_loader import (
make_vocabularies, normalize_keyword, normalize_thesaurus_title)

def _update(data_file, name_prefix='', overwrite=False):
'''Update the module-global vocabularies from external JSON data.
'''

for name, desc in json_loader.make_vocabularies(data_file):
for name, desc in make_vocabularies(data_file):
assert overwrite or not (name in vocabularies), (
'A vocabulary named %r is allready loaded' % (name))
vocabularies[name_prefix + name] = desc
Expand Down
26 changes: 15 additions & 11 deletions ckanext/publicamundi/lib/metadata/vocabularies/json_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,10 +6,8 @@
import zope.schema
from zope.schema.vocabulary import SimpleVocabulary, SimpleTerm

def munge(name):
'''Convert human-friendly to machine-friendly terms.
Needed when a machine-friendly version is not supplied.
def _munge(name):
'''Convert human-friendly to machine-friendly names.
'''

re_bad = re.compile('[\(\),]+')
Expand All @@ -23,24 +21,31 @@ def munge(name):

return name

def normalize_keyword(name):
    '''Return the machine-friendly form of a keyword, as computed by _munge.
    '''
    munged = _munge(name)
    return munged

def normalize_thesaurus_title(name, for_keywords=False):
    '''Return the machine-friendly name for a thesaurus title.

    When for_keywords is true, the title is prefixed with 'keywords' and a
    single space before being passed to _munge; otherwise the title is
    passed to _munge unchanged.
    '''
    if for_keywords:
        name = 'keywords' + ' ' + name
    return _munge(name)

def make_vocabulary(data):
'''Convert raw data to a SimpleVocabulary instance.
The input data can be one of the following:
* a list of human-readable terms or a
* a dict that maps machine-readable to human-readable terms.
'''

# Note: A SimpleTerm is a tuple (value, token, title)

terms = []
if isinstance(data, list):
for t in data:
k = munge(t)
k = normalize_keyword(t)
terms.append(SimpleTerm(k, t, t))
elif isinstance(data, dict):
for k, t in data.items():
#k = munge(k)
#k = normalize_keyword(k)
terms.append(SimpleTerm(k, t, t))
return SimpleVocabulary(terms, swallow_duplicates=True)

Expand All @@ -55,7 +60,7 @@ def make_vocabularies(data_file):
data = json.loads(fp.read())

for title in (set(data.keys()) - set(['Keywords'])):
name = munge(title)
name = normalize_thesaurus_title(title)
desc = {
'name': name,
'title': title,
Expand All @@ -67,8 +72,7 @@ def make_vocabularies(data_file):
for title in keywords_data.keys():
keywords = keywords_data.get(title)
keywords_terms = make_vocabulary(keywords.get('terms'))

name = munge('Keywords-' + title)
name = normalize_thesaurus_title(title, for_keywords=True)
desc = {
'name': name,
'title': title,
Expand Down

0 comments on commit d533046

Please sign in to comment.