import click
import logging
import yaml
from dataclasses import dataclass
from typing import List, Union, Iterator
from linkml_runtime.linkml_model import SchemaDefinition, Element, PermissibleValue, ClassDefinition, SlotDefinition
from linkml_runtime.utils.metamodelcore import Curie
from linkml_runtime.utils.schemaview import SchemaView, re, EnumDefinition
from oaklib import BasicOntologyInterface
from oaklib.datamodels.search import SearchConfiguration
from oaklib.datamodels.text_annotator import TextAnnotation
from oaklib.interfaces import SearchInterface
from oaklib.interfaces.text_annotator_interface import TextAnnotatorInterface
from schema_automator.utils.schemautils import minify_schema
# Zero-width word boundaries inside a CamelCase identifier:
#   * a lowercase letter or digit followed by a capital   (fooBar  -> foo|Bar)
#   * the end of a run of capitals followed by Capital+lowercase
#     (DNASequence -> DNA|Sequence)
# Capitals that follow a space, underscore or the start of the string are not
# boundaries, so no extra space is inserted next to an existing separator.
camel_case_pattern = re.compile(r'(?<=[a-z0-9])(?=[A-Z])|(?<=[A-Z])(?=[A-Z][a-z])')


def uncamel(n: str):
    """
    Convert a CamelCase or snake_case name into lower-case, space-separated words.

    Runs of capitals are kept together as a single word, and underscores are
    replaced by spaces. Only ASCII capitals are treated as word starts.

    >>> uncamel("DNASequence")
    'dna sequence'
    >>> uncamel("foo_bar")
    'foo bar'

    :param n: name or free text to normalise
    :return: the lower-cased text with word boundaries expanded to spaces
    """
    # TODO: replace with equiv from linkml-runtime
    return camel_case_pattern.sub(' ', n).lower().replace('_', ' ')
@dataclass
class SchemaAnnotator:
    """
    An engine for enhancing schemas by performing lookup and annotation operations
    using an ontology service.

    A SchemaAnnotator wraps an OAK ontology interface.
    See `OAK documentation <https://incatools.github.io/ontology-access-kit/>`_ for more details
    """

    # OAK implementation used for every lookup; annotation needs it to be a
    # TextAnnotatorInterface or a SearchInterface (see annotate_text)
    ontology_implementation: BasicOntologyInterface = None
    # when True, element descriptions are annotated in addition to names and aliases
    mine_descriptions: bool = False
    # when True, matches that do not cover the whole input text are accepted
    allow_partial: bool = False
    # when True, matches whose identifier is not a CURIE are ignored
    curie_only: bool = True
    # when True, a match may fill an unset class_uri / slot_uri / enum_uri
    assign_element_uris: bool = False
    # when True, a match may fill an unset permissible-value meaning
    assign_enum_meanings: bool = False

    def annotate_element(self, elt: Union[PermissibleValue, Element]) -> None:
        """
        Annotates an element or a permissible value.

        The element's name (or the permissible value's text), its aliases and,
        when ``mine_descriptions`` is set, its description are each passed to
        :meth:`annotate_text`. Every accepted match is either assigned as the
        element's URI / meaning (when the corresponding ``assign_*`` flag is
        set and the field is still empty) or appended to ``exact_mappings``.
        The element is modified in place.

        :param elt: schema element or permissible value to annotate
        :raises ValueError: if ``elt`` is neither an Element nor a PermissibleValue
        :return:
        """
        if isinstance(elt, Element):
            texts = [elt.name]
        elif isinstance(elt, PermissibleValue):
            texts = [elt.text]
        else:
            raise ValueError(f"Unexpected type {type(elt)}")
        texts += elt.aliases
        if self.mine_descriptions and elt.description:
            texts.append(elt.description)
        for text in texts:
            logging.info(f"Annotating: {text}")
            for r in self.annotate_text(text):
                logging.debug(f'MATCH: {r}')
                if not (self.allow_partial or r.matches_whole_text):
                    continue
                xref = r.object_id
                if self.curie_only and not Curie.is_curie(xref):
                    continue
                logging.info(f'Mapping from "{text}" to {xref}')
                if self._assign_primary_identifier(elt, xref):
                    # the match became the URI/meaning, so it is not also a mapping
                    continue
                if xref not in elt.exact_mappings:
                    elt.exact_mappings.append(xref)

    def _assign_primary_identifier(self, elt: Union[PermissibleValue, Element], xref: str) -> bool:
        """
        Store ``xref`` as the element's own URI/meaning if enabled and still unset.

        :return: True if ``xref`` was assigned, False if it was left untouched
        """
        if isinstance(elt, PermissibleValue):
            if self.assign_enum_meanings and not elt.meaning:
                elt.meaning = xref
                return True
            return False
        if not self.assign_element_uris:
            return False
        uri_attributes = (
            (ClassDefinition, "class_uri"),
            (SlotDefinition, "slot_uri"),
            (EnumDefinition, "enum_uri"),
        )
        for element_type, uri_attr in uri_attributes:
            if isinstance(elt, element_type) and not getattr(elt, uri_attr):
                setattr(elt, uri_attr, xref)
                return True
        return False

    def annotate_text(self, text: str) -> Iterator[TextAnnotation]:
        """
        Yield ontology matches for a piece of text.

        Text annotation is used when the ontology implementation is a
        TextAnnotatorInterface; otherwise search is used when it is a
        SearchInterface, with each search hit reported as a whole-text match.

        :param text: name, alias or description to look up
        :raises NotImplementedError: if the implementation supports neither interface
        :return: iterator over annotations
        """
        # this is a wrapper over OAK annotation and search;
        # it (1) expands CamelCase (2) abstracts over annotation vs search
        # TODO: fold this functionality back into OAK
        oi = self.ontology_implementation
        text_exp = uncamel(text)  # TODO: use main linkml_runtime method
        if isinstance(oi, TextAnnotatorInterface):
            logging.debug(f"Using TextAnnotatorInterface on {text_exp}")
            # TextAnnotation is available; use this by default
            yield from oi.annotate_text(text_exp)
            if text_exp != text.lower():
                # expansion changed more than letter case: also try the raw text
                # (this call previously repeated the expanded text)
                yield from oi.annotate_text(text)
        elif isinstance(oi, SearchInterface):
            logging.debug(f"Using SearchInterface on {text_exp}")
            # use search as an alternative
            cfg = SearchConfiguration(is_complete=True)
            for r in oi.basic_search(text, config=cfg):
                yield TextAnnotation(object_id=r, matches_whole_text=True)
            if text_exp != text.lower():
                for r in oi.basic_search(text_exp, config=cfg):
                    yield TextAnnotation(object_id=r, matches_whole_text=True)
        else:
            raise NotImplementedError(
                f"{type(oi)} implements neither TextAnnotatorInterface nor SearchInterface"
            )

    def annotate_schema(self, schema: Union[SchemaDefinition, str]) -> SchemaDefinition:
        """
        Annotate all elements of a schema, adding mappings.

        Every schema element and every permissible value of every enum is
        passed to :meth:`annotate_element`.

        This requires that the ontology implementation implements either
        TextAnnotatorInterface or SearchInterface

        :param schema: schema object, or a string accepted by SchemaView
        :return: the annotated schema held by the SchemaView
        """
        sv = SchemaView(schema)
        for elt in sv.all_elements().values():
            self.annotate_element(elt)
        for e in sv.all_enums().values():
            for pv in e.permissible_values.values():
                self.annotate_element(pv)
        return sv.schema

    def enrich(self, schema: Union[SchemaDefinition, str]) -> SchemaDefinition:
        """
        Enrich a schema by performing lookups on the external ontology/vocabulary endpoint,
        and copying over metadata

        Currently, the only metadata obtained is text definitions

        .. code-block:: python

            >>> from schema_automator.annotators.schema_annotator import SchemaAnnotator
            >>> from oaklib.selector import get_implementation_from_shorthand
            >>> oi = get_implementation_from_shorthand("sqlite:obo:so")
            >>> sa = SchemaAnnotator(ontology_implementation=oi)
            >>> schema = sa.enrich("tests/data/schema.yaml")

        :param schema: schema object, or a string accepted by SchemaView
        :return: the enriched schema held by the SchemaView
        """
        sv = SchemaView(schema)
        for elt_name, elt in sv.all_elements().items():
            logging.debug(f"Enriching {elt_name}")
            if isinstance(elt, EnumDefinition):
                # permissible values are described from their own meaning;
                # the enum itself is described from its mappings only
                curies = []
                for pv in elt.permissible_values.values():
                    pv_curies = [pv.meaning] if pv.meaning else []
                    self._add_description_from_curies(pv, pv_curies)
            else:
                curies = [sv.get_uri(elt)]
            for ms in sv.get_mappings(elt_name).values():
                curies += ms
            self._add_description_from_curies(elt, curies)
        return sv.schema

    def _add_description_from_curies(self, elt: Union[Element, PermissibleValue], curies: List[str]):
        """
        Fill in ``elt.description`` from the first CURIE that yields a description.

        Nothing is changed if the element already has a description. For each
        CURIE the ontology definition is tried first, then the
        ``rdfs:comment``, ``skos:definition`` and ``dcterms:description``
        entries of its metadata map. Lookup failures are logged and skipped.

        :param elt: element or permissible value to describe (modified in place)
        :param curies: candidate identifiers, tried in order
        """
        oi = self.ontology_implementation
        logging.info(f"Looking up descriptions using {curies}")
        for x in curies:
            if elt.description:
                break
            logging.info(f"Fetching description using: {x}")
            try:
                defn = oi.definition(x)
                if defn:
                    elt.description = defn
                    continue
                mm = oi.entity_metadata_map(x)
                logging.debug(f"MM={mm}")
                for p in ['rdfs:comment', 'skos:definition', 'dcterms:description']:
                    if p not in mm:
                        continue
                    value = mm[p]
                    # NOTE(review): metadata map values look like lists of strings
                    # in OAK; take the first one in that case — confirm
                    if isinstance(value, (list, tuple)):
                        value = value[0] if value else None
                    if value:
                        elt.description = value
                        break
            except Exception as e:
                # best effort: one failing lookup must not abort the enrichment
                logging.warning(f"Could not fetch description for {x}: {e}")
@click.command()
@click.argument('schema')
@click.option('--input', '-i', help="OAK input ontology selector")
@click.option('--output', '-o', help="Path to saved yaml schema")
def annotate_schema(schema: str, input: str, output: str, **args):
    """
    Annotate all elements of a schema.

    DEPRECATED: use main schemauto CLI instead
    """
    # NOTE: the docstring above is shown by click as the command help text.
    logging.basicConfig(level=logging.INFO)
    if input:
        # Resolve the --input selector into an OAK implementation; previously
        # the option was accepted but never used, leaving the annotator with
        # no ontology to query.
        from oaklib.selector import get_implementation_from_shorthand
        impl = get_implementation_from_shorthand(input)
    else:
        impl = None
    annr = SchemaAnnotator(ontology_implementation=impl)
    annotated = annr.annotate_schema(schema)
    sd = minify_schema(annotated)
    if output:
        with open(output, 'w', encoding='utf-8') as stream:
            yaml.safe_dump(sd, stream, sort_keys=False)
    else:
        print(yaml.safe_dump(sd, sort_keys=False))
# Run the (deprecated) click command when this module is executed as a script.
if __name__ == "__main__":
    annotate_schema()