import logging
from dataclasses import dataclass
from pathlib import Path
from types import ModuleType
from typing import Optional, TextIO, Type, Union
import yaml
from lark import Tree
from linkml_runtime import SchemaView
from linkml_runtime.linkml_model import SchemaDefinition
from linkml_runtime.processing.referencevalidator import ReferenceValidator
from linkml_runtime.utils.compile_python import compile_python
from linkml_runtime.utils.formatutils import camelcase
from pydantic import BaseModel
from semdsl.datamodel.semdsl_model import Disjunction, SchemaGrammar
from semdsl.importers.schema_importer import SchemaImporter
from semdsl.mapper.mapper import Mapper
from semdsl.writers.lark_writer import LarkWriter
logger = logging.getLogger(__name__)
@dataclass
class DSLEngine:
    """
    Engine for generating DSLs from LinkML Schemas and for parsing serialization to these DSLs.

    >>> from semdsl import DSLEngine
    >>> engine = DSLEngine()
    >>> engine.load_schema("examples/clue/model_clue.yaml") ## Annotated LinkML schema
    >>> obj = engine.parse_as_dict("< Colonel Mustard in the Kitchen with the Candlestick >")
    >>> print(obj)
    {'person': 'Colonel Mustard', 'location': 'Kitchen', 'weapon': 'Candlestick'}

    This works as follows:

    - The LinkML schema is loaded from a YAML source
    - A Lark grammar is generated from the LinkML schema, using annotations in the schema
    - The Lark grammar is used to parse the input into a tree
    - The tree is automatically transformed into an object conformant with the schema
    """

    schemaview: Optional[SchemaView] = None
    """Wrapper onto a LinkML schema"""

    # Lazily-populated caches; accessed via the corresponding properties below.
    _grammar: Optional[SchemaGrammar] = None
    _lark_serialization: Optional[str] = None
    _compiled_grammar_module: Optional[ModuleType] = None
    _compiled_datamodel: Optional[ModuleType] = None

    normalizer: Optional[ReferenceValidator] = None
    """Used to normalize object structures prior to object initialization"""

    def load_schema(self, schema: Union[str, TextIO, SchemaDefinition]) -> None:
        """Load a schema from a LinkML schema source.

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_schema("examples/clue/model_clue.yaml")

        :param schema: path to a schema or a schema object
        """
        self.schemaview = SchemaView(schema)

    def parse_as_dict(self, input: str, start=None, target_class=None) -> dict:
        """
        Parses input string to a raw object/dict.

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_schema("examples/clue/model_clue.yaml")
        >>> obj = engine.parse_as_dict("< Colonel Mustard in the Kitchen with the Candlestick >")
        >>> print(obj)
        {'person': 'Colonel Mustard', 'location': 'Kitchen', 'weapon': 'Candlestick'}

        :param input: input string to parse
        :param start: start symbol in grammar
        :param target_class: target data model class for parsing
        :return: input parsed into a dict object conforming to the schema/data model
        """
        if start is None:
            # Infer the start symbol from the grammar / target class.
            start = self._get_start_symbol(target_class)
        mod = self.compiled_grammar_module
        tree = mod.grammar.parse(input, start=start)
        # Map the Lark parse tree onto a schema-conformant dict.
        mapper = Mapper(schemaview=self.schemaview, schemagrammar=self.grammar)
        return mapper.transform(tree)

    def parse_as_object(
        self, input: str, start=None, target_class: Optional[Union[str, Type]] = None
    ) -> BaseModel:
        """
        Parses the input string into a pydantic BaseModel.

        .. note ::

            this requires EITHER that the main linkml package is installed, OR that
            the module for the pydantic data model is set using :ref:`compiled_datamodel`

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_schema("examples/clue/model_clue.yaml")
        >>> obj = engine.parse_as_object("< Colonel Mustard in the Kitchen with the Candlestick >")
        >>> print(type(obj))
        <class 'test.ClueHypothesis'>
        >>> print(obj.weapon)
        Candlestick
        >>> print(obj.dict())
        {'person': 'Colonel Mustard', 'location': 'Kitchen', 'weapon': 'Candlestick'}

        :param input: input string to parse
        :param start: start symbol in grammar
        :param target_class: target data model class for parsing
        :return: pydantic object representing the parsed input
        :raises ValueError: if target_class is omitted and cannot be inferred from the schema
        """
        if target_class is None:
            if len(self.schemaview.all_classes()) == 1:
                # Only one class in the schema: it must be the target.
                candidates = list(self.schemaview.all_classes().values())
            else:
                candidates = [x for x in self.schemaview.all_classes().values() if x.tree_root]
            if not candidates:
                # Previously this fell through to an opaque IndexError.
                raise ValueError(
                    "Cannot infer target_class: no class in the schema is marked tree_root; "
                    "pass target_class explicitly"
                )
            target_class = candidates[0].name
        d = self.parse_as_dict(input, start=start, target_class=target_class)
        module = self.compiled_datamodel
        if isinstance(target_class, str):
            # Schema class names are mapped to CamelCase pydantic class names.
            py_class = module.__dict__[camelcase(target_class)]
        else:
            py_class = target_class
        if self.grammar.normalize_collections:
            if self.normalizer is None:
                self.normalizer = ReferenceValidator(self.schemaview)
            d = self.normalizer.normalize(d)
        return py_class(**d)

    @property
    def grammar(self) -> SchemaGrammar:
        """Access the grammar object.

        .. note ::

            for most purposes, you do not need to access the grammar object directly.

        If the grammar object is not explicitly set, this will
        generate a grammar from the schema, using annotations if
        present, and auto-generating otherwise.

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_schema("examples/clue/model_clue.yaml")
        >>> grammar = engine.grammar
        >>> print(grammar.rules[0].lhs_symbol)
        class_clue_hypothesis

        :return: a SchemaGrammar object
        """
        if self._grammar is None:
            self.generate_grammar()
        return self._grammar

    def load_grammar(self, grammar: Union[str, Path, TextIO, SchemaGrammar]) -> None:
        """Load a grammar from a DSL YAML file.

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_grammar("examples/linkml_lite/linkml_model_lite.semdsl.yaml")
        >>> grammar = engine.grammar
        >>> print(grammar.rules[0].lhs_symbol)
        schema
        >>> print(engine.lark_serialization)
        from lark import Lark
        ...
        schema : schema_id schema_name description? (comment|import|prefix|class|slot|type|enum)*
        class : "class" class_name is_a? mixins? "{" description? class_uri? (comment|attribute|slot_usage|slots)* "}"
        slot : "slot" slot_block
        ...

        :param grammar: path to a YAML file containing the grammar using the semdsl metamodel
        """
        if not isinstance(grammar, SchemaGrammar):
            # Treat anything else as a path to a YAML serialization of the grammar.
            with open(grammar) as f:
                obj = yaml.safe_load(f)
            grammar = SchemaGrammar.parse_obj(obj)
        self._grammar = grammar

    def generate_grammar(self) -> None:
        """Generate a grammar from the current schema.

        This will use annotations if present, and auto-generate otherwise.

        .. note ::

            if the grammar has previously been set, this will overwrite it.
        """
        importer = SchemaImporter()
        self._grammar = importer.convert(self.schemaview.schema)

    @property
    def lark_serialization(self) -> str:
        """Serialize as a lark program.

        >>> from semdsl import DSLEngine
        >>> engine = DSLEngine()
        >>> engine.load_schema("examples/clue/model_clue.yaml")
        >>> print(engine.lark_serialization)
        from lark import Lark
        ...
        class_clue_hypothesis : "<" person "in the" location "with the" weapon ">"
        person : WORD WORD
        location : WORD
        weapon : WORD
        ...

        :return: a string containing the lark program
        """
        if self._lark_serialization is None:
            writer = LarkWriter()
            self._lark_serialization = writer.dumps(self.grammar)
        return self._lark_serialization

    @property
    def compiled_grammar_module(self) -> ModuleType:
        """Get the python module for the compiled grammar.

        Note that most clients do not need to use this directly.
        """
        if self._compiled_grammar_module is None:
            ser = self.lark_serialization
            self._compiled_grammar_module = compile_python(ser)
        return self._compiled_grammar_module

    @property
    def compiled_datamodel(self) -> ModuleType:
        """Get the pydantic module for the current data model."""
        if self._compiled_datamodel is None:
            # Deferred import: the full linkml package is an optional dependency,
            # only needed when the datamodel module has not been set explicitly.
            from linkml.generators.pydanticgen import PydanticGenerator

            ser = PydanticGenerator(self.schemaview.schema).serialize()
            self._compiled_datamodel = compile_python(ser)
        return self._compiled_datamodel

    @compiled_datamodel.setter
    def compiled_datamodel(self, module: ModuleType) -> None:
        """
        Set the compiled pydantic datamodel module.

        :param module: pre-compiled pydantic module to use instead of generating one
        :return:
        """
        self._compiled_datamodel = module

    def parse_tree(self, input: str, start=None, target_class=None) -> Tree:
        """
        Generates Lark parse tree using grammar from input string.

        :param input: input string to parse
        :param start: start symbol in grammar; inferred from target_class if omitted
        :param target_class: target data model class used to infer the start symbol
        :return: the raw Lark parse tree (no mapping to the data model)
        """
        if start is None:
            start = self._get_start_symbol(target_class)
        mod = self.compiled_grammar_module
        return mod.grammar.parse(input, start=start)

    def _get_start_symbol(self, target_class: Optional[Union[str, Type]] = None) -> str:
        """Determine the grammar start symbol for a target class.

        :param target_class: class name (or class object) to resolve; if omitted,
            the schema's unique tree_root class is used
        :return: the lhs symbol of the matching grammar rule
        :raises ValueError: if no start symbol can be determined
        """
        if len(self.grammar.start_symbols) == 1:
            # Unambiguous: the grammar declares a single start symbol.
            return self.grammar.start_symbols[0]
        if target_class is None:
            roots = [x.name for x in self.schemaview.all_classes().values() if x.tree_root]
            if len(roots) != 1:
                # Previously a single-element unpack raised a cryptic ValueError here.
                raise ValueError(
                    f"Expected exactly one tree_root class to infer target_class, found: {roots}"
                )
            target_class = roots[0]
        if not isinstance(target_class, str):
            target_class = target_class.__name__
        # First pass: match rules (or disjunction operands) by source class.
        for rule in self.grammar.rules:
            if rule.source_class == target_class:
                return rule.lhs_symbol
            if isinstance(rule.rhs, Disjunction):
                for op in rule.rhs.operands:
                    if op.source_class == target_class:
                        return rule.lhs_symbol
        # Fallback: allow the caller to name the grammar symbol directly.
        for rule in self.grammar.rules:
            if rule.lhs_symbol == target_class:
                return rule.lhs_symbol
        raise ValueError(f"Cannot find start symbol for {target_class}")