Source code for linkml.utils.schemasynopsis

from dataclasses import dataclass, field
from typing import Dict, List, Set, Union

from linkml_runtime.linkml_model.meta import (
    ClassDefinition,
    ClassDefinitionName,
    Definition,
    DefinitionName,
    Element,
    ElementName,
    EnumDefinition,
    EnumDefinitionName,
    SchemaDefinition,
    SlotDefinition,
    SlotDefinitionName,
    SubsetDefinitionName,
    TypeDefinition,
    TypeDefinitionName,
)
from linkml_runtime.utils.metamodelcore import empty_dict
from linkml_runtime.utils.yamlutils import TypedNode
from rdflib import URIRef

from linkml.utils.typereferences import (
    ClassType,
    EnumType,
    References,
    RefType,
    SlotType,
    SubsetType,
    TypeType,
)


def empty_references() -> field:
    """
    Build a dataclass field whose default value is a fresh ``References`` instance.

    :return: the result of ``field(default_factory=References)``, for use as a dataclass attribute default
    """
    return field(default_factory=References)
# Type alias accepting either a ClassDefinitionName or a TypeDefinitionName.
# NOTE(review): the alias is called ClassOrSlotName, yet the union pairs ClassDefinitionName with
# TypeDefinitionName -- confirm whether SlotDefinitionName was intended.
ClassOrSlotName = Union[ClassDefinitionName, TypeDefinitionName]
@dataclass
class SchemaSynopsis:
    """
    Cross-reference synopsis of a schema.

    On construction, ``__post_init__`` walks every slot, type, class and enum of ``schema`` and fills the
    lookup tables declared below.  ``errors`` reports references to elements that the schema does not
    define and ``summary`` renders a plain-text report of the collected counts.
    """

    schema: SchemaDefinition = field(repr=False, compare=False)

    # References by type -- set by add_ref
    typerefs: Dict[TypeDefinitionName, References] = empty_dict()  # Type name to all references
    slotrefs: Dict[SlotDefinitionName, References] = empty_dict()  # Slot name to all references
    classrefs: Dict[ClassDefinitionName, References] = empty_dict()  # Class name to all references
    subsetrefs: Dict[SubsetDefinitionName, References] = empty_dict()  # Subset name to references
    enumrefs: Dict[EnumDefinitionName, References] = empty_dict()  # Enum name to references

    # Type specific
    typebases: Dict[str, Set[TypeDefinitionName]] = empty_dict()  # Base referencing types (direct and indirect)
    typeofs: Dict[TypeDefinitionName, Set[TypeDefinitionName]] = empty_dict()  # Type to specializations

    # Slot specific
    # NOTE(review): no method of this class writes to slotclasses, although summary() reads it.
    slotclasses: Dict[SlotDefinitionName, Set[ClassDefinitionName]] = empty_dict()  # Slot to including classes
    definingslots: Dict[SlotDefinitionName, Set[ClassDefinitionName]] = empty_dict()  # Slot to defining decls
    slotusages: Dict[SlotDefinitionName, Set[ClassDefinitionName]] = empty_dict()  # Slot to overriding classes
    owners: Dict[SlotDefinitionName, Set[ClassDefinitionName]] = empty_dict()  # Slot to owning classes (should be 1)
    inverses: Dict[str, Set[str]] = empty_dict()  # Slots declared as inverses of other slots

    # Class specific
    ownslots: Dict[ClassDefinitionName, Set[SlotDefinitionName]] = empty_dict()  # Slots directly owned by class

    # Enum specific
    codesets: Dict[URIRef, Set[EnumDefinitionName]] = empty_dict()  # Code set URI to enumeration definition

    # Class to slot domains == class.slots

    # Slot or Class (Definition) specific
    roots: References = empty_references()  # Definitions with no parents
    isarefs: Dict[DefinitionName, References] = empty_dict()  # Definition to isa references
    mixinrefs: Dict[DefinitionName, References] = empty_dict()  # Mixin to referencing classes or slots
    mixins: References = empty_references()  # Definitions declared as mixin
    abstracts: References = empty_references()  # Definitions declared as abstract
    applytos: References = empty_references()  # Definitions that include applytos
    applytorefs: Dict[DefinitionName, References] = empty_dict()  # Definition to applier

    # Slot or Type specific
    rangerefs: Dict[ElementName, Set[SlotDefinitionName]] = empty_dict()  # Type or class to range slot

    # Element - any type
    # NOTE(review): annotated as a dict but defaulted to a References instance; nothing in this class
    # reads or writes it -- confirm the intended default before relying on it.
    inschema: Dict[str, References] = empty_references()  # Schema name to elements

    def __post_init__(self) -> None:
        """
        Populate every lookup table from ``self.schema``.

        Slots, types, classes and enums are summarized in that order, after which the slots owned
        exclusively by each class are recorded in ``ownslots`` / ``owners``.

        :raises KeyError: if a class names an ``is_a`` parent or mixin that is not in ``schema.classes``
        """
        for k, v in self.schema.slots.items():
            self.summarize_slot_definition(k, v)
        for k, v in self.schema.types.items():
            self.summarize_type_definition(k, v)
        for k, v in self.schema.classes.items():
            self.summarize_class_definition(k, v)
        for k, v in self.schema.enums.items():
            self.summarize_enum_definition(k, v)

        # Generate a list of slots owned exclusively by cls: its own slots minus those listed by its
        # direct parent and by its mixins
        for cls in self.schema.classes.values():
            non_owned_slots = set()
            if cls.is_a:
                non_owned_slots = set(self.schema.classes[cls.is_a].slots)
            for mixin in cls.mixins:
                non_owned_slots.update(self.schema.classes[mixin].slots)
            owned_slots = set(cls.slots) - non_owned_slots
            self.ownslots[cls.name] = owned_slots
            for slotname in owned_slots:
                self.owners.setdefault(slotname, set()).add(cls.name)

    def summarize_slot_definition(self, k: SlotDefinitionName, v: SlotDefinition) -> None:
        """
        Summarize a slot definition

        :param k: slot name
        :param v: slot definition
        :raises TypeError: (from ``add_ref``) when the slot range is not a class, enum or type of the schema
        """
        self.summarize_definition(SlotType, k, v)
        if v.domain:
            self.add_ref(SlotType, k, ClassType, v.domain)
        self.rangerefs.setdefault(v.range, set()).add(k)

        # Classify the range; classes take precedence over enums, enums over types
        if v.range in self.schema.classes:
            range_type = ClassType
        elif v.range in self.schema.enums:
            range_type = EnumType
        elif v.range in self.schema.types:
            range_type = TypeType
        else:
            # Unrecognized range: add_ref rejects None with a TypeError
            range_type = None
        self.add_ref(SlotType, k, range_type, v.range)

    def summarize_type_definition(self, k: TypeDefinitionName, v: TypeDefinition) -> None:
        """
        Summarize type definition

        :param k: Type name
        :param v: Type definition
        """
        self.summarize_element(TypeType, k, v)
        if v.typeof:
            self.typeofs.setdefault(v.typeof, set()).add(k)
            self.add_ref(TypeType, k, TypeType, v.typeof)
        if v.base:
            self.typebases.setdefault(v.base, set()).add(k)

    def summarize_class_definition(self, k: ClassDefinitionName, v: ClassDefinition) -> None:
        """
        Summarize class definition element

        :param k: Class name
        :param v: Class definition
        """
        self.summarize_definition(ClassType, k, v)
        for slotname in v.slots:
            self.add_ref(ClassType, k, SlotType, slotname)
        for slotname in v.slot_usage:
            self.slotusages.setdefault(slotname, set()).add(k)
            # Disabled: slot_usage entries and slot aliases are currently not recorded as references
            # self.add_ref(ClassType, k, SlotType, slotname)
            # slot_alias = self.schema.slots[slotname].alias
            # if slot_alias:
            #     self.add_ref(SlotType, slotname, SlotType, cast(SlotDefinitionName, slot_alias))
            #     self.add_ref(ClassType, k, SlotType, cast(SlotDefinitionName, slot_alias))

    def summarize_enum_definition(self, k: EnumDefinitionName, v: EnumDefinition) -> None:
        """
        Summarize enum definition

        :param k: Enum name
        :param v: Enum definition
        """
        self.summarize_element(EnumType, k, v)

    def summarize_definition(self, typ: RefType, k: DefinitionName, v: Definition) -> None:
        """
        Summarize slot and class definitions

        Records is_a parents (or root status), abstract and mixin flags, mixin usage and apply_to targets.

        :param typ: type (slot or class)
        :param k: name
        :param v: definition
        """
        self.summarize_element(typ, k, v)
        if v.is_a:
            self.isarefs.setdefault(v.is_a, References()).addref(typ, k)
            self.add_ref(typ, k, typ, v.is_a)
        else:
            # No parent: the definition is a root of its hierarchy
            self.roots.addref(typ, k)
        if v.abstract:
            self.abstracts.addref(typ, k)
        if v.mixin:
            self.mixins.addref(typ, k)
        for mixin in v.mixins:
            self.mixinrefs.setdefault(mixin, References()).addref(typ, k)
            self.add_ref(typ, k, typ, mixin)
        if v.apply_to:
            self.applytos.addref(typ, k)
            for applyto in v.apply_to:
                self.applytorefs.setdefault(applyto, References()).addref(typ, k)
                self.add_ref(typ, k, typ, applyto)

    def summarize_element(self, typ: RefType, k: ElementName, v: Element) -> None:
        """
        Summarize element level items

        :param typ: element type
        :param k: element name
        :param v: element definition
        :raises ValueError: if ``k`` differs from ``v.name``
        """
        if k != v.name:
            raise ValueError(f"{typ} name mismatch: {k} != {v.name}")  # should never happen
        for subset in v.in_subset:
            self.add_ref(typ, k, SubsetType, subset)

    def add_ref(
        self,
        fromtype: RefType,
        fromname: ElementName,
        totype: RefType,
        toname: ElementName,
    ) -> None:
        """Add an inverse reference, indicating that to type/name is referenced by from type/name

        :param fromtype: Referencer type
        :param fromname: Referencer name
        :param totype: Referencee type
        :param toname: Referencee name
        :raises TypeError: if ``totype`` is not one of the class, slot, type, subset or enum markers
        """
        # Select the index for the referencee's kind and the matching name type for its key
        if totype is ClassType:
            index, key_type = self.classrefs, ClassDefinitionName
        elif totype is SlotType:
            index, key_type = self.slotrefs, SlotDefinitionName
        elif totype is TypeType:
            index, key_type = self.typerefs, TypeDefinitionName
        elif totype is SubsetType:
            index, key_type = self.subsetrefs, SubsetDefinitionName
        elif totype is EnumType:
            index, key_type = self.enumrefs, EnumDefinitionName
        else:
            raise TypeError(f"Unknown typ: {totype}")
        index.setdefault(key_type(toname), References()).addref(fromtype, fromname)

    def _ancestor_is_owned(self, slot: SlotDefinition) -> bool:
        """
        Determine whether any is_a ancestor of ``slot`` appears in ``self.owners``.

        :param slot: slot whose is_a chain is followed through ``self.schema.slots``
        :return: True if some ancestor is owned by a class, False when the chain ends first
        :raises KeyError: if an is_a parent is missing from ``self.schema.slots``
        """
        return bool(slot.is_a) and (
            slot.is_a in self.owners or self._ancestor_is_owned(self.schema.slots[slot.is_a])
        )

    def errors(self) -> List[str]:
        """
        List the problems found while summarizing the schema.

        :return: one tab-prefixed message per kind of undefined reference (class, slot, type, subset,
            enum) plus one per slot that is inlined, single valued and an identifier
        """

        def format_undefineds(refs: Set[Union[str, TypedNode]]) -> List[str]:
            # Sorted so that the message is stable between runs
            return [f"{TypedNode.yaml_loc(ref)}: {ref}" for ref in sorted(refs)]

        rval = []
        reference_kinds = (
            ("class", self.classrefs, self.schema.classes),
            ("slot", self.slotrefs, self.schema.slots),
            ("type", self.typerefs, self.schema.types),
            ("subset", self.subsetrefs, self.schema.subsets),
            ("enum", self.enumrefs, self.schema.enums),
        )
        for kind, referenced, defined in reference_kinds:
            undefined = set(referenced.keys()) - set(defined.keys())
            if undefined:
                rval.append(f"\tUndefined {kind} references: {', '.join(format_undefineds(undefined))}")

        # Flag identifier slots that are inlined but single valued (not an inviolable rule, but we make
        # assumptions about this elsewhere in the python generator)
        for slot in self.schema.slots.values():
            if slot.inlined and not slot.multivalued and slot.identifier:
                rval.append(
                    f"\t{TypedNode.yaml_loc(slot.name)} Slot {slot.name} is declared inline but single valued"
                )
        return rval

    def summary(self) -> str:
        """
        Render a plain-text report covering classes, slots (including domains and ranges), names shared
        by classes and slots, and types.

        :return: newline-joined report text
        """
        rval = [""]
        rval += self._class_summary()
        rval += [""]
        rval += self._slot_summary()

        shared_class_slots = set(self.schema.classes.keys()).intersection(set(self.schema.slots.keys()))
        if shared_class_slots:
            rval += ["\nClasses and Slots with the same name:"]
            rval += [f"\t{ssc}" for ssc in sorted(shared_class_slots)]

        rval += [""]
        rval += self._type_summary()
        return "\n".join(rval)

    @staticmethod
    def _referenced_by(refs: Dict[ElementName, References]) -> str:
        """Format the 'Referenced by' line: distinct referencers, by kind, across all entries of ``refs``."""
        clsrefs, slotrefs, typerefs, enumrefs = set(), set(), set(), set()
        if refs is not None:
            for cr in refs.values():
                clsrefs.update(cr.classrefs)
                slotrefs.update(cr.slotrefs)
                typerefs.update(cr.typerefs)
                enumrefs.update(cr.enumrefs)
        return (
            f"\tReferenced by: {len(clsrefs)} classes, {len(slotrefs)} slots, "
            f"{len(typerefs)} types, {len(enumrefs)} enums "
        )

    def _definition_summary(self, title: str, defined, refs, attr: str) -> List[str]:
        """
        Build the report lines shared by the class and slot sections.

        :param title: section heading ("Classes" or "Slots")
        :param defined: the schema's mapping of defined elements of this kind
        :param refs: the matching reference index (``classrefs`` or ``slotrefs``)
        :param attr: name of the ``References`` attribute for this kind ("classrefs" or "slotrefs")
        """
        roots = getattr(self.roots, attr)
        declared_mixins = getattr(self.mixins, attr)
        # Leaves: referenced elements that are never used as an is_a parent
        leaves = set(refs.keys()) - set(self.isarefs.keys())
        undeclared_mixins = set(self.mixinrefs.keys()).intersection(set(defined.keys()) - set(declared_mixins))

        lines = [
            f"{title}: {len(defined.keys())}",
            self._referenced_by(refs),
            f"\tRoot: {len(roots)}",
            f"\tLeaf: {len(leaves)}",
            # Standalone elements are both roots and leaves
            f"\tStandalone: {len(set(roots).intersection(leaves))}",
            f"\tDeclared mixin: {len(declared_mixins)}",
            f"\tUndeclared mixin: {len(undeclared_mixins)}",
        ]
        lines += [f"\t\t{udm}" for udm in sorted(undeclared_mixins)]
        lines.append(f"\tAbstract: {len(getattr(self.abstracts, attr))}")
        return lines

    def _class_summary(self) -> List[str]:
        """Build the 'Classes' section of the report."""
        lines = self._definition_summary("Classes", self.schema.classes, self.classrefs, "classrefs")
        undefined_classes = set(self.classrefs.keys()) - set(self.schema.classes.keys())
        if undefined_classes:
            lines.append(f"\tUndefined references: {', '.join(sorted(undefined_classes))}")
        return lines

    def _slot_summary(self) -> List[str]:
        """Build the 'Slots' section of the report, including ownership, domain and range details."""
        lines = self._definition_summary("Slots", self.schema.slots, self.slotrefs, "slotrefs")

        # Slots that are referenced but not defined
        undefined_slots = set(self.slotrefs.keys()) - set(self.schema.slots.keys())
        if undefined_slots:
            lines.append(f"\tUndefined: {len(undefined_slots)}")

        # Slots that are defined but do not (directly) occur in any class
        # NOTE(review): the nesting of the else branch below was reconstructed from a listing whose
        # indentation had been lost -- confirm it belongs to the innermost if.
        n_unreferenced_descendants: int = 0
        unowned_slots: Set[SlotDefinitionName] = set()
        for slotname, slot in sorted(self.schema.slots.items(), key=lambda e: e[0]):
            if slotname not in self.owners:
                if slot.domain:
                    if self._ancestor_is_owned(slot):
                        n_unreferenced_descendants += 1
                    else:
                        unowned_slots.add(slotname)
        if n_unreferenced_descendants:
            lines.append(f"\tUnreferenced descendants of owned slots: {n_unreferenced_descendants}")
        if unowned_slots:
            lines.append(f"\t* Unowned slots: {', '.join(sorted(unowned_slots))}")

        # Compare each slot's declared domain with the classes that actually list the slot
        not_in_domain: Set[SlotDefinitionName] = set()
        domain_mismatches: Set[SlotDefinitionName] = set()
        unkdomains: Set[str] = set()
        emptydomains: Set[SlotDefinitionName] = set()
        for slot in self.schema.slots.values():
            if not slot.domain:
                emptydomains.add(slot.name)
            elif slot.domain in self.schema.classes:
                if slot.name in self.schema.classes[slot.domain].slots:
                    pass
                elif slot.name not in self.slotclasses:
                    not_in_domain.add(slot.name)
                else:
                    domain_mismatches.add(slot.name)
            else:
                unkdomains.add(f"{slot.name}: {slot.domain}")

        if not_in_domain:
            lines.append(f"\tNot in domain: {len(not_in_domain)}")
            lines.append("\t\tslot.name: slot.domain")
            lines.append("\t\t--------- -----------")
            for slotname in sorted(not_in_domain):
                lines.append(f'\t\t"{slotname}": "{self.schema.slots[slotname].domain}"')
        # NOTE(review): treated as independent of the "Not in domain" block above -- confirm.
        if domain_mismatches:
            lines.append(f"\t\tMismatches: {len(domain_mismatches)}")
            for slotname in sorted(domain_mismatches):
                lines.append(
                    f'\t\t\tSlot: "{slotname}" declared domain: "{self.schema.slots[slotname].domain}" '
                    f'actual domain(s): {", ".join(sorted(self.slotclasses[slotname]))}'
                )
        if unkdomains:
            lines.append(f"\t* Unknown domain: {', '.join(sorted(unkdomains))}")
        if emptydomains:
            lines.append(f"\tDomain unspecified: {len(emptydomains)}")

        # Range usage counts, grouped by the kind of element the range names
        ranges = sorted(self.rangerefs.items())
        lines.append("\tRanges:")
        lines.append("\t\tType:")
        lines += [f"\t\t\t{rng}: {len(slots)}" for rng, slots in ranges if rng in self.schema.types]
        lines.append("\t\tClass:")
        lines += [f"\t\t\t{rng}: {len(slots)}" for rng, slots in ranges if rng in self.schema.classes]
        # Enum ranges are accepted by summarize_slot_definition, so they are not reported as unknown
        enum_ranges = [
            f"\t\t\t{rng}: {len(slots)}"
            for rng, slots in ranges
            if rng in self.schema.enums and rng not in self.schema.types and rng not in self.schema.classes
        ]
        if enum_ranges:
            lines += ["\t\tEnum:"] + enum_ranges
        unknowns = [
            f"\t\t\t{rng}: {len(slots)}"
            for rng, slots in ranges
            if rng not in self.schema.types and rng not in self.schema.classes and rng not in self.schema.enums
        ]
        if unknowns:
            lines += ["\t\tUnknown:"] + unknowns
        return lines

    def _type_summary(self) -> List[str]:
        """Build the 'Types' section of the report: counts plus the typeof tree under each base."""

        def recurse_types(typ: TypeDefinitionName, indent: str = "\t\t\t") -> List[str]:
            # One line for typ, followed by its specializations one tab deeper
            has_specializations = typ in self.typeofs
            tree = [f"{indent}{typ}" + (":" if has_specializations else "")]
            if has_specializations:
                for tr in sorted(self.typeofs[typ]):
                    tree += recurse_types(tr, indent + "\t")
            return tree

        lines = [f"Types: {len(self.schema.types)}", self._referenced_by(self.typerefs), "\tBases:"]
        for base in sorted(self.typebases.keys()):
            lines.append(f"\t\t{base}:")
            for typ in sorted(self.typebases[base]):
                # Only start a tree at types that are not themselves specializations
                if not self.schema.types[typ].typeof:
                    lines += recurse_types(typ)
        return lines