Source code for linkml.generators.jsonldcontextgen

"""
Generate JSON-LD contexts
"""

import json
import os
import re
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any

import click
from jsonasobj2 import JsonObj, as_json
from rdflib import SKOS, XSD, Namespace

from linkml._version import __version__
from linkml.utils.deprecation import deprecated_fields
from linkml.utils.generator import Generator, shared_arguments
from linkml_runtime.linkml_model.meta import ClassDefinition, SlotDefinition
from linkml_runtime.linkml_model.types import SHEX
from linkml_runtime.utils.formatutils import camelcase, underscore

# Type URIs treated as references rather than literal values: visit_slot emits
# "@type": "@id" for a slot whose range type resolves to one of these.
URI_RANGES = (SHEX.nonliteral, SHEX.bnode, SHEX.iri)

# Scoped "@context" that visit_slot attaches to every slot whose range is an enum.
# The keys are presumably the fields of a serialised permissible value
# (NOTE(review): confirm against the enum serialisation); the "skos:" terms rely
# on the skos prefix, which visit_slot registers whenever this context is used.
ENUM_CONTEXT = {
    "text": "skos:notation",
    "description": "skos:prefLabel",
    "meaning": "@id",
}


@deprecated_fields({"emit_metadata": "metadata"})
@dataclass
class ContextGenerator(Generator):
    """Generate a JSON-LD ``@context`` (and, optionally, a JSON-LD frame) from a schema.

    The ``visit_*`` callbacks accumulate prefix, class and slot entries;
    ``end_schema`` assembles them into the final context, optionally writes it
    to ``output`` and returns it as a JSON string.

    When ``emit_frame`` is set and an ``output`` path is given, a sibling
    ``*.frame.jsonld`` file is written that carries one ``@embed`` rule per
    class-ranged slot with an explicit ``inlined`` value.  With
    ``embed_context_in_frame`` the context is stored inside that frame instead
    of in a separate context file.
    """

    # ClassVars
    generatorname = os.path.basename(__file__)
    generatorversion = "0.1.1"
    valid_formats = ["context", "json"]
    visit_all_class_slots = False
    uses_schemaloader = True
    requires_metamodel = True
    file_extension = "context.jsonld"

    # ObjectVars
    # Prefixes that end_schema writes into the context
    emit_prefixes: set[str] = field(default_factory=set)
    # Prefix of the schema's default namespace (resolved in visit_schema)
    default_ns: str | None = None
    # "@vocab", "@base" and slot entries, keyed by context term
    context_body: dict = field(default_factory=dict)
    # Class entries, keyed by camel-cased class name
    slot_class_maps: dict = field(default_factory=dict)
    metadata: bool = False
    model: bool | None = True
    base: str | Namespace | None = None
    output: str | None = None
    prefixes: bool | None = True
    flatprefixes: bool | None = False
    fix_multivalue_containers: bool | None = False

    # Framing (opt-in via CLI flag)
    emit_frame: bool = False
    embed_context_in_frame: bool = False
    # "@embed" rules collected by visit_slot, keyed by context term
    frame_body: dict = field(default_factory=dict)
    # Name of the most recently visited class flagged as tree_root
    frame_root: str | None = None

    def __post_init__(self) -> None:
        """Run the base-class initialisation and require that namespaces were loaded.

        :raises TypeError: if ``self.namespaces`` is ``None`` after initialisation.
        """
        super().__post_init__()
        if self.namespaces is None:
            raise TypeError("Schema text must be supplied to context generator.  Preparsed schema will not work")

    def visit_schema(self, base: str | Namespace | None = None, output: str | None = None, **_):
        """Register the schema's prefixes and record its default namespace as ``@vocab``.

        ``base`` and ``output`` are accepted for interface compatibility and are
        not used here.
        """
        # Add any explicitly declared prefixes
        for prefix in self.schema.prefixes.values():
            self.emit_prefixes.add(prefix.prefix_prefix)

        # Add any prefixes explicitly declared
        for pfx in self.schema.emit_prefixes:
            self.add_prefix(pfx)

        # Add the default prefix
        if self.schema.default_prefix:
            dflt = self.namespaces.prefix_for(self.schema.default_prefix)
            if dflt:
                self.default_ns = dflt
            if self.default_ns:
                default_uri = self.namespaces[self.default_ns]
                self.emit_prefixes.add(self.default_ns)
            else:
                # No known prefix: use the declared value itself as the vocabulary
                # and expose it under the schema name.
                default_uri = self.schema.default_prefix
                if self.schema.name:
                    self.namespaces[self.schema.name] = default_uri
                    self.emit_prefixes.add(self.schema.name)
            self.context_body["@vocab"] = default_uri

    def end_schema(
        self,
        base: str | Namespace | None = None,
        output: str | None = None,
        prefixes: bool | None = None,
        flatprefixes: bool | None = None,
        model: bool | None = None,
        **_,
    ) -> str:
        """Assemble the context, write the requested files and return the context JSON.

        Every argument left as ``None`` falls back to the attribute of the same
        name on the generator.

        :param base: value for ``@base``; a value without ``://`` is treated as a
            path and made relative to the directory of the schema source file
        :param output: path of the context file; also determines the frame file name
        :param prefixes: emit the collected prefixes
        :param flatprefixes: emit every prefix as a plain string
        :param model: emit the class and slot entries
        :return: the context as JSON text followed by a newline
        """
        if base is None:
            base = self.base
        if output is None:
            output = self.output
        if prefixes is None:
            prefixes = self.prefixes
        if flatprefixes is None:
            flatprefixes = self.flatprefixes
        if model is None:
            model = self.model

        context = JsonObj()
        if self.metadata:
            comments = JsonObj()
            comments.description = "Auto generated by LinkML jsonld context generator"
            comments.generation_date = self.schema.generation_date
            comments.source = self.schema.source_file
            context.comments = comments
        context_content = {"xsd": "http://www.w3.org/2001/XMLSchema#"}
        if base:
            base = str(base)
            if "://" not in base:
                self.context_body["@base"] = os.path.relpath(base, os.path.dirname(self.schema.source_file))
            else:
                self.context_body["@base"] = base
        if prefixes:
            for prefix in sorted(self.emit_prefixes):
                url = str(self.namespaces[prefix])
                # Derived from line # ~5223 in pyld/lib/jsonld.py
                if re.match(r".*[:/\?#\[\]@]$", url) or flatprefixes:
                    context_content[prefix] = url
                else:
                    # The URL does not end in one of the characters matched above,
                    # so the term is flagged as a prefix explicitly.
                    prefix_obj = JsonObj()
                    prefix_obj["@id"] = url
                    prefix_obj["@prefix"] = True
                    context_content[prefix] = prefix_obj
        if model:
            context_content.update(self.context_body)
            context_content.update(self.slot_class_maps)
        context["@context"] = context_content

        # Serialise once; the text is reused for the context file, for the
        # context embedded in the frame and for the return value.
        context_json = as_json(context)

        # A frame is written when framing is enabled and there is something to
        # put in it: at least one @embed rule, or the context itself.
        write_frame = bool(output) and self.emit_frame and (bool(self.frame_body) or self.embed_context_in_frame)

        # The standalone context file is skipped only when the context really
        # ends up inside a frame, so a requested output is never silently lost.
        if output and not (write_frame and self.embed_context_in_frame):
            with open(output, "w", encoding="UTF-8") as outf:
                outf.write(context_json)

        if write_frame:
            self._write_frame(output, context_json)

        return str(context_json) + "\n"

    def _write_frame(self, output: str, context_json: str) -> None:
        """Write the ``.frame.jsonld`` file that accompanies ``output``.

        :param output: path of the context file; the frame path is
            ``Path(output).with_suffix(".frame.jsonld")``
        :param context_json: the serialised context document
        """
        # Root class: the first class flagged tree_root, else the first class.
        root_name = next(
            (cname for cname, c in self.schema.classes.items() if getattr(c, "tree_root", False)),
            None,
        )
        if root_name is None and self.schema.classes:
            root_name = next(iter(self.schema.classes))

        if self.embed_context_in_frame:
            # Re-parse the serialised context so that only plain JSON types are
            # embedded; the in-memory context may hold JsonObj values that the
            # standard json encoder cannot serialise.
            frame_context = json.loads(context_json)["@context"]
        else:
            frame_context = Path(output).name
        frame = {"@context": frame_context, "@omitGraph": True}
        if root_name:
            root_cls = self.schema.classes[root_name]
            frame["@type"] = root_cls.class_uri or root_cls.name
        frame.update(self.frame_body)

        frame_path = Path(output).with_suffix(".frame.jsonld")
        with open(frame_path, "w", encoding="UTF-8") as f:
            json.dump(frame, f, indent=2, ensure_ascii=False)

    def visit_class(self, cls: ClassDefinition) -> bool:
        """Record the context entry for ``cls`` and note it if it is a tree root.

        :return: always ``True``
        """
        class_def = {}
        cn = camelcase(cls.name)
        self.add_mappings(cls)
        self._build_element_id(class_def, cls.class_uri)
        if class_def:
            self.slot_class_maps[cn] = class_def

        # prefer explicit tree_root for frame @type
        if getattr(cls, "tree_root", False):
            self.frame_root = cls.name

        # We don't bother to visit class slots - just all slots
        return True

    def visit_slot(self, aliased_slot_name: str, slot: SlotDefinition) -> None:
        """Record the context entry (and any ``@embed`` frame rule) for ``slot``.

        Identifier slots map to the bare string ``"@id"``.  Other slots get a
        dictionary built from the slot's range, multiplicity and URI.

        :param aliased_slot_name: slot name used, underscored, as the context term
        :param slot: the slot definition being visited
        """
        if slot.identifier:
            slot_def = "@id"
        else:
            slot_def = {}
            if not slot.usage_slot_name:
                any_of_ranges = [any_of_el.range for any_of_el in slot.any_of]
                if slot.range in self.schema.classes or any(rng in self.schema.classes for rng in any_of_ranges):
                    slot_def["@type"] = "@id"
                elif slot.range in self.schema.enums:
                    slot_def["@context"] = ENUM_CONTEXT
                    # Add the necessary prefixes to the namespace
                    skos = self.namespaces.prefix_for(SKOS)
                    if not skos:
                        self.namespaces["skos"] = SKOS
                        skos = "skos"
                    self.emit_prefixes.add(skos)
                else:
                    range_type = self.schema.types[slot.range]
                    range_uri = self.namespaces.uri_for(range_type.uri)
                    if range_uri == XSD.string:
                        # Plain strings need no explicit @type
                        pass
                    elif range_uri in URI_RANGES:
                        slot_def["@type"] = "@id"
                    else:
                        slot_def["@type"] = range_type.uri
                if self.fix_multivalue_containers and slot.multivalued:
                    if slot.inlined and not slot.inlined_as_list:
                        slot_def["@container"] = "@index"
                    else:
                        slot_def["@container"] = "@set"
                self._build_element_id(slot_def, slot.slot_uri)
            self.add_mappings(slot)
        if slot_def:
            key = underscore(aliased_slot_name)
            self.context_body[key] = slot_def
            # collect @embed only for object-valued slots (range is a class)
            if slot.range in self.schema.classes and slot.inlined is not None:
                self.frame_body[key] = {"@embed": "@always" if bool(slot.inlined) else "@never"}

    def _build_element_id(self, definition: Any, uri: str) -> None:
        """
        Defines the elements @id attribute according to the default namespace prefix of the schema.

        The @id namespace prefix is added only if it doesn't correspond to the default schema namespace prefix
        whether it is in URI format or as an alias.

        @param definition: the element (class or slot) definition
        @param uri: the uri of the element (class or slot)
        @return: None
        """
        uri_prefix, uri_suffix = self.namespaces.prefix_suffix(uri)
        # "@vocab" is only set when the schema declares a default prefix; without
        # it no namespace counts as the default one.
        vocab = self.context_body.get("@vocab")
        is_default_namespace = vocab is not None and (
            uri_prefix == vocab or uri_prefix == self.namespaces.prefix_for(vocab)
        )

        if not uri_prefix and not uri_suffix:
            definition["@id"] = uri
        elif not uri_prefix or is_default_namespace:
            definition["@id"] = uri_suffix
        else:
            # A prefix outside the default namespace: emit a compact IRI and make
            # sure the prefix itself is part of the context.
            definition["@id"] = uri_prefix + ":" + uri_suffix
            self.add_prefix(uri_prefix)

    def serialize(
        self,
        base: str | Namespace | None = None,
        output: str | None = None,
        prefixes: bool | None = None,
        flatprefixes: bool | None = None,
        model: bool | None = None,
        **kwargs,
    ) -> str:
        """Forward the context options, by keyword, to the base-class ``serialize``.

        :return: whatever the base-class ``serialize`` returns
        """
        return super().serialize(
            base=base, output=output, prefixes=prefixes, flatprefixes=flatprefixes, model=model, **kwargs
        )
@shared_arguments(ContextGenerator)
@click.command(name="jsonld-context")
@click.option("--base", help="Base URI for model")
@click.option(
    "--prefixes/--no-prefixes",
    default=True,
    show_default=True,
    help="Emit context for prefixes (default=--prefixes)",
)
@click.option(
    "--model/--no-model",
    default=True,
    show_default=True,
    help="Emit context for model elements (default=--model)",
)
@click.option(
    "--flatprefixes/--no-flatprefixes",
    default=False,
    show_default=True,
    help="Emit non-JSON-LD compliant prefixes as an object (deprecated: use gen-prefix-map instead).",
)
@click.option(
    "--emit-frame/--no-emit-frame",
    default=False,
    show_default=True,
    help="Also emit a <schema>.frame.jsonld file with @embed rules for framing",
)
@click.option(
    "--embed-context-in-frame/--no-embed-context-in-frame",
    default=False,
    show_default=True,
    help="Emit a <schema>.frame.jsonld file with @context embedded directly (single file)",
)
@click.option("-o", "--output", type=click.Path(), help="Output file name")
@click.option(
    "--fix-multivalue-containers/--no-fix-multivalue-containers",
    default=False,
    show_default=True,
    help="For multivalued attributes declare a fix container type ('@set' for lists, '@index' for dictionaries).",
)
@click.version_option(__version__, "-V", "--version")
def cli(yamlfile, emit_frame, embed_context_in_frame, output, **args):
    """Generate jsonld @context definition from LinkML model"""
    # Both framing flags write a file next to the context, so they need a path.
    frame_requested = emit_frame or embed_context_in_frame
    if frame_requested and not output:
        raise click.UsageError("--emit-frame/--embed-context-in-frame requires --output")

    generator = ContextGenerator(yamlfile, **args)
    # Embedding the context implies emitting a frame.
    generator.emit_frame = True if embed_context_in_frame else emit_frame
    if embed_context_in_frame:
        generator.embed_context_in_frame = True

    rendered = generator.serialize(output=output, **args)
    print(rendered)


if __name__ == "__main__":
    cli()