# Source code for linkml.generators.erdiagramgen

import os
from collections import deque
from dataclasses import dataclass
from enum import Enum
from typing import Optional, Union

import click
import pydantic
from linkml_runtime.linkml_model.meta import ClassDefinitionName, SlotDefinition
from linkml_runtime.utils.formatutils import camelcase, underscore
from linkml_runtime.utils.schemaview import SchemaView

from linkml._version import __version__
from linkml.utils.generator import Generator, shared_arguments

# Alias used as the return annotation of the serializers in this module: diagrams are produced as plain ``str``.
MERMAID_SERIALIZATION = str


class Attribute(pydantic.BaseModel):
    """A single attribute line inside an entity block of the ER diagram.

    ``name``, ``key`` and ``comment`` default to ``None``; they are declared
    ``Optional`` so that the annotation matches the default and an explicit
    ``None`` is accepted by validation.
    """

    datatype: str = "string"
    name: Optional[str] = None
    key: Optional[str] = None
    comment: Optional[str] = None

    def __str__(self):
        # The comment, when present, is wrapped in double quotes.
        cmt = f'"{self.comment}"' if self.comment else ""
        # Layout (including the spaces left behind by an absent key/comment)
        # is kept exactly as before so generated diagrams do not change.
        return f"    {self.datatype} {self.name} {self.key or ''} {cmt}"


class IdentifyingType(str, Enum):
    """Line style joining the two cardinalities of a relationship.

    See: https://mermaid.js.org/syntax/entityRelationshipDiagram.html#identification
    """

    IDENTIFYING = "--"
    NON_IDENTIFYING = ".."

    def __str__(self):
        # Render as the raw marker text rather than the ``Class.MEMBER`` form.
        marker = self.value
        return str(marker)


class Cardinality(pydantic.BaseModel):
    """One end of a relationship, rendered as a two-character marker.

    See https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax
    """

    required: bool = True
    multivalued: bool = False
    is_left: bool = True

    def __str__(self):
        # "|" when the slot is required, "o" when it is optional.
        participation = "|" if self.required else "o"

        # A single-valued end uses "|"; a multivalued end uses a brace whose
        # direction depends on which side the marker is written for.
        if not self.multivalued:
            multiplicity = "|"
        elif self.is_left:
            multiplicity = "}"
        else:
            multiplicity = "{"

        # The two characters are emitted in mirrored order for the two sides.
        pair = (multiplicity, participation) if self.is_left else (participation, multiplicity)
        return "".join(pair)


class RelationshipType(pydantic.BaseModel):
    """The connector between two entities: left marker, line style, right marker.

    See https://mermaid.js.org/syntax/entityRelationshipDiagram.html#relationship-syntax

    ``right_cardinality`` defaults to ``None`` and is therefore declared
    ``Optional`` so the annotation agrees with the default.
    """

    left_cardinality: Cardinality
    identifying: IdentifyingType = IdentifyingType.IDENTIFYING
    right_cardinality: Optional[Cardinality] = None

    def __str__(self):
        # Concatenated with no separators, e.g. "||--}o".
        return f"{self.left_cardinality}{self.identifying}{self.right_cardinality}"


class Entity(pydantic.BaseModel):
    """A named entity of the ER diagram together with its attributes."""

    name: str
    attributes: list[Attribute] = []

    def __str__(self):
        # One attribute per line, enclosed in a braces block after the name.
        body = "\n".join(map(str, self.attributes))
        return self.name + " {\n" + body + "\n}"


class Relationship(pydantic.BaseModel):
    """A labelled edge between two entities of the ER diagram.

    Only ``first_entity`` is mandatory; the remaining fields default to
    ``None`` and are declared ``Optional`` so the annotations agree with the
    defaults.
    """

    first_entity: str
    relationship_type: Optional[RelationshipType] = None
    second_entity: Optional[str] = None
    relationship_label: Optional[str] = None

    def __str__(self):
        # Format: <first> <connector> <second> : "<label>"
        return f'{self.first_entity} {self.relationship_type} {self.second_entity} : "{self.relationship_label}"'


class ERDiagram(pydantic.BaseModel):
    """Represents a diagram of entities and relationships."""

    entities: list[Entity] = []
    relationships: list[Relationship] = []

    def __str__(self):
        # Output is ordered deterministically: entities by name,
        # relationships by their rendered text.
        entity_blocks = [str(ent) for ent in sorted(self.entities, key=lambda ent: ent.name)]
        relationship_lines = sorted(str(rel) for rel in self.relationships)

        ents = "\n".join(entity_blocks)
        rels = "\n".join(relationship_lines)
        return "erDiagram\n" + ents + "\n\n" + rels + "\n"


@dataclass
class ERDiagramGenerator(Generator):
    """
    A generator for serializing schemas as Entity-Relationship diagrams.

    Currently this generates diagrams in mermaid syntax, but in future
    this could easily be extended to have for example a direct SVG or PNG
    generator using PyGraphViz, similar to erdantic.
    """

    # ClassVars
    generatorname = os.path.basename(__file__)
    generatorversion = "0.0.1"
    valid_formats = ["markdown", "mermaid"]
    uses_schemaloader = False
    requires_metamodel = False

    structural: bool = True
    """If True, then only the tree_root and entities reachable from the root are drawn"""

    exclude_attributes: bool = False
    """If True, do not include attributes in entities"""

    exclude_abstract_classes: bool = False
    """If True, do not include abstract classes in the diagram"""

    genmeta: bool = False
    gen_classvars: bool = True
    gen_slots: bool = True
    no_types_dir: bool = False
    use_slot_uris: bool = False

    def __post_init__(self):
        # The SchemaView is created before the base-class initialisation runs.
        self.schemaview = SchemaView(self.schema)
        super().__post_init__()

    def serialize(self) -> MERMAID_SERIALIZATION:
        """
        Serialize a schema as an ER Diagram.

        If a tree_root is present in the schema, then only Entities traversable
        from here will be included. Otherwise, all Entities will be included.

        :return: mermaid string
        """
        sv = self.schemaview
        structural_roots = [cn for cn, c in sv.all_classes().items() if c.tree_root]
        if self.structural and structural_roots:
            return self.serialize_classes(structural_roots, follow_references=True)

        # No usable root (or structural mode disabled): draw every class.
        diagram = ERDiagram()
        for cn in sv.all_classes():
            self.add_class(cn, diagram)
        return self.serialize_diagram(diagram)

    def serialize_classes(
        self,
        class_names: list[Union[str, ClassDefinitionName]],
        follow_references: bool = False,
        max_hops: Optional[int] = None,
        include_upstream: bool = False,
    ) -> MERMAID_SERIALIZATION:
        """
        Serialize a list of classes as an ER Diagram.

        This will also traverse the reference graph and include any Entities reachable from the
        specified classes.

        By default, all reachable Entities are included, unless max_hops is specified.

        :param class_names: initial seed
        :param follow_references: if True, follow references even if not inlined
        :param max_hops: maximum number of hops to follow references
        :param include_upstream: if True, also add classes that use a schema slot
            whose range is one of the seed classes
        :return: mermaid string
        """
        visited = set()
        sv = self.schemaview
        all_classes = sv.all_classes()
        diagram = ERDiagram()

        # Breadth-first traversal: each class is first reached at its minimum
        # hop distance from the seeds, so the max_hops cut-off cannot prune a
        # class that is within range via a shorter path. (A depth-first walk
        # with a shared visited set may mark a class at a deeper depth first
        # and then never expand it.)
        queue = deque((cn, 0) for cn in class_names)
        while queue:
            cn, depth = queue.popleft()
            if cn in visited:
                continue
            self.add_class(cn, diagram)
            visited.add(cn)
            if max_hops is not None and depth >= max_hops:
                continue
            for slot in sv.class_induced_slots(cn):
                rng = slot.range
                if rng not in all_classes or rng in visited:
                    continue
                if follow_references or sv.is_inlined(slot):
                    queue.append((rng, depth + 1))

        # Now add upstream classes if needed
        if include_upstream:
            targets = set(class_names)
            for sn in sv.all_slots():
                slot = sv.schema.slots.get(sn)
                if not slot or slot.range not in targets:
                    continue
                for cl in sv.all_classes():
                    if cl in visited or slot.name not in sv.get_class(cl).slots:
                        continue
                    self.add_upstream_class(cl, targets, diagram)
                    # Mark the class as handled so that a class using several
                    # matching slots is not added to the diagram repeatedly.
                    visited.add(cl)
        return self.serialize_diagram(diagram)

    def serialize_diagram(self, diagram: ERDiagram) -> str:
        """
        Serialize an ER Diagram, from the native pydantic schema.

        :param diagram: ER Diagram
        :return: the mermaid text, wrapped in a fenced ``mermaid`` block when
            the generator format is "markdown"
        """
        er = str(diagram)
        if self.format == "markdown":
            return f"```mermaid\n{er}\n```\n"
        return er

    def add_upstream_class(self, class_name: ClassDefinitionName, targets: set[str], diagram: ERDiagram) -> None:
        """
        Add a class that refers to one of the target classes.

        The entity is added without attributes; only relationships for induced
        slots whose range is in ``targets`` are drawn.

        :param class_name: name of the referring class
        :param targets: names of the classes being referred to
        :param diagram: ER Diagram
        :return:
        """
        sv = self.schemaview
        cls = sv.get_class(class_name)
        if self.exclude_abstract_classes and cls.abstract:
            return
        entity = Entity(name=camelcase(cls.name))
        diagram.entities.append(entity)
        for slot in sv.class_induced_slots(class_name):
            if slot.range in targets:
                self.add_relationship(entity, slot, diagram)

    def add_class(self, class_name: ClassDefinitionName, diagram: ERDiagram) -> None:
        """
        Add a class to the ER Diagram.

        Slots whose range is a class become relationships; all other slots
        become attributes of the entity.

        :param class_name: ClassDefinitionName
        :param diagram: ER Diagram
        :return:
        """
        sv = self.schemaview
        cls = sv.get_class(class_name)
        if self.exclude_abstract_classes and cls.abstract:
            return
        entity = Entity(name=camelcase(cls.name))
        diagram.entities.append(entity)
        for slot in sv.class_induced_slots(class_name):
            # TODO: schemaview should infer this
            if slot.range is None:
                slot.range = sv.schema.default_range or "string"
            if slot.range in sv.all_classes():
                self.add_relationship(entity, slot, diagram)
            else:
                self.add_attribute(entity, slot)

    def add_relationship(self, entity: Entity, slot: SlotDefinition, diagram: ERDiagram) -> None:
        """
        Add a relationship to the ER Diagram.

        :param entity: the entity that owns the slot (first entity of the relationship)
        :param slot: SlotDefinition whose range is the second entity
        :param diagram: ER Diagram
        :return:
        """
        sv = self.schemaview
        # The owning side uses the Cardinality defaults (required, single-valued);
        # the slot's required/multivalued flags are attached to the range side.
        # NOTE(review): the right-hand marker is built with is_left=True (and the
        # left-hand one with is_left=False), which looks swapped; kept as-is so
        # the generated text does not change.
        rel_type = RelationshipType(
            right_cardinality=Cardinality(
                required=slot.required is True, multivalued=slot.multivalued is True, is_left=True
            ),
            left_cardinality=Cardinality(is_left=False),
        )
        rel = Relationship(
            first_entity=entity.name,
            relationship_type=rel_type,
            second_entity=camelcase(sv.get_class(slot.range).name),
            relationship_label=slot.name,
        )
        diagram.relationships.append(rel)

    def add_attribute(self, entity: Entity, slot: SlotDefinition) -> None:
        """
        Add an attribute to an entity of the ER Diagram.

        Does nothing when ``exclude_attributes`` is set.

        :param entity: the entity receiving the attribute
        :param slot: SlotDefinition
        :return:
        """
        if self.exclude_attributes:
            return
        dt = slot.range
        if slot.multivalued:
            # NOTE: mermaid does not support []s or *s in attribute types
            dt = f"{dt}List"
        attr = Attribute(name=underscore(slot.name), datatype=dt)
        entity.attributes.append(attr)


@shared_arguments(ERDiagramGenerator)
@click.option(
    "--structural/--no-structural",
    default=True,
    help="If True, then only the tree_root and entities reachable from the root are drawn",
)
@click.option(
    "--exclude-attributes/--no-exclude-attributes",
    default=False,
    help="If True, do not include attributes in entities",
)
@click.option(
    "--exclude-abstract-classes/--no-exclude-abstract-classes",
    default=False,
    help="If True, do not include abstract classes in the diagram",
)
@click.option(
    "--follow-references/--no-follow-references",
    default=False,
    help="If True, follow references even if not inlined",
)
@click.option("--format", "-f", default="markdown", type=click.Choice(ERDiagramGenerator.valid_formats))
@click.option("--max-hops", default=None, type=click.INT, help="Maximum number of hops")
@click.option("--classes", "-c", multiple=True, help="List of classes to serialize")
@click.option("--include-upstream", is_flag=True, help="Include upstream classes")
@click.version_option(__version__, "-V", "--version")
@click.command(name="erdiagram")
def cli(
    yamlfile,
    classes: list[str],
    max_hops: Optional[int],
    follow_references: bool,
    include_upstream: bool = False,
    **args,
):
    """Generate a mermaid ER diagram from a schema.

    By default, all entities traversable from the tree_root are included. If no tree_root is
    present, then all entities are included.

    To create an ER diagram for selected classes, use the --classes option.
    """
    # Remaining options are forwarded to the generator unchanged.
    generator = ERDiagramGenerator(yamlfile, **args)

    # Without an explicit class selection, render the whole (or rooted) schema.
    if not classes:
        print(generator.serialize())
        return

    rendered = generator.serialize_classes(
        classes,
        follow_references=follow_references,
        max_hops=max_hops,
        include_upstream=include_upstream,
    )
    print(rendered)


# Invoke the click command when the module is executed as a script.
if __name__ == "__main__":
    cli()