import logging
import os
from collections import defaultdict
from dataclasses import dataclass, field
from functools import lru_cache
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, Type

import click
import yaml

from linkml._version import __version__
from linkml.generators.excelgen import ExcelGenerator
from linkml.generators.graphqlgen import GraphqlGenerator
from linkml.generators.jsonldcontextgen import ContextGenerator
from linkml.generators.jsonldgen import JSONLDGenerator
from linkml.generators.jsonschemagen import JsonSchemaGenerator
from linkml.generators.markdowngen import MarkdownGenerator
from linkml.generators.owlgen import OwlSchemaGenerator
from linkml.generators.prefixmapgen import PrefixGenerator
from linkml.generators.protogen import ProtoGenerator
from linkml.generators.pythongen import PythonGenerator
from linkml.generators.shaclgen import ShaclGenerator
from linkml.generators.shexgen import ShExGenerator
from linkml.generators.sqltablegen import SQLTableGenerator
from linkml.utils.cli_utils import log_level_option
from linkml.utils.generator import Generator
# Readable aliases for the plain types that make up a GEN_MAP entry.
PATH_FSTRING = str  # output path template relative to the project directory, e.g. "owl/{name}.owl.ttl"
GENERATOR_NAME = str  # key into GEN_MAP, e.g. "jsonschema"
ARG_DICT = Dict[str, Any]  # keyword arguments for one generator
CONFIG_TUPLE = Tuple[Type[Generator], PATH_FSTRING, ARG_DICT]

# Registry of generators driven by ProjectGenerator, in the order they are run.
# Each entry is (generator class, output path template, default arguments).
# "{name}" stands for the schema file's base name and "{parent}" for the
# directory the artefact is written to; a path template ending in "/" means
# the generator writes its own files and nothing is written for it here.
GEN_MAP: Dict[GENERATOR_NAME, CONFIG_TUPLE] = {
    "graphql": (GraphqlGenerator, "graphql/{name}.graphql", {}),
    "jsonldcontext": (ContextGenerator, "jsonld/{name}.context.jsonld", {}),
    "jsonld": (
        JSONLDGenerator,
        "jsonld/{name}.jsonld",
        {"context": "{parent}/{name}.context.jsonld"},
    ),
    "jsonschema": (JsonSchemaGenerator, "jsonschema/{name}.schema.json", {}),
    "markdown": (
        MarkdownGenerator,
        "docs/",
        {"directory": "{parent}", "index_file": "{name}.md"},
    ),
    "owl": (OwlSchemaGenerator, "owl/{name}.owl.ttl", {}),
    "prefixmap": (PrefixGenerator, "prefixmap/{name}.yaml", {}),
    "proto": (ProtoGenerator, "protobuf/{name}.proto", {}),
    "python": (PythonGenerator, "{name}.py", {}),
    # Disabled entries, kept for reference:
    # 'rdf': (RDFGenerator, 'rdf/{name}.ttl', {}),
    # 'rdf': (RDFGenerator, 'rdf/{name}.ttl', {'context': '{parent}/../jsonld/{name}.context.jsonld'}),
    "shex": (ShExGenerator, "shex/{name}.shex", {}),
    "shacl": (ShaclGenerator, "shacl/{name}.shacl.ttl", {}),
    "sqltable": (SQLTableGenerator, "sqlschema/{name}.sql", {}),
    # linkml/generators/javagen.py uses a different architecture from most of
    # the other generators, so it is not registered:
    # 'java': (JavaGenerator, 'java/{name}.java', {'directory': '{parent}'}),
    # linkml/generators/excelgen.py has a different mechanism for determining
    # the output path, hence the explicit "output" argument.
    "excel": (ExcelGenerator, "excel/{name}.xlsx", {"output": "{parent}/{name}.xlsx"}),
}
def _collect_local_imports(schema_path, dir, seen, ordered):
    """
    Depth-first helper for get_local_imports.

    Appends ``schema_path`` and then each not-yet-visited local import to
    ``ordered``; ``seen`` holds the paths already visited so that circular or
    repeated imports are followed only once.
    """
    if schema_path in seen:
        return
    seen.add(schema_path)
    ordered.append(schema_path)
    logging.info(f"GETTING IMPORTS = {schema_path}")
    with open(schema_path) as stream:
        # An empty YAML document loads as None; treat it as a schema without imports.
        schema = yaml.safe_load(stream) or {}
    # ``imports:`` with no value also loads as None.
    for imp in schema.get("imports") or []:
        imp_path = os.path.join(dir, imp) + ".yaml"
        logging.info(f" IMP={imp} // path={imp_path}")
        # Only imports that exist as files under ``dir`` are followed;
        # anything else is ignored.
        if os.path.isfile(imp_path):
            _collect_local_imports(imp_path, dir, seen, ordered)


@lru_cache()
def get_local_imports(schema_path: str, dir: str):
    """
    List a schema together with every schema it transitively imports from ``dir``.

    An import ``imp`` is resolved to ``<dir>/<imp>.yaml`` and followed only if
    that file exists. Each path is listed once, even when it is imported from
    several schemas or is part of an import cycle.

    The result is memoised per ``(schema_path, dir)``; the same list object is
    returned on repeated calls, so callers should not mutate it.

    :param schema_path: path to the root schema YAML file
    :param dir: directory in which imported schemas are looked up
    :return: list of schema paths in depth-first order, starting with ``schema_path``
    """
    ordered = []
    _collect_local_imports(schema_path, dir, set(), ordered)
    return ordered
@dataclass
class ProjectConfiguration:
    """
    Global project configuration, and per-generator configurations
    """

    # Root directory under which all generated artefacts are placed.
    directory: str = "tmp"
    # Extra arguments per generator name; these are merged over (and override)
    # the defaults registered for that generator in GEN_MAP.
    generator_args: Dict[GENERATOR_NAME, ARG_DICT] = field(default_factory=lambda: defaultdict(dict))
    # When set and non-empty, only the named generators are run.
    includes: Optional[List[str]] = None
    # When set, the named generators are skipped.
    excludes: Optional[List[str]] = None
    # Passed to each generator's serialize(); when truthy only the root schema
    # is processed, otherwise locally imported schemas are processed as well.
    mergeimports: Optional[bool] = None
class ProjectGenerator:
    """
    Generates complete project folders

    Note this doesn't conform to overall generator framework, as it is a meta-generator
    """

    @staticmethod
    def generate(schema_path: str, config: Optional[ProjectConfiguration] = None):
        """
        Run every selected generator from GEN_MAP over the schema.

        When ``config.mergeimports`` is truthy only ``schema_path`` is
        processed; otherwise the schema and all of its locally resolvable
        imports are each processed separately.

        :param schema_path: path to the root schema YAML file
        :param config: project configuration; a default ProjectConfiguration
            is created when omitted
        :raises Exception: if ``config.directory`` is None
        """
        # Create the default per call: a default instance in the signature is
        # built once at definition time and would be shared by every call.
        if config is None:
            config = ProjectConfiguration()
        if config.directory is None:
            raise Exception("Must pass directory")
        Path(config.directory).mkdir(parents=True, exist_ok=True)
        if config.mergeimports:
            all_schemas = [schema_path]
        else:
            all_schemas = get_local_imports(schema_path, os.path.dirname(schema_path))
        logging.debug(f"ALL_SCHEMAS = {all_schemas}")
        for gen_name, (gen_cls, gen_path_fmt, default_gen_args) in GEN_MAP.items():
            # An empty or unset inclusion list means "run everything".
            if config.includes and gen_name not in config.includes:
                logging.info(f"Skipping {gen_name} as not in inclusion list: {config.includes}")
                continue
            if config.excludes is not None and gen_name in config.excludes:
                logging.info(f"Skipping {gen_name} as it is in exclusion list")
                continue
            logging.info(f"Generating: {gen_name}")
            for local_path in all_schemas:
                ProjectGenerator._generate_artefact(
                    gen_name, gen_cls, gen_path_fmt, default_gen_args, local_path, config
                )

    @staticmethod
    def _generate_artefact(gen_name, gen_cls, gen_path_fmt, default_gen_args, local_path, config):
        """Run one generator over one schema file and write its output where applicable."""
        logging.info(f" SCHEMA: {local_path}")
        name = os.path.basename(local_path).replace(".yaml", "")
        gen_path = gen_path_fmt.format(name=name)
        gen_path_full = f"{config.directory}/{gen_path}"
        # Split at the last "/" into target directory and file name; the file
        # name is empty for path templates that end in "/".
        parent_dir, _, file_name = gen_path_full.rpartition("/")
        logging.info(f" PARENT={parent_dir}")
        Path(parent_dir).mkdir(parents=True, exist_ok=True)
        # User-supplied arguments override the GEN_MAP defaults. Tolerate a
        # missing mapping or an entry left empty in the YAML configuration.
        user_gen_args = (config.generator_args or {}).get(gen_name) or {}
        all_gen_args = {**default_gen_args, **user_gen_args}
        gen: Generator
        # special check for output key because ExcelGenerator and
        # SSSOMGenerator read in output file name during initialization
        if "output" in all_gen_args:
            all_gen_args["output"] = all_gen_args["output"].format(name=name, parent=parent_dir)
        gen = gen_cls(local_path, **all_gen_args)
        serialize_args = {"mergeimports": config.mergeimports}
        for k, v in all_gen_args.items():
            # all ARG_DICT values are interpolatable; "output" was already
            # interpolated above, and formatting it a second time would fail
            # on literal braces in the target directory name
            if isinstance(v, str) and k != "output":
                v = v.format(name=name, parent=parent_dir)
            serialize_args[k] = v
        logging.info(f" {gen_name} ARGS: {serialize_args}")
        # Serialize exactly once. The excel generator is given its output
        # path at construction time, so its return value is not routed into
        # a file here like the other generators.
        gen_dump = gen.serialize(**serialize_args)
        if gen_name != "excel" and file_name != "":
            # an empty file name (markdowngen) means nothing is written here
            logging.info(f" WRITING TO: {gen_path_full}")
            with open(gen_path_full, "w", encoding="UTF-8") as stream:
                stream.write(gen_dump)
@click.command()
@click.option(
    "--dir",
    "-d",
    help="directory in which to place generated files. E.g. linkml_model, biolink_model",
)
@click.option("--generator-arguments", "-A", help="yaml configuration for generators")
@click.option("--config-file", "-C", type=click.File("rb"), help="path to yaml configuration")
@click.option("--exclude", "-X", multiple=True, help="list of artefacts to be excluded")  # TODO: make this an enum
@click.option(
    "--include",
    "-I",
    multiple=True,
    help="list of artefacts to be included. If not set, defaults to all",
)  # TODO: make this an enum
@click.option(
    "--mergeimports/--no-mergeimports",
    default=True,
    show_default=True,
    help="Merge imports into source file",
)
@log_level_option
@click.argument("yamlfile")
@click.version_option(__version__, "-V", "--version")
def cli(
    yamlfile,
    dir,
    exclude: List[str],
    include: List[str],
    config_file,
    mergeimports,
    generator_arguments: str,
    **kwargs,
):
    """
    Generate an entire project LinkML schema

    Generate all downstream artefacts using default configuration:

    .. code-block:: bash

        gen-project -d . personinfo.yaml

    Exclusion lists: all except ShEx:

    .. code-block:: bash

        gen-project --exclude shex -d . personinfo.yaml

    Inclusion lists: only jsonschema and python:

    .. code-block:: bash

        gen-project -I python -I jsonschema -d . personinfo.yaml

    Configuration, on command line:

    .. code-block:: bash

        gen-project -A 'jsonschema: {top_class: Container}' -d . personinfo.yaml

    Configuration, via yaml file:

    .. code-block:: bash

        gen-project --config-file config.yaml personinfo.yaml

    config.yaml:

    .. code-block:: yaml

        directory: .
        generator_args:
          jsonschema:
            top_class: Container
    """
    # Precedence: values from the config file are applied first, then any
    # command-line options override them.
    project_config = ProjectConfiguration()
    if config_file is not None:
        # An empty config file loads as None; treat it as "no settings".
        for k, v in (yaml.safe_load(config_file) or {}).items():
            setattr(project_config, k, v)
    if exclude:
        project_config.excludes = list(exclude)
    if include:
        project_config.includes = list(include)
    if generator_arguments is not None:
        try:
            parsed_args = yaml.safe_load(generator_arguments)
        except yaml.YAMLError as e:
            # Keep the original parser error attached as the cause.
            raise Exception("Argument must be a valid YAML blob") from e
        if parsed_args is None:
            # An empty string is valid YAML but carries no arguments.
            parsed_args = {}
        if not isinstance(parsed_args, dict):
            # Arguments are looked up per generator name, so a scalar or list is unusable.
            raise Exception("Argument must be a YAML mapping of generator name to arguments")
        project_config.generator_args = parsed_args
        logging.info(f"generator args: {project_config.generator_args}")
    if dir is not None:
        project_config.directory = dir
    # NOTE(review): the flag defaults to True, so this assignment always
    # replaces a ``mergeimports`` value read from the config file.
    project_config.mergeimports = mergeimports
    gen = ProjectGenerator()
    gen.generate(yamlfile, project_config)
# Invoke the command-line interface when this module is executed as a script.
if __name__ == "__main__":
    cli()