Source code for linkml.utils.rawloader

import copy
from datetime import datetime
from pathlib import Path
from typing import Optional, TextIO, Union
from urllib.parse import urlparse

import yaml
from dateutil.parser import ParserError, parse
from hbreader import FileInfo, HBType, detect_type
from linkml_runtime.linkml_model.meta import SchemaDefinition, metamodel_version
from linkml_runtime.loaders import yaml_loader
from linkml_runtime.utils.yamlutils import YAMLMark, YAMLRoot

from linkml.utils.mergeutils import set_from_schema

# strftime layout (date, "T", time to the second, no timezone) used in this module
# for the source_file_date and generation_date values written onto a loaded schema.
DATETIME_FORMAT = "%Y-%m-%dT%H:%M:%S"
# Module-wide patch: swap PyYAML's Mark class for linkml_runtime's YAMLMark.
# NOTE(review): presumably so YAML locations/errors are rendered by YAMLMark — confirm.
yaml.error.Mark = YAMLMark


# Override the default linkml missing value tests
def mrf(self, field_name: str) -> None:
    """Missing-required-field handler that can infer a schema's name from its id.

    When ``self`` is a SchemaDefinition, the missing field is ``name`` and the
    schema has a non-empty ``id``, the name is set to the final segment of the
    id ("#" is treated the same as "/"). In every other situation the call is
    forwarded to ``YAMLRoot.MissingRequiredField`` with ``<ClassName>.<field_name>``.

    @param field_name: name of the required field that was found to be missing
    """
    name_is_derivable = isinstance(self, SchemaDefinition) and field_name == "name" and self.id
    if not name_is_derivable:
        # Nothing to infer here - hand over to the stock YAMLRoot handler
        YAMLRoot.MissingRequiredField(self, f"{type(self).__name__}.{field_name}")
        return

    # Normalise fragment separators so the last "/"-delimited piece is the name
    normalized_id = self.id.replace("#", "/")
    self.name = normalized_id.rsplit("/", 1)[-1]
# Install mrf on the class itself, so it becomes the MissingRequiredField
# handler for every SchemaDefinition instance from this point on.
SchemaDefinition.MissingRequiredField = mrf
def load_raw_schema(
    data: Union[str, dict, TextIO, Path],
    source_file: Optional[str] = None,
    source_file_date: Optional[str] = None,
    source_file_size: Optional[int] = None,
    base_dir: Optional[str] = None,
    merge_modules: Optional[bool] = True,
    emit_metadata: Optional[bool] = True,
) -> SchemaDefinition:
    """Load and flatten SchemaDefinition from a file name, a URL or a block of text

    @param data: URL, file name or block of text YAML Object or open file handle.
        An existing SchemaDefinition is also accepted; it is deep-copied, never mutated.
    @param source_file: Source file name for the schema if data is type TextIO
    @param source_file_date: timestamp of source file if data is type TextIO
    @param source_file_size: size of source file if data is type TextIO
    @param base_dir: Working directory or base URL of sources
    @param merge_modules: True means combine modules into one source, false means keep separate
        (accepted for interface compatibility; not referenced in this function's body)
    @param emit_metadata: True means add source file info to the output
    @return: Un-processed Schema Definition object
    @raise ValueError: input type is not recognized, the loaded schema is empty, or the
        schema has no usable name / id
    @raise AssertionError: a source_file* argument was supplied although data is a file or URL
    """

    def _name_from_url(url) -> str:
        # Last path segment of the URL with its final extension removed
        return urlparse(url).path.rsplit("/", 1)[-1].rsplit(".", 1)[0]

    # Passing a URL or file name
    if detect_type(data, base_dir) not in (HBType.STRING, HBType.STRINGABLE):
        assert source_file is None, "source_file parameter not allowed if data is a file or URL"
        assert source_file_date is None, "source_file_date parameter not allowed if data is a file or URL"
        assert source_file_size is None, "source_file_size parameter not allowed if data is a file or URL"

    if isinstance(data, Path):
        data = str(data)

    # Built before branching so that it is bound on every path: the emit_metadata
    # section below reads it even when no loader ran (SchemaDefinition input).
    schema_metadata = FileInfo()
    schema_metadata.source_file = source_file
    schema_metadata.source_file_date = source_file_date
    schema_metadata.source_file_size = source_file_size
    schema_metadata.base_path = base_dir

    # Convert the input into a valid SchemaDefinition
    # typing.TextIO is an annotation-only class: real handles (io.StringIO, the result
    # of open(), ...) are not instances of it, so readable objects are detected by
    # their read attribute as well.
    # NOTE(review): relies on yaml_loader.load accepting file-like objects - confirm.
    if isinstance(data, (str, dict, TextIO)) or hasattr(data, "read"):
        # TODO: Build a generic loader that detects type from suffix or content and invokes the appropriate loader
        schema = yaml_loader.load(
            # dicts are copied so the caller's object is not touched by the loader
            copy.deepcopy(data) if isinstance(data, dict) else data,
            SchemaDefinition,
            base_dir=base_dir,
            metadata=schema_metadata,
        )
    elif isinstance(data, SchemaDefinition):
        schema = copy.deepcopy(data)
        # Nothing was read, so no loader filled in the metadata. Where the caller gave
        # no explicit value, keep what the schema already records instead of erasing it.
        for attr in ("source_file", "source_file_date", "source_file_size"):
            if getattr(schema_metadata, attr) is None:
                setattr(schema_metadata, attr, getattr(schema, attr, None))
    else:
        raise ValueError("Unrecognized input to raw loader")

    if schema is None:
        raise ValueError("Empty schema - cannot process")

    if schema.name is None:
        if schema.id is None:
            raise ValueError("Unable to determine schema name")
        else:
            schema.name = _name_from_url(schema.id)
    elif schema.id is None:
        # TODO: figure out how to generate this from the default_prefix and namespace map
        raise ValueError("Schema identifier must be supplied")

    if emit_metadata:
        schema.source_file = schema_metadata.source_file
        src_date = schema_metadata.source_file_date
        try:
            # Normalise whatever date text we were given to DATETIME_FORMAT
            schema.source_file_date = parse(src_date).strftime(DATETIME_FORMAT) if src_date else None
        except ParserError:
            # Not parseable as a date - keep the raw value rather than failing the load
            schema.source_file_date = src_date
        schema.source_file_size = schema_metadata.source_file_size
        schema.generation_date = datetime.now().strftime(DATETIME_FORMAT)

    schema.metamodel_version = metamodel_version
    set_from_schema(schema)
    return schema