Source code for linkml.validator.loaders.delimited_file_loader

import csv
import re
from abc import ABC, abstractmethod
from typing import Iterator, Optional

from linkml.validator.loaders.loader import Loader


def _parse_numeric(value: str):
    if not isinstance(value, str) or not re.search(r"[0-9]", value):
        return value
    try:
        return int(value)
    except (TypeError, ValueError):
        pass
    try:
        return float(value)
    except (TypeError, ValueError, OverflowError):
        return value


class _DelimitedFileLoader(Loader, ABC):
    """Base class for TSV and CSV loaders"""

    @property
    @abstractmethod
    def delimiter(self):
        pass

    def __init__(self, source, *, skip_empty_rows: bool = False, index_slot_name: Optional[str] = None) -> None:
        super().__init__(source)
        self.skip_empty_rows = skip_empty_rows
        self.index_slot_name = index_slot_name

    def _rows(self) -> Iterator[dict]:
        with open(self.source) as file:
            reader: csv.DictReader = csv.DictReader(file, delimiter=self.delimiter, skipinitialspace=True)
            for row in reader:
                if self.skip_empty_rows and not any(row.values()):
                    continue
                yield {k: _parse_numeric(v) for k, v in row.items() if k is not None and v != ""}

    def iter_instances(self) -> Iterator[dict]:
        if self.index_slot_name is not None:
            yield {self.index_slot_name: list(self._rows())}
        else:
            yield from self._rows()


[docs]class CsvLoader(_DelimitedFileLoader): """A loader for instances serialized as CSV :param skip_empty_rows: If ``True``, skip empty rows instead of yielding empty dicts. Defaults to ``False``. :param index_slot_name: If provided, ``iter_instances`` will yield one dict where all rows of the CSV file are collected into a list with ``index_slot_name`` as the key. If ``None``, ``iter_instances`` will yield each row as a dict individually. Defaults to ``None``. """ @property def delimiter(self): return ","
[docs]class TsvLoader(_DelimitedFileLoader): """A loader for instances serialized as TSV :param skip_empty_rows: If ``True``, skip empty rows instead of yielding empty dicts. Defaults to ``False``. :param index_slot_name: If provided, ``iter_instances`` will yield one dict where all rows of the TSV file are collected into a list with ``index_slot_name`` as the key. If ``None``, ``iter_instances`` will yield each row as a dict individually. Defaults to ``None``. """ @property def delimiter(self): return "\t"