# Source code for linkml_store.api.client

import logging
from pathlib import Path
from typing import Dict, Optional, Union

import yaml
from linkml_runtime import SchemaView

from linkml_store.api import Database
from linkml_store.api.config import ClientConfig
from linkml_store.api.stores.chromadb.chromadb_database import ChromaDBDatabase
from linkml_store.api.stores.duckdb.duckdb_database import DuckDBDatabase
from linkml_store.api.stores.filesystem.filesystem_database import FileSystemDatabase
from linkml_store.api.stores.mongodb.mongodb_database import MongoDBDatabase
from linkml_store.api.stores.neo4j.neo4j_database import Neo4jDatabase
from linkml_store.api.stores.solr.solr_database import SolrDatabase

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)


# Maps the scheme part of a database handle (the text before the first ":",
# or the whole handle when it contains no ":") to the Database implementation
# that is instantiated for it. ``Client.attach_database`` raises ``ValueError``
# for any scheme that is not a key of this mapping.
HANDLE_MAP = {
    "duckdb": DuckDBDatabase,
    "solr": SolrDatabase,
    "mongodb": MongoDBDatabase,
    "chromadb": ChromaDBDatabase,
    "neo4j": Neo4jDatabase,
    "file": FileSystemDatabase,
}


class Client:
    """
    A client is the top-level object for interacting with databases.

    * A client has access to one or more :class:`.Database` objects.
    * Each database consists of a number of :class:`.Collection` objects.

    Creating a client
    -----------------
    >>> client = Client()

    Attaching a database
    --------------------
    >>> db = client.attach_database("duckdb", alias="test")

    Note that normally a handle would be specified by a locator such as ``duckdb:///<PATH>``, but
    for convenience, an in-memory duckdb object can be specified without a full locator

    We can check the actual handle:

    >>> db.handle
    'duckdb:///:memory:'

    Creating a new collection
    -------------------------
    >>> collection = db.create_collection("Person")
    >>> objs = [{"id": "P1", "name": "John", "age_in_years": 30}, {"id": "P2", "name": "Alice", "age_in_years": 25}]
    >>> collection.insert(objs)
    >>> qr = collection.find()
    >>> len(qr.rows)
    2
    >>> qr.rows[0]["id"]
    'P1'
    >>> qr.rows[1]["name"]
    'Alice'
    >>> qr = collection.find({"name": "John"})
    >>> len(qr.rows)
    1
    >>> qr.rows[0]["name"]
    'John'

    """

    # Client-level configuration; always set to a config object by ``__init__``.
    metadata: Optional[ClientConfig] = None
    # Attached databases keyed by alias; created lazily on first use.
    _databases: Optional[Dict[str, Database]] = None

    def __init__(self, handle: Optional[str] = None, metadata: Optional[ClientConfig] = None):
        """
        Initialize a client.

        :param handle: optional handle to store on the client configuration. When
            omitted, a handle already present on a supplied ``metadata`` is kept.
        :param metadata: existing configuration to use. A fresh ``ClientConfig``
            is created when this is not supplied.
        """
        if metadata:
            self.metadata = metadata
            # Do not clobber a handle carried by the supplied configuration
            # unless the caller explicitly passed one.
            if handle is not None:
                self.metadata.handle = handle
        else:
            self.metadata = ClientConfig()
            self.metadata.handle = handle

    @property
    def handle(self) -> Optional[str]:
        """
        Get the handle of the client.

        Wraps metadata.handle.

        :return: the handle stored on the client configuration
        """
        return self.metadata.handle

    @property
    def base_dir(self) -> Optional[str]:
        """
        Get the base directory for the client.

        Wraps metadata.base_dir.

        :return: the base directory stored on the client configuration
        """
        return self.metadata.base_dir

    def from_config(self, config: Union[ClientConfig, dict, str, Path], base_dir=None, auto_attach=False, **kwargs):
        """
        Create a client from a configuration.

        Examples
        --------
        >>> from linkml_store.api.config import ClientConfig
        >>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}))
        >>> len(client.databases)
        0
        >>> client = Client().from_config(ClientConfig(databases={"test": {"handle": "duckdb:///:memory:"}}),
        ...                                auto_attach=True)
        >>> len(client.databases)
        1
        >>> "test" in client.databases
        True
        >>> client.databases["test"].handle
        'duckdb:///:memory:'

        :param config: a configuration object, a dict of ``ClientConfig`` fields,
            or the path (``str`` or ``Path``) of a YAML file holding such a dict
        :param base_dir: base directory to record on the configuration; when the
            configuration is loaded from a file and this is not given, the
            directory containing that file is used
        :param auto_attach: if true, attach every configured database immediately
        :param kwargs: passed through to :meth:`attach_database` when auto-attaching
        :return: this client, reconfigured
        """
        if isinstance(config, dict):
            config = ClientConfig(**config)
        elif isinstance(config, (str, Path)):
            config_path = str(config)
            if not base_dir:
                base_dir = Path(config_path).parent
            # Use a context manager so the file handle is closed deterministically.
            with open(config_path) as stream:
                parsed_obj = yaml.safe_load(stream)
            config = ClientConfig(**parsed_obj)
        self.metadata = config
        if base_dir:
            self.metadata.base_dir = base_dir
        self._initialize_databases(auto_attach=auto_attach, **kwargs)
        return self

    def _initialize_databases(self, auto_attach=False, **kwargs):
        """
        Resolve ``{base_dir}`` placeholders in every configured database and optionally attach it.

        The handle and schema location of each database configuration are
        formatted in place with the client's base directory, falling back to the
        current working directory when the client has none.

        :param auto_attach: if true, attach each database under its configured name
        :param kwargs: passed through to :meth:`attach_database`
        """
        for name, db_config in self.metadata.databases.items():
            base_dir = self.base_dir
            logger.info(f"Initializing database: {name}, base_dir: {base_dir}")
            if not base_dir:
                base_dir = Path.cwd()
                logger.info(f"Using current working directory: {base_dir}")
            handle = db_config.handle.format(base_dir=base_dir)
            db_config.handle = handle
            if db_config.schema_location:
                db_config.schema_location = db_config.schema_location.format(base_dir=base_dir)
            if auto_attach:
                db = self.attach_database(handle, alias=name, **kwargs)
                db.from_config(db_config)

    def _set_database_config(self, db: Database):
        """
        Apply the client's stored configuration for a database, if there is one.

        The configuration is looked up by the database's alias; nothing happens
        when the client has no metadata or no entry for that alias.

        :param db: the database to configure
        :return:
        """
        if not self.metadata:
            return
        if db.alias in self.metadata.databases:
            db.from_config(self.metadata.databases[db.alias])

    def attach_database(
        self,
        handle: str,
        alias: Optional[str] = None,
        schema_view: Optional[SchemaView] = None,
        recreate_if_exists=False,
        **kwargs,
    ) -> Database:
        """
        Associate a database with a handle.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb", alias="memory")
        >>> "memory" in client.databases
        True
        >>> db = client.attach_database("duckdb:///tmp/another.db", alias="disk")
        >>> len(client.databases)
        2
        >>> "disk" in client.databases
        True

        :param handle: handle for the database, e.g. duckdb:///foo.db
        :param alias: alias for the database, e.g foo
        :param schema_view: schema view to associate with the database
        :param recreate_if_exists: passed through to the database constructor
        :param kwargs: passed through to the database constructor
        :return: the attached database
        :raises ValueError: if the scheme of the handle is not in ``HANDLE_MAP``
        :raises AssertionError: if the database already carries a different alias
        """
        if ":" not in handle:
            # A bare scheme such as "duckdb": no locator is passed to the
            # database, and the scheme doubles as the default alias.
            scheme = handle
            handle = None
            if alias is None:
                alias = scheme
        else:
            scheme, _ = handle.split(":", 1)
        if scheme not in HANDLE_MAP:
            raise ValueError(f"Unknown scheme: {scheme}")
        cls = HANDLE_MAP[scheme]
        db = cls(handle=handle, recreate_if_exists=recreate_if_exists, **kwargs)
        if schema_view:
            db.set_schema_view(schema_view)
        if not alias:
            alias = handle
        db.parent = self
        # Validate the alias before registering, so that a failed attach does
        # not leave a stale entry behind in the client's registry.
        if db.alias:
            if db.alias != alias:
                raise AssertionError(f"Inconsistent alias: {db.alias} != {alias}")
        else:
            db.metadata.alias = alias
        if not self._databases:
            self._databases = {}
        self._databases[alias] = db
        self._set_database_config(db)
        return db

    def get_database(self, name: Optional[str] = None, create_if_not_exists=True, **kwargs) -> Database:
        """
        Get a named database.

        Examples
        --------
        >>> client = Client()
        >>> db = client.attach_database("duckdb:///test.db", alias="test")
        >>> retrieved_db = client.get_database("test")
        >>> db == retrieved_db
        True

        :param name: if None, there must be a single database attached
        :param create_if_not_exists: if true, a database that is neither attached
            nor configured is attached using ``name`` as its handle
        :param kwargs: passed through to :meth:`attach_database`
        :return: the requested database
        :raises ValueError: if no name is given and zero or several databases are
            attached, or if the database does not exist and may not be created
        """
        if not name:
            if not self._databases:
                raise ValueError("No databases attached and no name provided")
            if len(self._databases) > 1:
                raise ValueError("Ambiguous: No name provided and multiple databases attached")
            return list(self._databases.values())[0]
        if not self._databases:
            self._databases = {}
        if name not in self._databases and name in self.metadata.databases:
            # Configured but not yet attached: attach it under its configured
            # name (attach_database registers it in self._databases).
            db_config = self.metadata.databases[name]
            self.attach_database(db_config.handle, alias=name, **kwargs)
        if name not in self._databases:
            if create_if_not_exists:
                logger.info(f"Creating database: {name}")
                self.attach_database(name, **kwargs)
            else:
                raise ValueError(f"Database {name} does not exist")
        db = self._databases[name]
        self._set_database_config(db)
        return db

    @property
    def databases(self) -> Dict[str, Database]:
        """
        Return all attached databases

        Examples

        >>> client = Client()
        >>> _ = client.attach_database("duckdb", alias="test1")
        >>> _ = client.attach_database("duckdb", alias="test2")
        >>> len(client.databases)
        2
        >>> "test1" in client.databases
        True
        >>> "test2" in client.databases
        True
        >>> client.databases["test1"].handle
        'duckdb:///:memory:'
        >>> client.databases["test2"].handle
        'duckdb:///:memory:'

        :return: mapping of alias to attached database
        """
        if not self._databases:
            self._databases = {}
        return self._databases

    def drop_database(self, name: str, missing_ok=False, **kwargs):
        """
        Drop a database.

        Example (in-memory):

        >>> client = Client()
        >>> db1 = client.attach_database("duckdb", alias="test1")
        >>> db2 = client.attach_database("duckdb", alias="test2")
        >>> len(client.databases)
        2
        >>> client.drop_database("test1")
        >>> len(client.databases)
        1

        Databases that persist on disk:

        >>> client = Client()
        >>> path = Path("tmp/test.db")
        >>> path.parent.mkdir(parents=True, exist_ok=True)
        >>> db = client.attach_database(f"duckdb:///{path}", alias="test")
        >>> len(client.databases)
        1
        >>> db.store({"persons": [{"id": "P1", "name": "John"}]})
        >>> db.commit()
        >>> Path("tmp/test.db").exists()
        True
        >>> client.drop_database("test")
        >>> len(client.databases)
        0
        >>> Path("tmp/test.db").exists()
        False

        Dropping a non-existent database:

        >>> client = Client()
        >>> client.drop_database("duckdb:///tmp/made-up1", missing_ok=True)
        >>> client.drop_database("duckdb:///tmp/made-up2", missing_ok=False)
        Traceback (most recent call last):
        ...
        ValueError: Database duckdb:///tmp/made-up2 not found

        :param name: alias of the database to drop
        :param missing_ok: if false, raise when databases are attached but none
            is registered under ``name``
        :param kwargs: passed through to the database's ``drop`` method
        :return:
        :raises ValueError: if the database is not found and ``missing_ok`` is false
        """
        if not self._databases:
            # NOTE(review): with nothing attached, ``name`` is attached via
            # get_database and then dropped, regardless of ``missing_ok``, and
            # the entry stays registered afterwards. The last doctest above
            # depends on this, so the behaviour is kept as-is.
            db = self.get_database(name, create_if_not_exists=True)
            db.drop(**kwargs)
            return
        if name in self._databases:
            db = self._databases[name]
            db.drop(**kwargs)
            del self._databases[name]
        elif not missing_ok:
            raise ValueError(f"Database {name} not found")

    def drop_all_databases(self, **kwargs):
        """
        Drop all databases.

        Example (in-memory):

        >>> client = Client()
        >>> db1 = client.attach_database("duckdb", alias="test1")
        >>> assert "test1" in client.databases
        >>> db2 = client.attach_database("duckdb", alias="test2")
        >>> assert "test2" in client.databases
        >>> client.drop_all_databases()
        >>> len(client.databases)
        0

        :param kwargs: passed through to :meth:`drop_database`
        :return:
        """
        if not self._databases:
            return
        # Iterate over a snapshot of the keys: drop_database removes entries
        # from the registry while we loop.
        for name in list(self._databases.keys()):
            self.drop_database(name, missing_ok=False, **kwargs)
        self._databases = {}