{ "cells": [ { "cell_type": "markdown", "source": [ "# How to query the Monarch-KG\n", "\n", "This notebook illustrates how to use LinkML-Store to query the Monarch-KG database (DuckDB serialization).\n", "\n", "First we initialize a `Client` object:" ], "metadata": { "collapsed": false }, "id": "8d81cec461c4117e" }, { "cell_type": "code", "execution_count": 9, "outputs": [], "source": [ "from linkml_store.api.client import Client\n", "\n", "client = Client()" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-25T23:11:28.071286Z", "start_time": "2024-04-25T23:11:28.068145Z" } }, "id": "initial_id" }, { "cell_type": "markdown", "source": [ "Next we download the database dump (using pystow, which caches the file so that it is only downloaded when needed):" ], "metadata": { "collapsed": false }, "id": "f681fee14e155210" }, { "cell_type": "code", "execution_count": 11, "outputs": [ { "data": { "text/plain": "Downloading monarch-kg.duckdb.gz: 0.00B [00:00, ?B/s]", "application/vnd.jupyter.widget-view+json": { "version_major": 2, "version_minor": 0, "model_id": "05a49de2491a449085974a37a0656c37" } }, "metadata": {}, "output_type": "display_data" } ], "source": [ "from linkml_store.constants import LINKML_STORE_MODULE\n", "\n", "MONARCH_KG_DB = \"https://data.monarchinitiative.org/monarch-kg/latest/monarch-kg.duckdb.gz\"\n", "\n", "path = LINKML_STORE_MODULE.ensure_gunzip(url=MONARCH_KG_DB, autoclean=True)" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-25T23:31:55.438089Z", "start_time": "2024-04-25T23:31:03.181316Z" } }, "id": "7f026805247b381d" }, { "cell_type": "code", "execution_count": 12, "outputs": [], "source": [ "database = client.attach_database(f\"duckdb:///{path}\", \"monarch-kg\")" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-25T23:32:03.566544Z", "start_time": "2024-04-25T23:32:03.561214Z" } }, "id": "af0f809644d5fe95" }, { "cell_type": "code", "execution_count": 13, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"/Users/cjm/Library/Caches/pypoetry/virtualenvs/linkml-store-8ZYO4kTy-py3.10/lib/python3.10/site-packages/duckdb_engine/__init__.py:588: SAWarning: Did not recognize type 'list' of column 'closure'\n", " columns = self._get_columns_info(rows, domains, enums, schema) # type: ignore[attr-defined]\n", "/Users/cjm/Library/Caches/pypoetry/virtualenvs/linkml-store-8ZYO4kTy-py3.10/lib/python3.10/site-packages/duckdb_engine/__init__.py:588: SAWarning: Did not recognize type 'list' of column 'closure_label'\n", " columns = self._get_columns_info(rows, domains, enums, schema) # type: ignore[attr-defined]\n" ] } ], "source": [ "edges_coll = database.get_collection(\"denormalized_edges\")" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-25T23:32:04.300145Z", "start_time": "2024-04-25T23:32:04.041670Z" } }, "id": "e858f9917db96e42" }, { "cell_type": "code", "execution_count": 14, "outputs": [ { "data": { "text/plain": " agent_type aggregator_knowledge_source \\\n0 not_provided infores:monarchinitiative \n1 not_provided infores:monarchinitiative \n2 not_provided infores:monarchinitiative \n3 not_provided infores:monarchinitiative \n4 not_provided infores:monarchinitiative \n.. ... ... \n95 not_provided infores:monarchinitiative \n96 not_provided infores:monarchinitiative \n97 not_provided infores:monarchinitiative \n98 not_provided infores:monarchinitiative \n99 not_provided infores:monarchinitiative \n\n category evidence_count frequency_qualifier \\\n0 biolink:PairwiseGeneToGeneInteraction 4 None \n1 biolink:PairwiseGeneToGeneInteraction 2 None \n2 biolink:PairwiseGeneToGeneInteraction 4 None \n3 biolink:PairwiseGeneToGeneInteraction 2 None \n4 biolink:PairwiseGeneToGeneInteraction 4 None \n.. ... ... ... 
\n95 biolink:PairwiseGeneToGeneInteraction 3 None \n96 biolink:PairwiseGeneToGeneInteraction 4 None \n97 biolink:PairwiseGeneToGeneInteraction 3 None \n98 biolink:PairwiseGeneToGeneInteraction 4 None \n99 biolink:PairwiseGeneToGeneInteraction 4 None \n\n frequency_qualifier_category frequency_qualifier_closure \\\n0 None None \n1 None None \n2 None None \n3 None None \n4 None None \n.. ... ... \n95 None None \n96 None None \n97 None None \n98 None None \n99 None None \n\n frequency_qualifier_closure_label frequency_qualifier_label \\\n0 None None \n1 None None \n2 None None \n3 None None \n4 None None \n.. ... ... \n95 None None \n96 None None \n97 None None \n98 None None \n99 None None \n\n frequency_qualifier_namespace ... stage_qualifier_label \\\n0 None ... None \n1 None ... None \n2 None ... None \n3 None ... None \n4 None ... None \n.. ... ... ... \n95 None ... None \n96 None ... None \n97 None ... None \n98 None ... None \n99 None ... None \n\n stage_qualifier_namespace subject subject_category subject_closure \\\n0 None FB:FBgn0033485 biolink:Gene None \n1 None FB:FBgn0033485 biolink:Gene None \n2 None FB:FBgn0033485 biolink:Gene None \n3 None FB:FBgn0033485 biolink:Gene None \n4 None FB:FBgn0033485 biolink:Gene None \n.. ... ... ... ... \n95 None FB:FBgn0050000 biolink:Gene None \n96 None FB:FBgn0050000 biolink:Gene None \n97 None FB:FBgn0050000 biolink:Gene None \n98 None FB:FBgn0050000 biolink:Gene None \n99 None FB:FBgn0050000 biolink:Gene None \n\n subject_closure_label subject_label subject_namespace subject_taxon \\\n0 None RpLP0-like FB NCBITaxon:7227 \n1 None RpLP0-like FB NCBITaxon:7227 \n2 None RpLP0-like FB NCBITaxon:7227 \n3 None RpLP0-like FB NCBITaxon:7227 \n4 None RpLP0-like FB NCBITaxon:7227 \n.. ... ... ... ... 
\n95 None GstT1 FB NCBITaxon:7227 \n96 None GstT1 FB NCBITaxon:7227 \n97 None GstT1 FB NCBITaxon:7227 \n98 None GstT1 FB NCBITaxon:7227 \n99 None GstT1 FB NCBITaxon:7227 \n\n subject_taxon_label \n0 Drosophila melanogaster \n1 Drosophila melanogaster \n2 Drosophila melanogaster \n3 Drosophila melanogaster \n4 Drosophila melanogaster \n.. ... \n95 Drosophila melanogaster \n96 Drosophila melanogaster \n97 Drosophila melanogaster \n98 Drosophila melanogaster \n99 Drosophila melanogaster \n\n[100 rows x 65 columns]", "text/html": "
\n | agent_type | \naggregator_knowledge_source | \ncategory | \nevidence_count | \nfrequency_qualifier | \nfrequency_qualifier_category | \nfrequency_qualifier_closure | \nfrequency_qualifier_closure_label | \nfrequency_qualifier_label | \nfrequency_qualifier_namespace | \n... | \nstage_qualifier_label | \nstage_qualifier_namespace | \nsubject | \nsubject_category | \nsubject_closure | \nsubject_closure_label | \nsubject_label | \nsubject_namespace | \nsubject_taxon | \nsubject_taxon_label | \n
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0033485 | \nbiolink:Gene | \nNone | \nNone | \nRpLP0-like | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
1 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n2 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0033485 | \nbiolink:Gene | \nNone | \nNone | \nRpLP0-like | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
2 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0033485 | \nbiolink:Gene | \nNone | \nNone | \nRpLP0-like | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
3 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n2 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0033485 | \nbiolink:Gene | \nNone | \nNone | \nRpLP0-like | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
4 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0033485 | \nbiolink:Gene | \nNone | \nNone | \nRpLP0-like | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n... | \n
95 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n3 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0050000 | \nbiolink:Gene | \nNone | \nNone | \nGstT1 | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
96 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0050000 | \nbiolink:Gene | \nNone | \nNone | \nGstT1 | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
97 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n3 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0050000 | \nbiolink:Gene | \nNone | \nNone | \nGstT1 | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
98 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0050000 | \nbiolink:Gene | \nNone | \nNone | \nGstT1 | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
99 | \nnot_provided | \ninfores:monarchinitiative | \nbiolink:PairwiseGeneToGeneInteraction | \n4 | \nNone | \nNone | \nNone | \nNone | \nNone | \nNone | \n... | \nNone | \nNone | \nFB:FBgn0050000 | \nbiolink:Gene | \nNone | \nNone | \nGstT1 | \nFB | \nNCBITaxon:7227 | \nDrosophila melanogaster | \n
100 rows × 65 columns
\n