{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# Example: Storing an ontology\n",
    "\n",
    "This tutorial shows how to store an ontology, starting from its JSON representation: the nodes are inserted into a `linkml_store` collection in a DuckDB database, indexed with an `LLMIndex`, and then queried with a text search."
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "315813bcb5f486a4"
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "outputs": [],
   "source": [
    "# All imports live in this one cell so the notebook's dependencies are visible up front.\n",
    "import json\n",
    "from pathlib import Path\n",
    "\n",
    "import pandas as pd\n",
    "import pystow\n",
    "from linkml_store import Client\n",
    "from linkml_store.index.implementations.llm_index import LLMIndex"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:03.887292Z",
     "start_time": "2024-04-22T20:50:03.869390Z"
    }
   },
   "id": "b3962319d0e1cdd2"
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "outputs": [],
   "source": [
    "# Fetch the ontology JSON via pystow; `path` is the local file opened in the next cell.\n",
    "path = pystow.ensure(\n",
    "    \"tmp\",\n",
    "    \"eccode.json\",\n",
    "    url=\"https://w3id.org/biopragmatics/resources/eccode/eccode.json\",\n",
    ")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:03.887512Z",
     "start_time": "2024-04-22T20:50:03.873291Z"
    }
   },
   "id": "5a0030a3e24b1545"
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "outputs": [],
   "source": [
    "# Use a context manager so the file handle is closed once parsing is done,\n",
    "# and read explicitly as UTF-8 instead of the platform default encoding.\n",
    "with open(path, encoding=\"utf-8\") as graph_file:\n",
    "    graphdoc = json.load(graph_file)\n",
    "\n",
    "# The document holds a list under \"graphs\"; this tutorial works with the first entry.\n",
    "graph = graphdoc[\"graphs\"][0]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.181894Z",
     "start_time": "2024-04-22T20:50:03.875729Z"
    }
   },
   "id": "793566db96fc1cd9"
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "outputs": [
    {
     "data": {
      "text/plain": "(7177, 506022)"
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "len(graph[\"nodes\"]), len(graph[\"edges\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.183617Z",
     "start_time": "2024-04-22T20:50:04.181412Z"
    }
   },
   "id": "b3c3cf57c2d9aeed"
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "outputs": [],
   "source": [
    "# Single definition of the database location, reused when attaching the database below.\n",
    "DB_PATH = Path(\"/tmp/eccode.db\")\n",
    "\n",
    "# Remove any database file left over from a previous run (no error if it is absent).\n",
    "DB_PATH.unlink(missing_ok=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.314842Z",
     "start_time": "2024-04-22T20:50:04.186207Z"
    }
   },
   "id": "93e613ff3730dd2a"
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "outputs": [],
   "source": [
    "client = Client()\n",
    "\n",
    "# The handle is the \"duckdb:///\" scheme followed by the database file path,\n",
    "# i.e. \"duckdb:////tmp/eccode.db\" for the DB_PATH defined above.\n",
    "db = client.attach_database(f\"duckdb:///{DB_PATH}\", \"eccode\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.319296Z",
     "start_time": "2024-04-22T20:50:04.316606Z"
    }
   },
   "id": "6a8adce3d3ec93c6"
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "outputs": [],
   "source": [
    "nodes_collection = db.create_collection(\"Node\", \"nodes\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.326018Z",
     "start_time": "2024-04-22T20:50:04.319969Z"
    }
   },
   "id": "4fa95b75cd1f19cf"
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "outputs": [],
   "source": [
    "# Insert every node record of the graph into the collection.\n",
    "nodes_collection.insert(graph[\"nodes\"])"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.614554Z",
     "start_time": "2024-04-22T20:50:04.334717Z"
    }
   },
   "id": "aee6a95b4a86432d"
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "outputs": [],
   "source": [
    "index = LLMIndex(name=\"test\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T20:50:04.618406Z",
     "start_time": "2024-04-22T20:50:04.616386Z"
    }
   },
   "id": "8e75156bfdafe7b"
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "outputs": [],
   "source": [
    "# NOTE: this is the slow step. In the recorded run this cell took about\n",
    "# 21 minutes for the 7177 nodes, so expect a long wait on a full re-run.\n",
    "nodes_collection.attach_index(index)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T21:11:29.678762Z",
     "start_time": "2024-04-22T20:50:04.621586Z"
    }
   },
   "id": "993c0360941dbb3b"
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "outputs": [
    {
     "data": {
      "text/plain": "FloatProgress(value=0.0, layout=Layout(width='auto'), style=ProgressStyle(bar_color='black'))",
      "application/vnd.jupyter.widget-view+json": {
       "version_major": 2,
       "version_minor": 0,
       "model_id": "57e7e6483f4a44d480d2b33d20d52534"
      }
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "# Run a free-text search against the collection.\n",
    "qr = nodes_collection.search(\"sugar transporters\")"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T21:11:34.507263Z",
     "start_time": "2024-04-22T21:11:29.684218Z"
    }
   },
   "id": "e3abb5d529063c6e"
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "outputs": [],
   "source": [
    "# Each ranked row is indexed as [0] = similarity score, [1] = record with \"id\" and \"lbl\" keys.\n",
    "results = [\n",
    "    {\"sim\": hit[0], \"id\": hit[1][\"id\"], \"name\": hit[1][\"lbl\"]}\n",
    "    for hit in qr.ranked_rows\n",
    "]"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T21:11:34.514381Z",
     "start_time": "2024-04-22T21:11:34.512629Z"
    }
   },
   "id": "cc3bce09e81d66a"
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "outputs": [],
   "source": [
    "# Tabulate the hits; pandas was imported in the first code cell.\n",
    "df = pd.DataFrame(results)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-04-22T21:11:34.521054Z",
     "start_time": "2024-04-22T21:11:34.519586Z"
    }
   },
   "id": "5b9d6d63561b72db"
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "outputs": [
    {
     "data": {
      "text/plain": " sim id \\\n0 0.797146 http://purl.obolibrary.org/obo/eccode_7.5.2.2 \n1 0.792401 http://purl.obolibrary.org/obo/eccode_7.5.2.1 \n2 0.791971 http://purl.obolibrary.org/obo/eccode_7.3.2.3 \n3 0.791232 http://purl.obolibrary.org/obo/eccode_7.5.2.11 \n4 0.789132 http://purl.obolibrary.org/obo/eccode_7.6.2.12 \n... ... ... \n7172 0.673991 http://purl.obolibrary.org/obo/eccode_3.4.22.57 \n7173 0.673331 http://purl.obolibrary.org/obo/eccode_3.4.22.63 \n7174 0.670908 http://purl.obolibrary.org/obo/eccode_1.14.99 \n7175 0.668042 http://purl.obolibrary.org/obo/eccode_3.4.24.4 \n7176 0.663048 http://www.geneontology.org/formats/oboInOwl#h... \n\n name \n0 ABC-type oligosaccharide transporter \n1 ABC-type maltose transporter \n2 ABC-type sulfate transporter \n3 ABC-type D-galactose transporter \n4 ABC-type capsular-polysaccharide transporter \n... ... \n7172 caspase-4 \n7173 caspase-10 \n7174 Miscellaneous \n7175 3.4.24.30, 3.4.24.31, 3.4.24.32, 3.4.24.39 and... \n7176 has_obo_format_version \n\n[7177 rows x 3 columns]",
      "text/html": "
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
simidname
00.797146http://purl.obolibrary.org/obo/eccode_7.5.2.2ABC-type oligosaccharide transporter
10.792401http://purl.obolibrary.org/obo/eccode_7.5.2.1ABC-type maltose transporter
20.791971http://purl.obolibrary.org/obo/eccode_7.3.2.3ABC-type sulfate transporter
30.791232http://purl.obolibrary.org/obo/eccode_7.5.2.11ABC-type D-galactose transporter
40.789132http://purl.obolibrary.org/obo/eccode_7.6.2.12ABC-type capsular-polysaccharide transporter
............
71720.673991http://purl.obolibrary.org/obo/eccode_3.4.22.57caspase-4
71730.673331http://purl.obolibrary.org/obo/eccode_3.4.22.63caspase-10
71740.670908http://purl.obolibrary.org/obo/eccode_1.14.99Miscellaneous
71750.668042http://purl.obolibrary.org/obo/eccode_3.4.24.43.4.24.30, 3.4.24.31, 3.4.24.32, 3.4.24.39 and...
71760.663048http://www.geneontology.org/formats/oboInOwl#h...has_obo_format_version
\n

7177 rows × 3 columns

\n
" }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df" ], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-22T21:11:34.534196Z", "start_time": "2024-04-22T21:11:34.524830Z" } }, "id": "dc81533a0b074360" }, { "cell_type": "code", "execution_count": 24, "outputs": [], "source": [], "metadata": { "collapsed": false, "ExecuteTime": { "end_time": "2024-04-22T21:11:34.542208Z", "start_time": "2024-04-22T21:11:34.533067Z" } }, "id": "864179ee57c531f6" } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 5 }