Example: Storing an ontology
This tutorial shows how to load an ontology from its JSON representation, store its nodes in a linkml-store database, and then index and search them.
[11]:
import pandas as pd
import pystow
[12]:
# Fetch the eccode ontology JSON via pystow; `path` is opened as a file in the next cell.
# NOTE(review): pystow presumably caches the download locally and reuses it — confirm.
path = pystow.ensure(
    "tmp",
    "eccode.json",
    url="https://w3id.org/biopragmatics/resources/eccode/eccode.json",
)
[13]:
import json

# Parse the downloaded JSON document. The context manager guarantees the file
# handle is closed once parsing finishes, and the explicit encoding avoids
# depending on the platform's locale default.
with open(path, encoding="utf-8") as graph_file:
    graphdoc = json.load(graph_file)

# The document keeps its graphs in a list under "graphs"; use the first one.
graph = graphdoc["graphs"][0]
[14]:
# Display the graph's size as a (node count, edge count) tuple.
tuple(len(graph[section]) for section in ("nodes", "edges"))
[14]:
(7177, 506022)
[15]:
# Delete /tmp/eccode.db if it exists (-f keeps rm quiet when the file is missing);
# the same path is attached as a DuckDB database in the next cell.
!rm -f /tmp/eccode.db
[16]:
from linkml_store import Client

# Create a linkml-store client and attach a DuckDB database stored at /tmp/eccode.db.
# NOTE(review): the second argument ("eccode") is presumably the alias the client
# uses for this database — confirm against the linkml-store API.
client = Client()
db = client.attach_database("duckdb:////tmp/eccode.db", "eccode")
[17]:
# Create the collection that will hold the ontology nodes.
# NOTE(review): "Node" and "nodes" are passed positionally — presumably the
# collection's type name and its alias; confirm against the linkml-store API.
nodes_collection = db.create_collection("Node", "nodes")
[18]:
# Insert the nodes of the parsed graph into the collection.
nodes_collection.insert(graph["nodes"])
[19]:
from linkml_store.index.implementations.llm_index import LLMIndex

# Build an index object named "test" from linkml-store's LLMIndex implementation.
# NOTE(review): presumably this computes embeddings through an LLM provider and
# may need credentials at search time — confirm before running.
index = LLMIndex(name="test")
[20]:
# Attach the index to the nodes collection; the search in the next cell is
# issued against this collection.
nodes_collection.attach_index(index)
[21]:
# Run a free-text search over the collection; the result's `ranked_rows`
# attribute is unpacked in the next cell.
qr = nodes_collection.search("sugar transporters")
[22]:
# Flatten each ranked row into a plain record: position 0 supplies the
# similarity score, position 1 the matched node (its "id" and "lbl" fields).
results = [
    {
        "sim": ranked[0],
        "id": ranked[1]["id"],
        "name": ranked[1]["lbl"],
    }
    for ranked in qr.ranked_rows
]
[23]:
# Tabulate the search records, one row per ranked result. pandas is already
# imported as `pd` in the notebook's first cell, so it is not re-imported here.
df = pd.DataFrame(results)
[24]:
# Show the results table; the recorded output elides the middle rows of the
# 7177-row frame.
df
[24]:
| | sim | id | name |
---|---|---|---|
0 | 0.797146 | http://purl.obolibrary.org/obo/eccode_7.5.2.2 | ABC-type oligosaccharide transporter |
1 | 0.792401 | http://purl.obolibrary.org/obo/eccode_7.5.2.1 | ABC-type maltose transporter |
2 | 0.791971 | http://purl.obolibrary.org/obo/eccode_7.3.2.3 | ABC-type sulfate transporter |
3 | 0.791232 | http://purl.obolibrary.org/obo/eccode_7.5.2.11 | ABC-type D-galactose transporter |
4 | 0.789132 | http://purl.obolibrary.org/obo/eccode_7.6.2.12 | ABC-type capsular-polysaccharide transporter |
... | ... | ... | ... |
7172 | 0.673991 | http://purl.obolibrary.org/obo/eccode_3.4.22.57 | caspase-4 |
7173 | 0.673331 | http://purl.obolibrary.org/obo/eccode_3.4.22.63 | caspase-10 |
7174 | 0.670908 | http://purl.obolibrary.org/obo/eccode_1.14.99 | Miscellaneous |
7175 | 0.668042 | http://purl.obolibrary.org/obo/eccode_3.4.24.4 | 3.4.24.30, 3.4.24.31, 3.4.24.32, 3.4.24.39 and... |
7176 | 0.663048 | http://www.geneontology.org/formats/oboInOwl#h... | has_obo_format_version |
7177 rows × 3 columns
[24]: