{
 "cells": [
  {
   "cell_type": "markdown",
   "source": [
    "# How to Check Referential Integrity\n",
    "\n",
    "This example uses MongoDB as the backing database and expects a server listening on `localhost:27017`.\n",
    "\n",
    "It loads a small set of countries and routes, then validates the database with referential-integrity checking turned on."
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "fc4794dd116ed21"
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "outputs": [],
   "source": [
    "from linkml_store import Client\n",
    "from linkml_store.utils.format_utils import load_objects\n",
    "\n",
    "client = Client()"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.760981Z",
     "start_time": "2024-05-04T19:51:08.378243Z"
    }
   },
   "id": "initial_id"
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "outputs": [],
   "source": [
    "# Attach the local MongoDB server to the client.\n",
    "db = client.attach_database(\"mongodb://localhost:27017\", \"test\")\n",
    "# Turn on the referential-integrity flag in the database metadata.\n",
    "db.metadata.ensure_referential_integrity = True\n",
    "# Use the countries LinkML schema for this database.\n",
    "db.set_schema_view(\"../../tests/input/countries/countries.linkml.yaml\")\n",
    "# Create the two collections used below; recreate_if_exists=True presumably\n",
    "# resets anything left over from an earlier run (confirm in the linkml-store docs).\n",
    "countries_coll = db.create_collection(\"Country\", alias=\"countries\", recreate_if_exists=True)\n",
    "routes_coll = db.create_collection(\"Route\", alias=\"routes\", recreate_if_exists=True)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.788932Z",
     "start_time": "2024-05-04T19:51:09.771112Z"
    }
   },
   "id": "cc164c0acbe4c39d"
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "outputs": [],
   "source": [
    "# Relative paths to the sample input data.\n",
    "COUNTRIES = \"../../tests/input/countries/countries.jsonl\"\n",
    "ROUTES = \"../../tests/input/countries/routes.csv\""
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.789681Z",
     "start_time": "2024-05-04T19:51:09.786454Z"
    }
   },
   "id": "5286ef4e9dd0f316"
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "outputs": [
    {
     "data": {
      "text/plain": "[{'origin': 'DE', 'destination': 'FR', 'method': 'rail'}]"
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "countries = load_objects(COUNTRIES)\n",
    "routes = load_objects(ROUTES)\n",
    "# Show the loaded routes as the cell output.\n",
    "routes"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.795894Z",
"start_time": "2024-05-04T19:51:09.790413Z"
    }
   },
   "id": "2e21988e4fc13f58"
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "outputs": [],
   "source": [
    "countries_coll.insert(countries)\n",
    "routes_coll.insert(routes)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.803272Z",
     "start_time": "2024-05-04T19:51:09.798758Z"
    }
   },
   "id": "668e59a8f28e7bfe"
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "outputs": [
    {
     "data": {
      "text/plain": "[{'origin': 'DE', 'destination': 'FR', 'method': 'rail'}]"
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "routes_coll.find().rows"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.810617Z",
     "start_time": "2024-05-04T19:51:09.804004Z"
    }
   },
   "id": "995e63f873ea9353"
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "outputs": [],
   "source": [
    "# In the recorded run this prints nothing: validation yields no results at this point.\n",
    "for result in db.iter_validate_database():\n",
    "    print(result)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.956191Z",
     "start_time": "2024-05-04T19:51:09.809082Z"
    }
   },
   "id": "a8ef16a3fbc6bfe6"
  },
  {
   "cell_type": "markdown",
   "source": [
    "## Inserting invalid data\n",
    "\n",
    "We will intentionally insert an invalid row: a route whose `origin` and `destination` codes do not match any existing country. Validation should then report one referential-integrity error for each of the two codes."
   ],
   "metadata": {
    "collapsed": false
   },
   "id": "24fb15bce092c2d1"
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "outputs": [],
   "source": [
    "routes_coll.insert({\"origin\": \"ZZZ\", \"destination\": \"YYY\", \"method\": \"rail\"})"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.961815Z",
     "start_time": "2024-05-04T19:51:09.956721Z"
    }
   },
   "id": "f712a82be775f413"
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "outputs": [
    {
     "data": {
      "text/plain": " origin destination method\n0 DE FR rail\n1 ZZZ YYY rail",
      "text/html": "<div>\n<style scoped>\n    .dataframe tbody tr th:only-of-type {\n        vertical-align: middle;\n    }\n\n    .dataframe tbody tr th {\n        vertical-align: top;\n    }\n\n    .dataframe thead th {\n        text-align: right;\n    }\n</style>\n<table border=\"1\" class=\"dataframe\">\n  <thead>\n    <tr style=\"text-align: right;\">\n      <th></th>\n      <th>origin</th>\n      <th>destination</th>\n      <th>method</th>\n    </tr>\n  </thead>\n  <tbody>\n    <tr>\n      <th>0</th>\n      <td>DE</td>\n      <td>FR</td>\n      <td>rail</td>\n    </tr>\n    <tr>\n      <th>1</th>\n      <td>ZZZ</td>\n      <td>YYY</td>\n      <td>rail</td>\n    </tr>\n  </tbody>\n</table>\n</div>"
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "routes_coll.find().rows_dataframe"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:51:09.974226Z",
     "start_time": "2024-05-04T19:51:09.961675Z"
    }
   },
   "id": "18ffa996e3893b96"
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "type='ReferentialIntegrity' severity= message='Referential integrity error: Country not found' instance='ZZZ' instance_index=None instantiates='Country'\n",
      "type='ReferentialIntegrity' severity= message='Referential integrity error: Country not found' instance='YYY' instance_index=None instantiates='Country'\n"
     ]
    }
   ],
   "source": [
    "# Materialize the validation results in a list before printing them.\n",
    "results = list(db.iter_validate_database())\n",
    "for result in results:\n",
    "    print(result)"
   ],
   "metadata": {
    "collapsed": false,
    "ExecuteTime": {
     "end_time": "2024-05-04T19:52:20.044928Z",
     "start_time": "2024-05-04T19:52:19.996008Z"
    }
   },
   "id": "c67517aece5d47c5"
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}