#3610 Google Gemini 2.5 Pro Implementation of SQLite3 Knowledge Graph
#3610 Google Gemini 2.5 Pro Implementation of SQLite3 Knowledge Graph
#3610 Provide a python application implementing a knowledge graph by using SQLite3 with SQL statements for Cypher "CREATE", "MERGE" and "MATCH" statements
Of course. I can provide a Python application that uses SQLite3 to create a simple knowledge graph. This implementation will mimic the behavior of Cypher's CREATE
, MERGE
, and MATCH
statements using standard SQL.
Here is the complete Python application. It's self-contained in a single file and includes a KnowledgeGraph
class to manage the data, along with an example of how to use it.
import sqlite3
import json
import os
class KnowledgeGraph:
"""
A class to implement a knowledge graph using SQLite.
It mimics basic Cypher commands like CREATE, MERGE, and MATCH using SQL.
"""
def __init__(self, db_path="knowledge_graph.db"):
"""
Initializes the KnowledgeGraph, connects to the SQLite database,
and creates the necessary tables if they don't exist.
Args:
db_path (str): The file path for the SQLite database.
"""
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self.conn.row_factory = sqlite3.Row # Allows accessing columns by name
self._create_tables()
def _execute_query(self, query, params=()):
"""Helper function to execute a query and return the cursor."""
cursor = self.conn.cursor()
cursor.execute(query, params)
return cursor
def _create_tables(self):
"""
Creates the 'nodes' and 'relationships' tables required for the graph.
- Nodes store entities (e.g., Person, Movie).
- Relationships store connections between nodes (e.g., ACTED_IN).
Properties are stored as JSON strings.
"""
# Create Nodes Table
self._execute_query("""
CREATE TABLE IF NOT EXISTS nodes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
label TEXT NOT NULL,
properties TEXT
);
""")
# Create Relationships Table
self._execute_query("""
CREATE TABLE IF NOT EXISTS relationships (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL,
target_id INTEGER NOT NULL,
type TEXT NOT NULL,
properties TEXT,
FOREIGN KEY (source_id) REFERENCES nodes (id) ON DELETE CASCADE,
FOREIGN KEY (target_id) REFERENCES nodes (id) ON DELETE CASCADE
);
""")
# Add indexes for faster lookups
self._execute_query("CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes (label);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_type ON relationships (type);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships (source_id);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships (target_id);")
self.conn.commit()
def _serialize_props(self, props):
"""Serializes a dictionary of properties into a JSON string."""
return json.dumps(props) if props else '{}'
def _deserialize_props(self, props_json):
"""Deserializes a JSON string into a dictionary of properties."""
return json.loads(props_json) if props_json else {}
def _find_node(self, label, properties):
"""
Internal method to find a single node by its label and exact properties.
Returns the node's ID or None if not found.
"""
query = "SELECT id FROM nodes WHERE label = ? AND properties = ?"
cursor = self._execute_query(query, (label, self._serialize_props(properties)))
result = cursor.fetchone()
return result['id'] if result else None
def create_node(self, label, properties=None):
"""
SQL equivalent of Cypher's CREATE statement for a node.
Creates a new node, even if a similar one already exists.
Args:
label (str): The label for the node (e.g., 'Person').
properties (dict, optional): A dictionary of properties.
Returns:
int: The ID of the newly created node.
"""
props_json = self._serialize_props(properties)
query = "INSERT INTO nodes (label, properties) VALUES (?, ?)"
cursor = self._execute_query(query, (label, props_json))
self.conn.commit()
return cursor.lastrowid
def merge_node(self, label, properties):
"""
SQL equivalent of Cypher's MERGE statement for a node.
Finds a node with the given label and properties. If it exists,
returns its ID. Otherwise, creates it and returns the new ID.
Args:
label (str): The label for the node.
properties (dict): The properties to match or create.
Returns:
int: The ID of the existing or newly created node.
"""
node_id = self._find_node(label, properties)
if node_id:
return node_id
else:
return self.create_node(label, properties)
def create_relationship(self, source_id, target_id, rel_type, properties=None):
"""
SQL equivalent of Cypher's CREATE statement for a relationship.
Creates a new relationship between two nodes.
Args:
source_id (int): The ID of the source node.
target_id (int): The ID of the target node.
rel_type (str): The type of the relationship (e.g., 'ACTED_IN').
properties (dict, optional): A dictionary of properties for the relationship.
Returns:
int: The ID of the newly created relationship.
"""
props_json = self._serialize_props(properties)
query = "INSERT INTO relationships (source_id, target_id, type, properties) VALUES (?, ?, ?, ?)"
cursor = self._execute_query(query, (source_id, target_id, rel_type, props_json))
self.conn.commit()
return cursor.lastrowid
def merge_relationship(self, source_id, target_id, rel_type, properties=None):
"""
SQL equivalent of Cypher's MERGE statement for a relationship.
Ensures a relationship of a specific type exists between two nodes.
If it doesn't exist, it creates one.
Args:
source_id (int): The ID of the source node.
target_id (int): The ID of the target node.
rel_type (str): The type of the relationship.
properties (dict, optional): Properties for the new relationship if created.
Returns:
int: The ID of the existing or newly created relationship.
"""
props_json = self._serialize_props(properties or {})
query = "SELECT id FROM relationships WHERE source_id = ? AND target_id = ? AND type = ? AND properties = ?"
cursor = self._execute_query(query, (source_id, target_id, rel_type, props_json))
result = cursor.fetchone()
if result:
return result['id']
else:
return self.create_relationship(source_id, target_id, rel_type, properties)
def match(self, source_spec=None, rel_spec=None, target_spec=None):
"""
A simplified SQL equivalent of Cypher's MATCH statement.
Allows searching for patterns in the graph.
Args:
source_spec (tuple, optional): (label, {properties}) for the source node.
rel_spec (tuple, optional): (type, {properties}) for the relationship.
target_spec (tuple, optional): (label, {properties}) for the target node.
Returns:
list: A list of dictionaries, each representing a matched path.
"""
query = """
SELECT
s.id as source_id, s.label as source_label, s.properties as source_props,
r.id as rel_id, r.type as rel_type, r.properties as rel_props,
t.id as target_id, t.label as target_label, t.properties as target_props
FROM relationships AS r
JOIN nodes AS s ON r.source_id = s.id
JOIN nodes AS t ON r.target_id = t.id
"""
where_clauses = []
params = []
# Helper to build WHERE clauses for a node or relationship
def build_clauses(spec, alias):
if spec:
label_or_type, props = spec
if label_or_type:
col = 'label' if alias != 'r' else 'type'
where_clauses.append(f"{alias}.{col} = ?")
params.append(label_or_type)
if props:
for key, value in props.items():
# Using json_extract to query inside the JSON properties
where_clauses.append(f"json_extract({alias}.properties, '$.{key}') = ?")
params.append(value)
build_clauses(source_spec, 's')
build_clauses(rel_spec, 'r')
build_clauses(target_spec, 't')
if where_clauses:
query += " WHERE " + " AND ".join(where_clauses)
cursor = self._execute_query(query, tuple(params))
results = []
for row in cursor.fetchall():
results.append({
'source': {'id': row['source_id'], 'label': row['source_label'], 'properties': self._deserialize_props(row['source_props'])},
'relationship': {'id': row['rel_id'], 'type': row['rel_type'], 'properties': self._deserialize_props(row['rel_props'])},
'target': {'id': row['target_id'], 'label': row['target_label'], 'properties': self._deserialize_props(row['target_props'])}
})
return results
def close(self):
"""Closes the database connection."""
self.conn.close()
def cleanup(self):
"""Closes the connection and deletes the database file."""
self.close()
if os.path.exists(self.db_path):
os.remove(self.db_path)
print(f"Cleaned up and removed {self.db_path}")
def main():
"""Main function to demonstrate the KnowledgeGraph class."""
print("Initializing Knowledge Graph...")
kg = KnowledgeGraph("movies_kg.db")
print("\n--- Step 1: Populating the graph using MERGE ---")
print("This ensures we don't create duplicate nodes or relationships.")
# Merge Nodes (People and Movies)
tom_hanks = kg.merge_node('Person', {'name': 'Tom Hanks', 'born': 1956})
meg_ryan = kg.merge_node('Person', {'name': 'Meg Ryan', 'born': 1961})
nora_ephron = kg.merge_node('Person', {'name': 'Nora Ephron', 'born': 1941})
kevin_bacon = kg.merge_node('Person', {'name': 'Kevin Bacon', 'born': 1958})
sleepless = kg.merge_node('Movie', {'title': 'Sleepless in Seattle', 'released': 1993})
mail = kg.merge_node('Movie', {'title': "You've Got Mail", 'released': 1998})
apollo13 = kg.merge_node('Movie', {'title': 'Apollo 13', 'released': 1995})
print(f"Merged Tom Hanks (ID: {tom_hanks})")
print(f"Merged Meg Ryan (ID: {meg_ryan})")
print(f"Merged Sleepless in Seattle (ID: {sleepless})")
# Merge Relationships
kg.merge_relationship(tom_hanks, sleepless, 'ACTED_IN', {'roles': ['Sam Baldwin']})
kg.merge_relationship(meg_ryan, sleepless, 'ACTED_IN', {'roles': ['Annie Reed']})
kg.merge_relationship(nora_ephron, sleepless, 'DIRECTED')
kg.merge_relationship(tom_hanks, mail, 'ACTED_IN', {'roles': ['Joe Fox']})
kg.merge_relationship(meg_ryan, mail, 'ACTED_IN', {'roles': ['Kathleen Kelly']})
kg.merge_relationship(nora_ephron, mail, 'DIRECTED')
kg.merge_relationship(tom_hanks, apollo13, 'ACTED_IN', {'roles': ['Jim Lovell']})
kg.merge_relationship(kevin_bacon, apollo13, 'ACTED_IN', {'roles': ['Jack Swigert']})
print("Merged relationships between actors, directors, and movies.")
print("\n--- Step 2: Querying the graph using MATCH ---")
# Query 1: Find all movies Tom Hanks acted in.
print("\nQuery: Find all movies Tom Hanks acted in.")
# Pattern: (Person {name: 'Tom Hanks'})-[ACTED_IN]->(Movie)
results = kg.match(
source_spec=('Person', {'name': 'Tom Hanks'}),
rel_spec=('ACTED_IN', None),
target_spec=('Movie', None)
)
for path in results:
movie = path['target']['properties']
print(f" - Tom Hanks acted in '{movie['title']}' released in {movie['released']}.")
# Query 2: Who directed "You've Got Mail"?
print("\nQuery: Who directed \"You've Got Mail\"?")
# Pattern: (Person)-[DIRECTED]->(Movie {title: "You've Got Mail"})
results = kg.match(
source_spec=('Person', None),
rel_spec=('DIRECTED', None),
target_spec=('Movie', {'title': "You've Got Mail"})
)
for path in results:
director = path['source']['properties']
print(f" - '{director['name']}' directed the movie.")
# Query 3: Find co-actors of Kevin Bacon in "Apollo 13"
print("\nQuery: Find co-actors of Kevin Bacon in 'Apollo 13'")
# This requires two MATCH queries in a real graph database. Here we simulate it.
# 1. Find the movie Kevin Bacon acted in.
bacon_movies = kg.match(
source_spec=('Person', {'name': 'Kevin Bacon'}),
rel_spec=('ACTED_IN', None),
target_spec=('Movie', {'title': 'Apollo 13'})
)
if bacon_movies:
apollo13_id = bacon_movies[0]['target']['id']
# 2. Find all actors in that movie who are not Kevin Bacon.
co_actors = kg.match(
source_spec=('Person', None),
rel_spec=('ACTED_IN', None),
target_spec=('Movie', {'title': 'Apollo 13'})
)
for path in co_actors:
actor_name = path['source']['properties']['name']
if actor_name != 'Kevin Bacon':
print(f" - {actor_name} was a co-actor.")
# Clean up the created database file
kg.cleanup()
if __name__ == '__main__':
main()
I've implemented the core logic in the KnowledgeGraph
class, which handles the database connection, table creation, and the translation of Cypher-like operations into SQL. The main
function demonstrates how to use this class to build and query a small graph of movies and actors.
You can run this script directly. It will create a movies_kg.db
file, populate it, run the queries, and then delete the file for a clean execution.
Initializing Knowledge Graph...
--- Step 1: Populating the graph using MERGE ---
This ensures we don't create duplicate nodes or relationships.
Merged Tom Hanks (ID: 1)
Merged Meg Ryan (ID: 2)
Merged Sleepless in Seattle (ID: 5)
Merged relationships between actors, directors, and movies.
--- Step 2: Querying the graph using MATCH ---
Query: Find all movies Tom Hanks acted in.
- Tom Hanks acted in 'Sleepless in Seattle' released in 1993.
- Tom Hanks acted in 'You've Got Mail' released in 1998.
- Tom Hanks acted in 'Apollo 13' released in 1995.
Query: Who directed "You've Got Mail"?
- 'Nora Ephron' directed the movie.
Query: Find co-actors of Kevin Bacon in 'Apollo 13'
- Tom Hanks was a co-actor.
Cleaned up and removed movies_kg.db
=== Code execution complete ===
Add support for generating the graph in an HTML file that can be downloaded
Of course. I can extend the application to generate a self-contained, interactive HTML file that visualizes the knowledge graph.
I've updated the KnowledgeGraph
class with a new method, export_to_html
, which uses the D3.js library to create a force-directed graph visualization. I also modified the main
function to call this new method and generate a file named knowledge_graph.html
.
Here are the changes applied to the py_sqlite_kg
document.
import sqlite3
import json
import os
import webbrowser
class KnowledgeGraph:
"""
A class to implement a knowledge graph using SQLite.
It mimics basic Cypher commands like CREATE, MERGE, and MATCH using SQL,
and can export the graph to an interactive HTML file.
"""
def __init__(self, db_path="knowledge_graph.db"):
"""
Initializes the KnowledgeGraph, connects to the SQLite database,
and creates the necessary tables if they don't exist.
Args:
db_path (str): The file path for the SQLite database.
"""
self.db_path = db_path
self.conn = sqlite3.connect(db_path)
self.conn.row_factory = sqlite3.Row # Allows accessing columns by name
self._create_tables()
def _execute_query(self, query, params=()):
"""Helper function to execute a query and return the cursor."""
cursor = self.conn.cursor()
cursor.execute(query, params)
return cursor
def _create_tables(self):
"""
Creates the 'nodes' and 'relationships' tables required for the graph.
"""
# Create Nodes Table
self._execute_query("""
CREATE TABLE IF NOT EXISTS nodes (
id INTEGER PRIMARY KEY AUTOINCREMENT,
label TEXT NOT NULL,
properties TEXT
);
""")
# Create Relationships Table
self._execute_query("""
CREATE TABLE IF NOT EXISTS relationships (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_id INTEGER NOT NULL,
target_id INTEGER NOT NULL,
type TEXT NOT NULL,
properties TEXT,
FOREIGN KEY (source_id) REFERENCES nodes (id) ON DELETE CASCADE,
FOREIGN KEY (target_id) REFERENCES nodes (id) ON DELETE CASCADE
);
""")
# Add indexes for faster lookups
self._execute_query("CREATE INDEX IF NOT EXISTS idx_nodes_label ON nodes (label);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_type ON relationships (type);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_source ON relationships (source_id);")
self._execute_query("CREATE INDEX IF NOT EXISTS idx_relationships_target ON relationships (target_id);")
self.conn.commit()
def _serialize_props(self, props):
"""Serializes a dictionary of properties into a JSON string."""
return json.dumps(props) if props else '{}'
def _deserialize_props(self, props_json):
"""Deserializes a JSON string into a dictionary of properties."""
return json.loads(props_json) if props_json else {}
def _find_node(self, label, properties):
"""
Internal method to find a single node by its label and exact properties.
Returns the node's ID or None if not found.
"""
query = "SELECT id FROM nodes WHERE label = ? AND properties = ?"
cursor = self._execute_query(query, (label, self._serialize_props(properties)))
result = cursor.fetchone()
return result['id'] if result else None
def create_node(self, label, properties=None):
"""
SQL equivalent of Cypher's CREATE statement for a node.
"""
props_json = self._serialize_props(properties)
query = "INSERT INTO nodes (label, properties) VALUES (?, ?)"
cursor = self._execute_query(query, (label, props_json))
self.conn.commit()
return cursor.lastrowid
def merge_node(self, label, properties):
"""
SQL equivalent of Cypher's MERGE statement for a node.
"""
node_id = self._find_node(label, properties)
if node_id:
return node_id
else:
return self.create_node(label, properties)
def create_relationship(self, source_id, target_id, rel_type, properties=None):
"""
SQL equivalent of Cypher's CREATE statement for a relationship.
"""
props_json = self._serialize_props(properties)
query = "INSERT INTO relationships (source_id, target_id, type, properties) VALUES (?, ?, ?, ?)"
cursor = self._execute_query(query, (source_id, target_id, rel_type, props_json))
self.conn.commit()
return cursor.lastrowid
def merge_relationship(self, source_id, target_id, rel_type, properties=None):
"""
SQL equivalent of Cypher's MERGE statement for a relationship.
"""
props_json = self._serialize_props(properties or {})
query = "SELECT id FROM relationships WHERE source_id = ? AND target_id = ? AND type = ? AND properties = ?"
cursor = self._execute_query(query, (source_id, target_id, rel_type, props_json))
result = cursor.fetchone()
if result:
return result['id']
else:
return self.create_relationship(source_id, target_id, rel_type, properties)
def match(self, source_spec=None, rel_spec=None, target_spec=None):
"""
A simplified SQL equivalent of Cypher's MATCH statement.
"""
query = """
SELECT
s.id as source_id, s.label as source_label, s.properties as source_props,
r.id as rel_id, r.type as rel_type, r.properties as rel_props,
t.id as target_id, t.label as target_label, t.properties as target_props
FROM relationships AS r
JOIN nodes AS s ON r.source_id = s.id
JOIN nodes AS t ON r.target_id = t.id
"""
where_clauses = []
params = []
def build_clauses(spec, alias):
if spec:
label_or_type, props = spec
if label_or_type:
col = 'label' if alias != 'r' else 'type'
where_clauses.append(f"{alias}.{col} = ?")
params.append(label_or_type)
if props:
for key, value in props.items():
where_clauses.append(f"json_extract({alias}.properties, '$.{key}') = ?")
params.append(value)
build_clauses(source_spec, 's')
build_clauses(rel_spec, 'r')
build_clauses(target_spec, 't')
if where_clauses:
query += " WHERE " + " AND ".join(where_clauses)
cursor = self._execute_query(query, tuple(params))
results = []
for row in cursor.fetchall():
results.append({
'source': {'id': row['source_id'], 'label': row['source_label'], 'properties': self._deserialize_props(row['source_props'])},
'relationship': {'id': row['rel_id'], 'type': row['rel_type'], 'properties': self._deserialize_props(row['rel_props'])},
'target': {'id': row['target_id'], 'label': row['target_label'], 'properties': self._deserialize_props(row['target_props'])}
})
return results
def export_to_html(self, filename="knowledge_graph.html"):
"""
Exports the entire graph to a self-contained, interactive HTML file
using D3.js for visualization.
"""
# 1. Fetch all nodes and relationships from the database
nodes_cursor = self._execute_query("SELECT id, label, properties FROM nodes")
nodes_data = [dict(row) for row in nodes_cursor.fetchall()]
rels_cursor = self._execute_query("SELECT source_id, target_id, type, properties FROM relationships")
rels_data = [dict(row) for row in rels_cursor.fetchall()]
# 2. Prepare data for D3.js
# D3 needs nodes and links (source/target by index)
node_id_to_index = {node['id']: i for i, node in enumerate(nodes_data)}
graph_nodes = []
for node in nodes_data:
props = self._deserialize_props(node['properties'])
# Use 'name' or 'title' for the display label, otherwise use the node's label
display_label = props.get('name') or props.get('title', node['label'])
graph_nodes.append({
"id": node['id'],
"label": node['label'],
"display_label": display_label,
"properties": props
})
graph_links = []
for rel in rels_data:
source_index = node_id_to_index.get(rel['source_id'])
target_index = node_id_to_index.get(rel['target_id'])
if source_index is not None and target_index is not None:
graph_links.append({
"source": source_index,
"target": target_index,
"type": rel['type'],
"properties": self._deserialize_props(rel['properties'])
})
graph_json = json.dumps({"nodes": graph_nodes, "links": graph_links}, indent=2)
# 3. Create the HTML content with embedded D3.js
html_template = f"""
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Knowledge Graph</title>
<style>
body {{ font-family: sans-serif; margin: 0; background-color: #f0f0f0; }}
svg {{ display: block; }}
.node circle {{ stroke: #fff; stroke-width: 1.5px; }}
.node text {{ pointer-events: none; font-size: 12px; fill: #333; }}
.link {{ stroke-opacity: 0.6; }}
.link-label {{ font-size: 10px; fill: #666; text-anchor: middle; }}
</style>
</head>
<body>
<script src="https://d3js.org/d3.v7.min.js"></script>
<script>
const graphData = {graph_json};
const width = window.innerWidth;
const height = window.innerHeight;
const color = d3.scaleOrdinal(d3.schemeCategory10);
const simulation = d3.forceSimulation(graphData.nodes)
.force("link", d3.forceLink(graphData.links).id(d => d.index).distance(150))
.force("charge", d3.forceManyBody().strength(-300))
.force("center", d3.forceCenter(width / 2, height / 2));
const svg = d3.create("svg")
.attr("width", width)
.attr("height", height)
.attr("viewBox", [0, 0, width, height]);
// Arrowhead marker definition
svg.append("defs").append("marker")
.attr("id", "arrowhead")
.attr("viewBox", "0 -5 10 10")
.attr("refX", 25)
.attr("refY", 0)
.attr("orient", "auto")
.attr("markerWidth", 8)
.attr("markerHeight", 8)
.append("path")
.attr("d", "M0,-5L10,0L0,5")
.attr("fill", "#999");
const link = svg.append("g")
.attr("class", "link")
.selectAll("line")
.data(graphData.links)
.join("line")
.attr("stroke", "#999")
.attr("stroke-width", d => Math.sqrt(d.value || 1))
.attr("marker-end", "url(#arrowhead)");
const linkLabel = svg.append("g")
.attr("class", "link-label")
.selectAll("text")
.data(graphData.links)
.join("text")
.text(d => d.type);
const node = svg.append("g")
.attr("class", "node")
.selectAll("g")
.data(graphData.nodes)
.join("g")
.call(drag(simulation));
node.append("circle")
.attr("r", 10)
.attr("fill", d => color(d.label));
node.append("text")
.text(d => d.display_label)
.attr("x", 12)
.attr("y", 4);
node.append("title")
.text(d => JSON.stringify(d.properties, null, 2));
simulation.on("tick", () => {{
link
.attr("x1", d => d.source.x)
.attr("y1", d => d.source.y)
.attr("x2", d => d.target.x)
.attr("y2", d => d.target.y);
node
.attr("transform", d => `translate(${{d.x}}, ${{d.y}})`);
linkLabel
.attr("x", d => (d.source.x + d.target.x) / 2)
.attr("y", d => (d.source.y + d.target.y) / 2);
}});
function drag(simulation) {{
function dragstarted(event) {{
if (!event.active) simulation.alphaTarget(0.3).restart();
event.subject.fx = event.subject.x;
event.subject.fy = event.subject.y;
}}
function dragged(event) {{
event.subject.fx = event.x;
event.subject.fy = event.y;
}}
function dragended(event) {{
if (!event.active) simulation.alphaTarget(0);
event.subject.fx = null;
event.subject.fy = null;
}}
return d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended);
}}
document.body.appendChild(svg.node());
</script>
</body>
</html>
"""
# 4. Write to file
with open(filename, 'w', encoding='utf-8') as f:
f.write(html_template)
print(f"\\nGraph exported to '{filename}'. Opening in web browser.")
# Open the file in the default web browser
webbrowser.open('file://' + os.path.realpath(filename))
def close(self):
"""Closes the database connection."""
self.conn.close()
def cleanup(self):
"""Closes the connection and deletes the database file."""
self.close()
if os.path.exists(self.db_path):
os.remove(self.db_path)
print(f"Cleaned up and removed {self.db_path}")
def main():
"""Main function to demonstrate the KnowledgeGraph class."""
# Use a temporary file that will be cleaned up
db_file = "movies_kg.db"
kg = KnowledgeGraph(db_file)
print("Initializing Knowledge Graph...")
print("\n--- Step 1: Populating the graph using MERGE ---")
# Merge Nodes
tom_hanks = kg.merge_node('Person', {'name': 'Tom Hanks', 'born': 1956})
meg_ryan = kg.merge_node('Person', {'name': 'Meg Ryan', 'born': 1961})
nora_ephron = kg.merge_node('Person', {'name': 'Nora Ephron', 'born': 1941})
kevin_bacon = kg.merge_node('Person', {'name': 'Kevin Bacon', 'born': 1958})
sleepless = kg.merge_node('Movie', {'title': 'Sleepless in Seattle', 'released': 1993})
mail = kg.merge_node('Movie', {'title': "You've Got Mail", 'released': 1998})
apollo13 = kg.merge_node('Movie', {'title': 'Apollo 13', 'released': 1995})
print("Nodes for people and movies merged.")
# Merge Relationships
kg.merge_relationship(tom_hanks, sleepless, 'ACTED_IN', {'roles': ['Sam Baldwin']})
kg.merge_relationship(meg_ryan, sleepless, 'ACTED_IN', {'roles': ['Annie Reed']})
kg.merge_relationship(nora_ephron, sleepless, 'DIRECTED')
kg.merge_relationship(tom_hanks, mail, 'ACTED_IN', {'roles': ['Joe Fox']})
kg.merge_relationship(meg_ryan, mail, 'ACTED_IN', {'roles': ['Kathleen Kelly']})
kg.merge_relationship(nora_ephron, mail, 'DIRECTED')
kg.merge_relationship(tom_hanks, apollo13, 'ACTED_IN', {'roles': ['Jim Lovell']})
kg.merge_relationship(kevin_bacon, apollo13, 'ACTED_IN', {'roles': ['Jack Swigert']})
print("Relationships between actors, directors, and movies merged.")
print("\n--- Step 2: Querying the graph using MATCH ---")
print("\nQuery: Find all movies Tom Hanks acted in.")
results = kg.match(
source_spec=('Person', {'name': 'Tom Hanks'}),
rel_spec=('ACTED_IN', None),
target_spec=('Movie', None)
)
for path in results:
movie = path['target']['properties']
print(f" - Tom Hanks acted in '{movie['title']}' released in {movie['released']}.")
# --- Step 3: Exporting the graph to HTML ---
kg.export_to_html()
# The database connection is closed but the file is kept
kg.close()
print(f"\nDatabase '{db_file}' is preserved. You can run this script again.")
# To clean up automatically, you can uncomment the next line
# kg.cleanup()
if __name__ == '__main__':
main()
Initializing Knowledge Graph...
--- Step 1: Populating the graph using MERGE ---
Nodes for people and movies merged.
Relationships between actors, directors, and movies merged.
--- Step 2: Querying the graph using MATCH ---
Query: Find all movies Tom Hanks acted in.
- Tom Hanks acted in 'Sleepless in Seattle' released in 1993.
- Tom Hanks acted in 'You've Got Mail' released in 1998.
- Tom Hanks acted in 'Apollo 13' released in 1995.
\nGraph exported to 'knowledge_graph.html'. Opening in web browser.
Database 'movies_kg.db' is preserved. You can run this script again.
=== Code execution complete ===
From <https://gemini.google.com/app/> Google Gemini 2.5 Pro
Comments
Post a Comment