#3673 Neo4j Knowledge Graph Export to MS Access
#3673 Neo4j Knowledge Graph Export to MS Access
#3673 Provide a python application to replicate a Neo4j knowledge graph in MS Access
While a direct 1:1 replication is not possible due to fundamental differences between graph and relational databases, you can effectively translate a Neo4j knowledge graph into a relational model within MS Access using a Python application. This involves mapping Neo4j's nodes and relationships into a series of interconnected tables in Access.
This Python script uses the py2neo
library to connect to and query your Neo4j database and the pyodbc
library to create and populate an MS Access database file (.accdb
).
Conceptual Mapping: Neo4j to MS Access
The core idea is to deconstruct the graph into a relational format:
Nodes become records in tables. Each distinct node label (e.g.,
Person
,Company
) will have its own table in MS Access. Each row in that table will represent a single node.Node properties become table columns. The properties of a node (e.g.,
name
,birthdate
) become the columns of that label's table. A unique ID from Neo4j is stored to identify the node.Relationships become a linking table. A central "relationships" table is created to store the connections. Each row defines a single relationship by storing the start node's ID, the end node's ID, and the relationship type (e.g.,
WORKS_AT
).
Python Application
This application will perform the following steps:
Connect to your Neo4j instance.
Fetch all distinct node labels and relationship types.
Create a new MS Access database file.
For each node label, create a corresponding table in Access.
Fetch all nodes and insert them into their respective Access tables.
Create a single table for all relationships.
Fetch all relationships and insert them into the relationships table, linking nodes by their IDs.
Prerequisites
You must have Python installed, along with the necessary libraries and an ODBC driver for MS Access.
Install Python Libraries:
pip install py2neo pyodbc
Install MS Access ODBC Driver: You need the Microsoft Access Database Engine Redistributable. Download and install it from the official Microsoft website. Ensure you install the version (32-bit or 64-bit) that matches your Python installation.
The Python Script
Save the following code as neo4j_to_access.py
. You will need to update the connection details for your Neo4j database.
import pyodbc
from py2neo import Graph
import os
# --- Configuration ---
# Neo4j connection details
NEO4J_URI = "bolt://localhost:7687"
NEO4J_USER = "neo4j"
NEO4J_PASSWORD = "your_neo4j_password"
# MS Access database file path
ACCESS_DB_FILE = os.path.abspath("knowledge_graph.accdb")
# Note: The driver name may vary based on your installation.
# Check ODBC Data Sources in Windows for the exact name.
ACCESS_DRIVER = '{Microsoft Access Driver (*.mdb, *.accdb)}'
# --- Main Application ---
def get_neo4j_graph():
"""Connects to the Neo4j database and returns a graph object."""
try:
graph = Graph(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
# Verify connection
graph.run("MATCH (n) RETURN count(n) AS node_count")
print("✅ Successfully connected to Neo4j.")
return graph
except Exception as e:
print(f"❌ Could not connect to Neo4j: {e}")
return None
def setup_access_db():
"""Creates a new MS Access DB file and returns a connection object."""
# Remove old DB file if it exists
if os.path.exists(ACCESS_DB_FILE):
os.remove(ACCESS_DB_FILE)
print(f"Removed existing database file: {ACCESS_DB_FILE}")
try:
# Create the database file
conn_str = f'DRIVER={ACCESS_DRIVER};DBQ={ACCESS_DB_FILE};'
conn = pyodbc.connect(conn_str, autocommit=True)
print(f"✅ MS Access database created at: {ACCESS_DB_FILE}")
return conn
except pyodbc.Error as ex:
sqlstate = ex.args[0]
print(f"❌ Error creating/connecting to Access DB: {sqlstate}")
print("Ensure the Microsoft Access ODBC driver is correctly installed (32-bit vs 64-bit).")
return None
def replicate_nodes(graph, access_conn):
"""Fetches nodes from Neo4j, creates tables, and inserts data into Access."""
cursor = access_conn.cursor()
print("\n--- Replicating Nodes ---")
# Get all distinct node labels
labels = graph.run("CALL db.labels()").to_series()
print(f"Found node labels: {list(labels)}")
for label in labels:
print(f"Processing label: '{label}'")
# Get nodes for the current label
nodes = list(graph.nodes.match(label))
if not nodes:
print(f" No nodes found for label '{label}'. Skipping.")
continue
# Create table for this label
# Get properties from the first node to define columns
sample_node = nodes[0]
# Use a set to handle nodes with varying properties gracefully
all_properties = set()
for node in nodes:
all_properties.update(node.keys())
# Define columns: neo4j_id is the primary key
columns = ["neo4j_id LONG PRIMARY KEY"]
# Use Memo for potentially long string properties, otherwise Text
for prop in sorted(list(all_properties)):
col_type = "MEMO" if isinstance(sample_node[prop], str) and len(str(sample_node[prop])) > 255 else "TEXT(255)"
# Sanitize property names for SQL (e.g., replace spaces)
safe_prop = prop.replace(' ', '_')
columns.append(f"[{safe_prop}] {col_type}")
create_table_sql = f"CREATE TABLE [{label}] ({', '.join(columns)})"
try:
cursor.execute(create_table_sql)
print(f" Created table '[{label}]' in MS Access.")
except pyodbc.Error as e:
print(f" Error creating table for '{label}': {e}")
continue
# Insert data
for node in nodes:
props = dict(node)
# Ensure all possible columns are accounted for, even if null
row_values = [node.identity] + [props.get(key) for key in sorted(list(all_properties))]
placeholders = ', '.join(['?'] * len(row_values))
col_names = "[neo4j_id], " + ", ".join([f"[{p.replace(' ', '_')}]" for p in sorted(list(all_properties))])
insert_sql = f"INSERT INTO [{label}] ({col_names}) VALUES ({placeholders})"
cursor.execute(insert_sql, row_values)
print(f" Inserted {len(nodes)} nodes into '[{label}]'.")
cursor.commit()
cursor.close()
def replicate_relationships(graph, access_conn):
"""Fetches relationships and inserts them into a single relationships table."""
cursor = access_conn.cursor()
print("\n--- Replicating Relationships ---")
# Create a single table for all relationships
create_rel_table_sql = """
CREATE TABLE Relationships (
neo4j_rel_id LONG PRIMARY KEY,
start_node_id LONG,
end_node_id LONG,
type TEXT(255)
)
"""
try:
cursor.execute(create_rel_table_sql)
print(" Created table 'Relationships' in MS Access.")
except pyodbc.Error as e:
print(f" Error creating relationships table: {e}")
cursor.close()
return
# Fetch all relationships
query = "MATCH (a)-[r]->(b) RETURN id(r) AS rel_id, id(a) AS start_id, id(b) AS end_id, type(r) AS rel_type"
relationships = graph.run(query).data()
if not relationships:
print(" No relationships found in Neo4j.")
cursor.close()
return
# Insert data
insert_sql = "INSERT INTO Relationships (neo4j_rel_id, start_node_id, end_node_id, type) VALUES (?, ?, ?, ?)"
for rel in relationships:
cursor.execute(insert_sql, rel['rel_id'], rel['start_id'], rel['end_id'], rel['rel_type'])
print(f" Inserted {len(relationships)} relationships.")
cursor.commit()
cursor.close()
def main():
"""Main function to run the replication process."""
print("Starting Neo4j to MS Access Replication...")
graph = get_neo4j_graph()
if not graph:
return
access_conn = setup_access_db()
if not access_conn:
return
replicate_nodes(graph, access_conn)
replicate_relationships(graph, access_conn)
access_conn.close()
print("\n✅ Replication complete!")
if __name__ == "__main__":
main()
How to Run the Application
Update Configuration: Open the
neo4j_to_access.py
file and change theNEO4J_URI
,NEO4J_USER
, andNEO4J_PASSWORD
constants to match your Neo4j database credentials.Execute from Terminal: Navigate to the directory where you saved the file and run it:
python neo4j_to_access.py
Check the Output: A new file named
knowledge_graph.accdb
will be created in the same directory. You can open this file with MS Access to view the tables and data.
Limitations and Considerations
Querying: You lose the power of Cypher graph queries. In Access, you will need to use SQL
JOIN
operations on theRelationships
table to traverse connections, which is less efficient for complex, multi-hop queries.Schema Flexibility: Neo4j allows nodes with the same label to have different properties (schema-less). This script creates a fixed schema in Access based on the properties found. If some nodes of a certain label are missing a property, that field will be
NULL
in the Access table.Relationship Properties: This basic script does not handle properties on relationships. To support this, you would need to add more columns to the
Relationships
table in Access.Performance: For very large graphs, this script can be slow as it fetches all data into memory. For enterprise-scale graphs, a more robust batch-processing approach would be necessary.
Could I produce a D3 visulation in HTML and JavaScript from the MS Access database?
Yes, you absolutely can, and it's a great way to bring your connected data back to life visually.
However, you can't connect your HTML and JavaScript directly to the MS Access file. Web browsers are restricted from accessing local files for security reasons. You'll need an intermediate step to get the data from Access into a format your web page can read, like JSON.
Here are the two main ways to do it.
Method 1: Exporting to a Web-Friendly Format (The Simple Approach) 💡
This is the easiest method and is perfect for static visualizations where the data doesn't change often. The process is to run queries in Access and export the results as JSON or CSV files, which your D3.js code can then load.
Step 1: Prepare Your Data in MS Access
You need to generate two datasets from your database: one for the nodes and one for the relationships (links).
Nodes: Create a
UNION
query in Access to combine all your node tables (e.g.,Person
,Company
) into a single list. You need at least a uniqueid
and alabel
orname
.Links: Your
Relationships
table is almost ready. You just need to rename the columns to what D3 typically expects:source
(forstart_node_id
) andtarget
(forend_node_id
).
Step 2: Export Data to JSON
MS Access doesn't have a built-in JSON export function. The easiest way is to use the Python script from before with a small modification to write files instead of writing to another database.
Alternatively, export the query results to CSV, which you can then convert to JSON using an online tool or a simple script.
Your final JSON file (graph.json
) should look like this. This structure is exactly what D3's force-directed graph layout expects.
{
"nodes": [
{ "id": 1, "label": "Alice", "group": "Person" },
{ "id": 2, "label": "Bob", "group": "Person" },
{ "id": 101, "label": "MegaCorp", "group": "Company" }
],
"links": [
{ "source": 1, "target": 101, "type": "WORKS_AT" },
{ "source": 2, "target": 101, "type": "WORKS_AT" }
]
}
Step 3: Create the D3 Visualization
Now, create an HTML file that uses D3.js to load your graph.json
file and render it.
Method 2: Using a Server-Side Script (The Dynamic Approach) ⚙️
If your data in Access changes frequently, you can run a lightweight web server (e.g., using Python with Flask) that queries the Access database in real-time and serves the data to your web page.
The Workflow:
Your D3.js code in the browser requests data from a local URL (e.g.,
http://localhost:5000/graph-data
).A Python Flask server running on your machine receives this request.
The Python script connects to your MS Access database using
pyodbc
, runs the queries to get nodes and links, and formats the result as JSON.The server sends this JSON back to the browser.
D3.js receives the data and renders the visualization.
This approach is more complex to set up but creates a dynamic application where the visualization always reflects the current state of the database.
Example D3.js Code
Regardless of which method you use to get the JSON, the D3.js code will be very similar. Here is a basic index.html
file that creates a force-directed graph from a graph.json
file.
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>D3 Knowledge Graph</title>
<style>
body { font-family: sans-serif; }
svg { border: 1px solid #ccc; }
.links line { stroke: #999; stroke-opacity: 0.6; }
.nodes circle { stroke: #fff; stroke-width: 1.5px; }
.labels text { pointer-events: none; font-size: 10px; }
</style>
</head>
<body>
<h1>Knowledge Graph Visualization</h1>
<svg width="960" height="600"></svg>
<script src="https://d3js.org/d3.v5.min.js"></script>
<script>
const svg = d3.select("svg");
const width = +svg.attr("width");
const height = +svg.attr("height");
// Load the graph data from your JSON file
d3.json("graph.json").then(function(graph) {
// Create a force simulation
const simulation = d3.forceSimulation(graph.nodes)
.force("link", d3.forceLink(graph.links).id(d => d.id).distance(70))
.force("charge", d3.forceManyBody().strength(-150))
.force("center", d3.forceCenter(width / 2, height / 2));
// Define color scale for node groups
const color = d3.scaleOrdinal(d3.schemeCategory10);
// Create links (lines)
const link = svg.append("g")
.attr("class", "links")
.selectAll("line")
.data(graph.links)
.enter().append("line")
.attr("stroke-width", 2);
// Create nodes (circles)
const node = svg.append("g")
.attr("class", "nodes")
.selectAll("circle")
.data(graph.nodes)
.enter().append("circle")
.attr("r", 10)
.attr("fill", d => color(d.group))
.call(d3.drag()
.on("start", dragstarted)
.on("drag", dragged)
.on("end", dragended));
// Add labels to nodes
const label = svg.append("g")
.attr("class", "labels")
.selectAll("text")
.data(graph.nodes)
.enter().append("text")
.attr("dx", 12)
.attr("dy", ".35em")
.text(d => d.label);
// This function runs at each "tick" of the simulation
simulation.on("tick", () => {
link
.attr("x1", d => d.source.x)
.attr("y1", d => d.source.y)
.attr("x2", d => d.target.x)
.attr("y2", d => d.target.y);
node
.attr("cx", d => d.x)
.attr("cy", d => d.y);
label
.attr("x", d => d.x)
.attr("y", d => d.y);
});
// Drag functions
function dragstarted(d) {
if (!d3.event.active) simulation.alphaTarget(0.3).restart();
d.fx = d.x;
d.fy = d.y;
}
function dragged(d) {
d.fx = d3.event.x;
d.fy = d3.event.y;
}
function dragended(d) {
if (!d3.event.active) simulation.alphaTarget(0);
d.fx = null;
d.fy = null;
}
});
</script>
</body>
</html>
From <https://gemini.google.com/app/> Google Gemini 2.5 Pro
Comments
Post a Comment