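Export the entire graph as a ``.parquet.zip`` archive. Returns the zip bytes, ready to be written to a file. Each parquet file is serialized to a short-lived in-memory buffer and added to the zip as soon as it is produced, instead of collecting every parquet payload first and zipping them afterwards (the double-buffering this avoids).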
(store: GraphStore)
| 44 | |
| 45 | |
| 46 | def export_database(store: GraphStore) -> bytes: |
| 47 | """Export the entire graph as a ``.parquet.zip`` archive. |
| 48 | |
| 49 | Returns the zip bytes ready to be written to a file. |
| 50 | Writes parquet files directly into the zip to avoid double-buffering. |
| 51 | """ |
| 52 | stats = store.get_stats() |
| 53 | if stats["total_nodes"] == 0: |
| 54 | raise ValueError("Nothing to export — the graph is empty.") |
| 55 | |
| 56 | all_node_ids: set[str] = set() |
| 57 | zip_buf = io.BytesIO() |
| 58 | with zipfile.ZipFile(zip_buf, "w", compression=zipfile.ZIP_DEFLATED) as zf: |
| 59 | # Group nodes by type and write one parquet file per type. |
| 60 | # Include IndexMetadata (excluded from stats) so it survives export/import. |
| 61 | type_counts = dict(stats["nodes_by_type"]) |
| 62 | metadata = store.get_metadata() |
| 63 | if metadata: |
| 64 | type_counts["IndexMetadata"] = len(metadata) |
| 65 | |
| 66 | for node_type, count in type_counts.items(): |
| 67 | nodes = store.list_nodes(node_type, limit=count + 1000) |
| 68 | if not nodes: |
| 69 | continue |
| 70 | |
| 71 | rows = [ |
| 72 | { |
| 73 | "id": n["id"], |
| 74 | "name": n["name"], |
| 75 | "properties": _marshal_props(n.get("properties")), |
| 76 | } |
| 77 | for n in nodes |
| 78 | ] |
| 79 | all_node_ids.update(r["id"] for r in rows) |
| 80 | table = pa.Table.from_pylist(rows) |
| 81 | |
| 82 | buf = io.BytesIO() |
| 83 | pq.write_table(table, buf) |
| 84 | zf.writestr(f"nodes_{node_type}.parquet", buf.getvalue()) |
| 85 | logger.debug("Exported %s: %d nodes", node_type, len(nodes)) |
| 86 | |
| 87 | # Export relationships |
| 88 | rels = store.list_relationships_for_nodes(all_node_ids) |
| 89 | if rels: |
| 90 | rel_rows = [ |
| 91 | { |
| 92 | "from": r["source_id"], |
| 93 | "to": r["target_id"], |
| 94 | "id": r["id"], |
| 95 | "type": r["type"], |
| 96 | "properties": _marshal_props(r.get("properties")), |
| 97 | } |
| 98 | for r in rels |
| 99 | ] |
| 100 | table = pa.Table.from_pylist(rel_rows) |
| 101 | |
| 102 | buf = io.BytesIO() |
| 103 | pq.write_table(table, buf) |