(manpage_file, mapping_file, db_path)
| 79 | |
| 80 | |
| 81 | def migrate(manpage_file, mapping_file, db_path): |
| 82 | conn = sqlite3.connect(db_path) |
| 83 | conn.execute("PRAGMA foreign_keys = ON") |
| 84 | conn.execute("PRAGMA journal_mode = WAL") |
| 85 | conn.executescript(_CREATE_SCHEMA) |
| 86 | |
| 87 | # ------------------------------------------------------------------ # |
| 88 | # 1. manpages # |
| 89 | # ------------------------------------------------------------------ # |
| 90 | logger.info("Migrating manpages from %s …", manpage_file) |
| 91 | oid_to_source = {} # mongo ObjectId string -> source path |
| 92 | |
| 93 | with open(manpage_file) as fh: |
| 94 | lines = fh.readlines() |
| 95 | |
| 96 | inserted = skipped = 0 |
| 97 | for line in lines: |
| 98 | line = line.strip() |
| 99 | if not line: |
| 100 | continue |
| 101 | doc = json.loads(line) |
| 102 | oid = _oid(doc["_id"]) |
| 103 | |
| 104 | synopsis = doc.get("synopsis") or "" |
| 105 | if isinstance(synopsis, bytes): |
| 106 | synopsis = synopsis.decode("utf-8") |
| 107 | |
| 108 | paragraphs = [_para_to_store(p) for p in doc.get("paragraphs", [])] |
| 109 | aliases = doc.get("aliases", []) |
| 110 | # aliases may be [[name, score], ...] already |
| 111 | aliases_json = json.dumps(aliases) |
| 112 | |
| 113 | dashless_opts = _coerce_bool( |
| 114 | doc.get("partial_match", doc.get("partialmatch")), False |
| 115 | ) |
| 116 | has_subcommands = _coerce_bool( |
| 117 | doc.get("multi_cmd", doc.get("multicommand")), False |
| 118 | ) |
| 119 | # Convert legacy boolean to subcommands list — the old MongoDB |
| 120 | # data only stored a flag, not actual subcommand names. |
| 121 | subcommands_json = json.dumps([]) if not has_subcommands else json.dumps([]) |
| 122 | nested_cmd = doc.get("nested_cmd", doc.get("nestedcmd", False)) |
| 123 | nested_cmd_json = json.dumps(nested_cmd) |
| 124 | |
| 125 | try: |
| 126 | conn.execute( |
| 127 | """INSERT INTO parsed_manpages |
| 128 | (source, name, synopsis, paragraphs, aliases, |
| 129 | dashless_opts, subcommands, updated, nested_cmd) |
| 130 | VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)""", |
| 131 | ( |
| 132 | doc["source"], |
| 133 | doc["name"], |
| 134 | synopsis or None, |
| 135 | json.dumps(paragraphs), |
| 136 | aliases_json, |
| 137 | int(dashless_opts), |
| 138 | subcommands_json, |
no test coverage detected