Iterparse xml nodes. This method reads decompressed XML files on local disk for elements and their underlying descendants using iterparse, a method to iterate through an XML tree without holding the entire XML tree in memory. Raises ------ TypeEr
(self, iterparse: Callable)
| 281 | return dicts |
| 282 | |
| 283 | def _iterparse_nodes(self, iterparse: Callable) -> list[dict[str, str | None]]: |
| 284 | """ |
| 285 | Iterparse xml nodes. |
| 286 | |
| 287 | This method will read in local disk, decompressed XML files for elements |
| 288 | and underlying descendants using iterparse, a method to iterate through |
| 289 | an XML tree without holding entire XML tree in memory. |
| 290 | |
| 291 | Raises |
| 292 | ------ |
| 293 | TypeError |
| 294 | * If ``iterparse`` is not a dict or its dict value is not list-like. |
| 295 | ParserError |
| 296 | * If ``path_or_buffer`` is not a physical file on disk or file-like object. |
| 297 | * If no data is returned from selected items in ``iterparse``. |
| 298 | |
| 299 | Notes |
| 300 | ----- |
| 301 | Namespace URIs will be removed from return node values. Also, |
| 302 | elements with missing children or attributes in submitted list |
| 303 | will have optional keys filled with None values. |
| 304 | """ |
| 305 | |
| 306 | dicts: list[dict[str, str | None]] = [] |
| 307 | row: dict[str, str | None] | None = None |
| 308 | |
| 309 | if not isinstance(self.iterparse, dict): |
| 310 | raise TypeError( |
| 311 | f"{type(self.iterparse).__name__} is not a valid type for iterparse" |
| 312 | ) |
| 313 | |
| 314 | row_node = next(iter(self.iterparse.keys())) if self.iterparse else "" |
| 315 | if not is_list_like(self.iterparse[row_node]): |
| 316 | raise TypeError( |
| 317 | f"{type(self.iterparse[row_node])} is not a valid type " |
| 318 | "for value in iterparse" |
| 319 | ) |
| 320 | |
| 321 | if (not hasattr(self.path_or_buffer, "read")) and ( |
| 322 | not isinstance(self.path_or_buffer, (str, PathLike)) |
| 323 | or is_url(self.path_or_buffer) |
| 324 | or is_fsspec_url(self.path_or_buffer) |
| 325 | or ( |
| 326 | isinstance(self.path_or_buffer, str) |
| 327 | and self.path_or_buffer.startswith(("<?xml", "<")) |
| 328 | ) |
| 329 | or infer_compression(self.path_or_buffer, "infer") is not None |
| 330 | ): |
| 331 | raise ParserError( |
| 332 | "iterparse is designed for large XML files that are fully extracted on " |
| 333 | "local disk and not as compressed files or online sources." |
| 334 | ) |
| 335 | |
| 336 | iterparse_repeats = len(self.iterparse[row_node]) != len( |
| 337 | set(self.iterparse[row_node]) |
| 338 | ) |
| 339 | |
| 340 | for event, elem in iterparse(self.path_or_buffer, events=("start", "end")): |
no test coverage detected