| 1456 | return detect_rows |
| 1457 | |
| 1458 | def detect_colspecs( |
| 1459 | self, infer_nrows: int = 100, skiprows: set[int] | None = None |
| 1460 | ) -> list[tuple[int, int]]: |
| 1461 | # Regex escape the delimiters |
| 1462 | delimiters = "".join([rf"\{x}" for x in self.delimiter]) |
| 1463 | pattern = re.compile(f"([^{delimiters}]+)") |
| 1464 | rows = self.get_rows(infer_nrows, skiprows) |
| 1465 | if not rows: |
| 1466 | raise EmptyDataError("No rows from which to infer column width") |
| 1467 | max_len = max(map(len, rows)) |
| 1468 | mask = np.zeros(max_len + 1, dtype=int) |
| 1469 | if self.comment is not None: |
| 1470 | rows = [row.partition(self.comment)[0] for row in rows] |
| 1471 | for row in rows: |
| 1472 | for m in pattern.finditer(row): |
| 1473 | mask[m.start() : m.end()] = 1 |
| 1474 | shifted = np.roll(mask, 1) |
| 1475 | shifted[0] = 0 |
| 1476 | edges = np.where((mask ^ shifted) == 1)[0] |
| 1477 | edge_pairs = list(zip(edges[::2], edges[1::2], strict=True)) |
| 1478 | return edge_pairs |
| 1479 | |
| 1480 | def __next__(self) -> list[str]: |
| 1481 | if self.buffer is not None: |