MCPcopy
hub / github.com/pandas-dev/pandas / detect_colspecs

Method detect_colspecs

pandas/io/parsers/python_parser.py:1458–1478  ·  view source on GitHub ↗
(
        self, infer_nrows: int = 100, skiprows: set[int] | None = None
    )

Source from the content-addressed store, hash-verified

1456 return detect_rows
1457
1458 def detect_colspecs(
1459 self, infer_nrows: int = 100, skiprows: set[int] | None = None
1460 ) -> list[tuple[int, int]]:
1461 # Regex escape the delimiters
1462 delimiters = "".join([rf"\{x}" for x in self.delimiter])
1463 pattern = re.compile(f"([^{delimiters}]+)")
1464 rows = self.get_rows(infer_nrows, skiprows)
1465 if not rows:
1466 raise EmptyDataError("No rows from which to infer column width")
1467 max_len = max(map(len, rows))
1468 mask = np.zeros(max_len + 1, dtype=int)
1469 if self.comment is not None:
1470 rows = [row.partition(self.comment)[0] for row in rows]
1471 for row in rows:
1472 for m in pattern.finditer(row):
1473 mask[m.start() : m.end()] = 1
1474 shifted = np.roll(mask, 1)
1475 shifted[0] = 0
1476 edges = np.where((mask ^ shifted) == 1)[0]
1477 edge_pairs = list(zip(edges[::2], edges[1::2], strict=True))
1478 return edge_pairs
1479
1480 def __next__(self) -> list[str]:
1481 if self.buffer is not None:

Callers 1

__init__ · Method · 0.95

Calls 7

get_rows · Method · 0.95
EmptyDataError · Class · 0.90
max · Function · 0.85
partition · Method · 0.80
start · Method · 0.80
join · Method · 0.45
where · Method · 0.45

Tested by

no test coverage detected