MCPcopy
hub / github.com/pandas-dev/pandas / _read_new_value_labels

Method _read_new_value_labels

pandas/io/stata.py:1466–1511  ·  view source on GitHub ↗

Reads value labels with variable-length strings (format 108 and later).

(self)

Source from the content-addressed store, hash-verified

1464 return s.decode("latin-1")
1465
def _read_new_value_labels(self) -> None:
    """Reads value labels with variable length strings (108 and later format)

    Moves the underlying buffer to the value-label table and fills
    ``self._value_label_dict`` with one ``{value: label text}`` mapping per
    label name, consuming entries until the table ends.
    """
    stream = self._path_or_buf
    version = self._format_version
    tagged = version >= 117

    if tagged:
        stream.seek(self._seek_value_labels)
    else:
        assert self._dtype is not None
        # Skip ``_nobs`` records of ``_dtype.itemsize`` bytes each, counted
        # from ``_data_location``.
        stream.seek(self._data_location + self._nobs * self._dtype.itemsize)

    # Width in bytes of the label-name field for this format version.
    if version == 108:
        name_width = 9
    elif version <= 117:
        name_width = 33
    else:
        name_width = 129

    while True:
        # 117+ entries start with a 5-byte marker (noted as "<lbl>" in the
        # original); reading "</val" instead means the table is finished.
        if tagged and stream.read(5) == b"</val":
            break  # end of value label table

        # The 4-byte length field is only checked for emptiness.
        if not stream.read(4):
            break  # end of value label table (format < 117), or end-of-file

        labname = self._decode(stream.read(name_width))
        stream.read(3)  # padding

        count = self._read_uint32()
        text_size = self._read_uint32()
        int32_dtype = f"{self._byteorder}i4"
        offsets = np.frombuffer(
            stream.read(4 * count), dtype=int32_dtype, count=count
        )
        values = np.frombuffer(
            stream.read(4 * count), dtype=int32_dtype, count=count
        )

        # Order entries by text offset so each label ends where the next
        # one begins.
        order = np.argsort(offsets)
        offsets = offsets[order]
        values = values[order]
        text = stream.read(text_size)

        # Register the mapping before filling it, then populate in place.
        labels = {}
        self._value_label_dict[labname] = labels
        # The last label runs to the end of the text block.
        stops = [*offsets[1:], text_size]
        for start, stop, key in zip(offsets, stops, values):
            labels[key] = self._decode(text[start:stop])

        if tagged:
            stream.read(6)  # </lbl>
1512
1513 def _read_old_value_labels(self) -> None:
1514 """Reads value labels with fixed-length strings (105 and earlier format)"""

Callers 1

_read_value_labelsMethod · 0.95

Calls 5

_decodeMethod · 0.95
_read_uint32Method · 0.95
seekMethod · 0.45
readMethod · 0.45
argsortMethod · 0.45

Tested by

no test coverage detected