Read observations from a SAS Xport file and return them as a DataFrame. Parameters: nrows (int, optional) - number of rows to read from the data file; if None, read the whole file. Returns: a DataFrame.
(self, nrows: int | None = None)
| 450 | return miss |
| 451 | |
| 452 | def read(self, nrows: int | None = None) -> pd.DataFrame: |
| 453 | """Read observations from SAS Xport file, returning as data frame. |
| 454 | |
| 455 | Parameters |
| 456 | ---------- |
| 457 | nrows : int |
| 458 | Number of rows to read from data file; if None, read whole |
| 459 | file. |
| 460 | |
| 461 | Returns |
| 462 | ------- |
| 463 | A DataFrame. |
| 464 | """ |
| 465 | if nrows is None: |
| 466 | nrows = self.nobs |
| 467 | |
| 468 | read_lines = min(nrows, self.nobs - self._lines_read) |
| 469 | read_len = read_lines * self.record_length |
| 470 | if read_len <= 0: |
| 471 | self.close() |
| 472 | raise StopIteration |
| 473 | raw = self.filepath_or_buffer.read(read_len) |
| 474 | data = np.frombuffer(raw, dtype=self._dtype, count=read_lines) |
| 475 | |
| 476 | df_data = {} |
| 477 | for j, x in enumerate(self.columns): |
| 478 | vec = data["s" + str(j)] |
| 479 | ntype = self.fields[j]["ntype"] |
| 480 | if ntype == "numeric": |
| 481 | vec = _handle_truncated_float_vec(vec, self.fields[j]["field_length"]) |
| 482 | miss = self._missing_double(vec) |
| 483 | v = _parse_float_vec(vec) |
| 484 | v[miss] = np.nan |
| 485 | elif self.fields[j]["ntype"] == "char": |
| 486 | v = [y.rstrip() for y in vec] |
| 487 | |
| 488 | if self._encoding is not None: |
| 489 | v = [y.decode(self._encoding) for y in v] |
| 490 | |
| 491 | df_data.update({x: v}) |
| 492 | df = pd.DataFrame(df_data) |
| 493 | |
| 494 | if self._index is None: |
| 495 | df.index = pd.Index(range(self._lines_read, self._lines_read + read_lines)) |
| 496 | else: |
| 497 | df = df.set_index(self._index) |
| 498 | |
| 499 | self._lines_read += read_lines |
| 500 | |
| 501 | return df |
no test coverage detected