NewSyncIterator iterates values in a column of a parquet file. Required values are the numeric column index and the row groups to iterate over. The column index can be found by name using GetColumnIndexByPath. By default it does the minimal amount of work, which is to scan for matches (using the g
(ctx context.Context, rgs []pq.RowGroup, column int, opts ...SyncIteratorOpt)
| 484 | // |
| 485 | // Not safe for concurrent use. |
| 486 | func NewSyncIterator(ctx context.Context, rgs []pq.RowGroup, column int, opts ...SyncIteratorOpt) *SyncIterator { |
| 487 | // Assign row group bounds. |
| 488 | // Lower bound is inclusive |
| 489 | // Upper bound is exclusive, points at the first row of the next group |
| 490 | rn := EmptyRowNumber() |
| 491 | rgsMin := make([]RowNumber, len(rgs)) |
| 492 | rgsMax := make([]RowNumber, len(rgs)) |
| 493 | for i, rg := range rgs { |
| 494 | rgsMin[i] = rn |
| 495 | rgsMax[i] = rn |
| 496 | rgsMax[i].Skip(rg.NumRows() + 1) |
| 497 | rn.Skip(rg.NumRows()) |
| 498 | } |
| 499 | |
| 500 | // Create the iterator |
| 501 | i := &SyncIterator{ |
| 502 | column: column, |
| 503 | rgs: rgs, |
| 504 | readSize: 1000, // default value |
| 505 | rgsMin: rgsMin, |
| 506 | rgsMax: rgsMax, |
| 507 | curr: EmptyRowNumber(), |
| 508 | at: IteratorResult{}, |
| 509 | maxDefinitionLevel: MaxDefinitionLevel, // default value |
| 510 | } |
| 511 | |
| 512 | // Apply options |
| 513 | for _, opt := range opts { |
| 514 | opt(i) |
| 515 | } |
| 516 | |
| 517 | // Default value, always clone results until we have |
| 518 | // checked the column type and determined it isn't needed. |
| 519 | clone := true |
| 520 | |
| 521 | // Always disable intern/clone for non-pointer types that don't need it. |
| 522 | // This eliminates unnecessary function calls on the hot path. |
| 523 | if len(rgs) > 0 { |
| 524 | cc := rgs[0].ColumnChunks()[column] |
| 525 | switch cc.Type().Kind() { |
| 526 | case pq.ByteArray, pq.FixedLenByteArray: |
| 527 | default: |
| 528 | clone = false |
| 529 | if i.interner != nil { |
| 530 | i.interner.Close() |
| 531 | } |
| 532 | i.interner = nil |
| 533 | } |
| 534 | } |
| 535 | |
| 536 | if i.selectAs != "" { |
| 537 | // Preallocate 1 entry with the given name. |
| 538 | i.at.Entries = []struct { |
| 539 | Key string |
| 540 | Value pq.Value |
| 541 | }{ |
| 542 | {Key: i.selectAs}, |
| 543 | } |