estimateProtoSizeFromParquetRow estimates the byte-length of the corresponding trace in tempopb.Trace format. This method is unreasonably effective. Testing on real blocks shows 90-98% accuracy.
(row parquet.Row)
| 338 | // trace in tempopb.Trace format. This method is unreasonably effective. |
| 339 | // Testing on real blocks shows 90-98% accuracy. |
| 340 | func estimateProtoSizeFromParquetRow(row parquet.Row) (size int) { |
| 341 | for _, v := range row { |
| 342 | size++ // Field identifier |
| 343 | |
| 344 | switch v.Kind() { |
| 345 | case parquet.ByteArray: |
| 346 | size += len(v.ByteArray()) |
| 347 | |
| 348 | case parquet.FixedLenByteArray: |
| 349 | size += len(v.ByteArray()) |
| 350 | |
| 351 | default: |
| 352 | // All other types (ints, bools) approach 1 byte per value |
| 353 | size++ |
| 354 | } |
| 355 | } |
| 356 | return |
| 357 | } |
| 358 | |
| 359 | // estimateMarshalledSizeFromParquetRow estimates the byte size as marshalled into parquet. |
| 360 | // This is a very rough estimate and is generally 66%-100% of actual size. |