estimateProtoSizeFromParquetRow estimates the byte-length of the corresponding trace in tempopb.Trace format. This method is unreasonably effective. Testing on real blocks shows 90-98% accuracy.
(row parquet.Row)
| 314 | // trace in tempopb.Trace format. This method is unreasonably effective. |
| 315 | // Testing on real blocks shows 90-98% accuracy. |
| 316 | func estimateProtoSizeFromParquetRow(row parquet.Row) (size int) { |
| 317 | for _, v := range row { |
| 318 | size++ // Field identifier |
| 319 | |
| 320 | switch v.Kind() { |
| 321 | case parquet.ByteArray: |
| 322 | size += len(v.ByteArray()) |
| 323 | |
| 324 | case parquet.FixedLenByteArray: |
| 325 | size += len(v.ByteArray()) |
| 326 | |
| 327 | default: |
| 328 | // All other types (ints, bools) approach 1 byte per value |
| 329 | size++ |
| 330 | } |
| 331 | } |
| 332 | return |
| 333 | } |
| 334 | |
| 335 | // estimateMarshalledSizeFromParquetRow estimates the byte size as marshalled into parquet. |
| 336 | // This is a very rough estimate, generally 66%-100% of the actual size. |