(scope string, settings heuristicSettings, summary attributeSummary, numRowGroups int)
| 832 | } |
| 833 | |
| 834 | func printFullSummary(scope string, settings heuristicSettings, summary attributeSummary, numRowGroups int) error { |
| 835 | var ( |
| 836 | w = tabwriter.NewWriter(os.Stdout, 0, 0, 2, ' ', 0) |
| 837 | err error |
| 838 | totalBytes = summary.totalBytes() |
| 839 | totalIntegerCount = summary.totalIntegerCount() |
| 840 | ) |
| 841 | |
| 842 | fmt.Println("--------------------------------") |
| 843 | fmt.Printf("- %s Summary -\n", scope) |
| 844 | fmt.Println("--------------------------------") |
| 845 | |
| 846 | fmt.Printf("Total rows: %d\n", summary.rowCount) |
| 847 | fmt.Printf("Total string values: %d\n", summary.totalStringCount()) |
| 848 | |
| 849 | fmt.Println("") |
| 850 | attrList := topN(settings.NumStringAttr, summary.attributes) |
| 851 | if len(attrList) > 0 { |
| 852 | fmt.Printf("Top %d attributes by size\n", len(attrList)) |
| 853 | |
| 854 | for _, a := range attrList { |
| 855 | var ( |
| 856 | name = a.name |
| 857 | thisBytes = a.totalBytes |
| 858 | percentage = float64(thisBytes) / float64(totalBytes) * 100 |
| 859 | totalOccurences = a.cardinality.totalOccurrences() |
| 860 | distinct = a.cardinality.distinctValueCount() |
| 861 | avgReuse = float64(totalOccurences) / float64(distinct) |
| 862 | totalSize = a.cardinality.avgSizePerRowGroup(numRowGroups) |
| 863 | blobText = "" |
| 864 | percentOfRowsText = "" |
| 865 | shouldDedicateText = "" |
| 866 | ) |
| 867 | |
| 868 | if _, ok := summary.dedicated[a.name]; ok { |
| 869 | name = a.name + " (dedicated)" |
| 870 | } |
| 871 | |
| 872 | if settings.BlobThresholdBytes > 0 && totalSize >= settings.BlobThresholdBytes { |
| 873 | blobText = "(blob)" |
| 874 | } |
| 875 | |
| 876 | if summary.rowCount > 0 { |
| 877 | percentOfRows := float64(totalOccurences) / float64(summary.rowCount) |
| 878 | percentOfRowsText = fmt.Sprintf("(%.2f%% of rows)", percentOfRows*100) |
| 879 | if percentOfRows >= settings.StrThresholdPercent { |
| 880 | shouldDedicateText = "✅ Recommended dedicated column" |
| 881 | } |
| 882 | } |
| 883 | |
| 884 | _, err := fmt.Fprintf(w, "name: %s\t size: %s\t (%.2f%%)\tcount: %d\t%s\t distinct: %d\t avg reuse: %.2f\t avg rowgroup content (dict + body): %s %s\t%s\n", |
| 885 | name, |
| 886 | humanize.Bytes(thisBytes), |
| 887 | percentage, |
| 888 | totalOccurences, |
| 889 | percentOfRowsText, |
| 890 | distinct, |
| 891 | avgReuse, |
no test coverage detected