Return the exact mean and sum of square deviations of sequence data. Calculations are done in a single pass, allowing the input to be an iterator. If given, *c* is used as the mean; otherwise, it is calculated from the data. Use the *c* argument with care, as it can lead to garbage results.
(data, c=None)
| 1509 | |
| 1510 | |
def _ss(data, c=None):
    """Compute the mean and the sum of squared deviations of *data* exactly.

    The input is consumed in one pass, so *data* may be an iterator.

    When *c* is supplied it is taken as the mean and the deviations are
    measured from it; otherwise the mean is derived from the data itself.
    Pass *c* with care: a poorly chosen value leads to garbage results.

    Returns the tuple ``(T, ssd, c, count)``: the type produced by ``_sum``
    (when *c* is given) or by folding ``_coerce`` over the element types
    seen, the sum of squared deviations, the mean that was used, and the
    number of data points.

    """
    if c is not None:
        # The centre is supplied by the caller: lazily form each deviation
        # and hand the squared values to _sum in a single pass.
        deviations = (x - c for x in data)
        T, ssd, count = _sum(dev * dev for dev in deviations)
        return (T, ssd, c, count)

    count = 0
    seen_types = set()
    # Running totals of the numerators and of the squared numerators,
    # keyed by the denominator reported by _exact_ratio.
    linear = defaultdict(int)
    squares = defaultdict(int)

    # Grouping by type records each type once per consecutive run of
    # same-typed values.
    for typ, run in groupby(data, type):
        seen_types.add(typ)
        for item in run:
            num, den = _exact_ratio(item)
            count += 1
            linear[den] += num
            squares[den] += num * num

    if count == 0:
        ssd = c = Fraction(0)

    elif None in linear:
        # The sum will be a NAN or INF.  The finite partials can be
        # ignored; only this special entry matters.
        ssd = c = linear[None]
        assert not _isfinite(ssd)

    else:
        sx = sum(Fraction(num, den) for den, num in linear.items())
        sxx = sum(Fraction(num, den * den) for den, num in squares.items())
        # This formula has poor numeric properties for floats,
        # but with fractions it is exact.
        ssd = (count * sxx - sx * sx) / count
        c = sx / count

    T = reduce(_coerce, seen_types, int)  # or raise TypeError
    return (T, ssd, c, count)
| 1556 | |
| 1557 | |
| 1558 | def _isfinite(x): |
no test coverage detected