PostgreSQL's linear-regression aggregates.
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 | # ====================================================================== # PostgreSQL's linear-regression aggregates. `RegrSlope`/`RegrIntercept` fit y = # slope·x + intercept, `Corr` is the correlation (−1..1), and `RegrR2` is the # goodness of fit. Note the keyword form `y=`, `x=` (dependent variable first, # as in SQL). # ====================================================================== # --- Django ORM --- Measurement.objects.values("series").annotate( n=RegrCount(y="y", x="x"), corr=Corr(y="y", x="x"), slope=RegrSlope(y="y", x="x"), intercept=RegrIntercept(y="y", x="x"), r2=RegrR2(y="y", x="x"), cov=CovarPop(y="y", x="x"), avg_x=RegrAvgX(y="y", x="x"), avg_y=RegrAvgY(y="y", x="x"), sxx=RegrSXX(y="y", x="x"), syy=RegrSYY(y="y", x="x"), sxy=RegrSXY(y="y", x="x"), ).order_by("series") # --- generated SQL --- # SELECT "examples_measurement"."series" AS "series", REGR_COUNT("examples_measurement"."y", "examples_measurement"."x") AS "n", CORR("examples_measurement"."y", "examples_measurement"."x") AS "corr", REGR_SLOPE("examples_measurement"."y", "examples_measurement"."x") AS "slope", REGR_INTERCEPT("examples_measurement"."y", "examples_measurement"."x") AS "intercept", REGR_R2("examples_measurement"."y", "examples_measurement"."x") AS "r2", COVAR_POP("examples_measurement"."y", "examples_measurement"."x") AS "cov", REGR_AVGX("examples_measurement"."y", "examples_measurement"."x") AS "avg_x", REGR_AVGY("examples_measurement"."y", "examples_measurement"."x") AS "avg_y", REGR_SXX("examples_measurement"."y", "examples_measurement"."x") AS "sxx", REGR_SYY("examples_measurement"."y", "examples_measurement"."x") AS "syy", REGR_SXY("examples_measurement"."y", "examples_measurement"."x") AS "sxy" # FROM "examples_measurement" # GROUP BY 1 # ORDER BY 1 ASC # ---------------------------------------------------------------------- # series 'linear' (n=10) # ---------------------------------------------------------------------- # Result: # fit: y = 1.994 * x + 1.033 # corr=1.000 r2=1.000 covar_pop=16.450 # avg_x=5.50 avg_y=12.00 # sxx=82.500 syy=328.100 sxy=164.500 (slope = sxy/sxx = 1.994) # ---------------------------------------------------------------------- # series 'noisy' (n=10) # ---------------------------------------------------------------------- # Result: # fit: y = -0.018 * x + 4.800 # corr=-0.020 r2=0.000 covar_pop=-0.150 # avg_x=5.50 avg_y=4.70 # sxx=82.500 syy=68.100 sxy=-1.500 (slope = sxy/sxx = -0.018) |