SPH
StatisticTests.cpp
Go to the documentation of this file.
1 #include "post/StatisticTests.h"
2 #include <algorithm>
3 #include <numeric>
4 
6 
8  SPH_ASSERT(points.size() >= 2);
9  // find the mean
10  PlotPoint mean(0._f, 0._f);
11  for (PlotPoint p : points) {
12  mean += p;
13  }
14  mean /= points.size();
15 
16  Float corr = 0._f;
17  Float normX = 0._f;
18  Float normY = 0._f;
19  for (PlotPoint p : points) {
20  corr += (p.x - mean.x) * (p.y - mean.y);
21  normX += sqr(p.x - mean.x);
22  normY += sqr(p.y - mean.y);
23  }
24  // may be slightly over/below 1/-1 due to round-off errors
25  return corr / (sqrt(normX * normY));
26 }
27 
28 Float Post::chiSquareDistribution(const Float chiSqr, const Float dof) {
29  return 1._f / (pow(2._f, 0.5_f * dof) * std::tgamma(0.5_f * dof)) * pow(chiSqr, 0.5_f * dof - 1._f) *
30  exp(-0.5_f * chiSqr);
31 }
32 
34  SPH_ASSERT(measured.size() == expected.size());
35  Float chiSqr = 0._f;
36  for (Size i = 0; i < measured.size(); ++i) {
37  SPH_ASSERT(measured[i] >= 0._f && expected[i] >= 0._f);
38  if (expected[i] == 0._f) {
39  if (measured[i] == 0._f) {
40  continue;
41  } else {
42  // measured nonzero, but expected is zero -> measured cannot be from the expected
43  // distribution
44  return INFINITY;
45  }
46  }
47  chiSqr += sqr(measured[i] - expected[i]) / expected[i];
48  }
49  return chiSqr;
50 }
51 
52 // Numerical recipes 624
54  constexpr Float eps1 = 1.e-3_f;
55  constexpr Float eps2 = 1.e-8_f;
56 
57  Float Q = 0._f;
58  Float prevTerm = 0._f;
59  for (Size j = 1; j < 100; ++j) {
60  const Float term = (int(isOdd(j)) * 2 - 1) * exp(-2._f * sqr(j) * sqr(x));
61  Q += term;
62  if (abs(term) <= eps1 * prevTerm || abs(term) <= eps2 * Q) {
63  return 2._f * Q;
64  }
65  prevTerm = abs(term);
66  }
67  return 1._f;
68 }
69 
70 static Array<Float> sortData(ArrayView<const Float> data) {
71  Array<Float> sortedData;
72  sortedData.pushAll(data.begin(), data.end());
73  std::sort(sortedData.begin(), sortedData.end(), [](Float p1, Float p2) { return p1 < p2; });
74  return sortedData;
75 }
76 
77 static Array<PlotPoint> makeCdf(ArrayView<const Float> pdf) {
78  Array<Float> sortedPdf = sortData(pdf);
79  Array<PlotPoint> cdf(pdf.size());
80  const Float step = 1._f / (pdf.size() - 1);
81  for (Size i = 0; i < pdf.size(); ++i) {
82  cdf[i] = { sortedPdf[i], i * step };
83  }
84  SPH_ASSERT(cdf.front().y == 0 && cdf.back().y == 1);
85  return cdf;
86 }
87 
88 static Float ksProb(const Float sqrtN, const Float D) {
89  return Post::kolmogorovSmirnovDistribution((sqrtN + 0.12_f + 0.11_f / sqrtN) * D);
90 }
91 
93  const Function<Float(Float)>& expectedCdf) {
94  SPH_ASSERT(data.size() >= 2);
95  Array<PlotPoint> cdf = makeCdf(data);
96 
97  // find the maximum difference (Kolmogorov-Smirnov D)
98  Float D = 0._f;
99  Float prevY = 0._f;
100  for (PlotPoint p : cdf) {
101  const Float expectedY = expectedCdf(p.x);
102  D = max(D, abs(p.y - expectedY), abs(prevY - expectedY));
103  prevY = p.y;
104  }
105  const Float sqrtN = sqrt(Float(data.size()));
106  Float prob = ksProb(sqrtN, D);
107  SPH_ASSERT(prob >= 0._f && prob <= 1._f);
108  return { D, prob };
109 }
110 
112  Array<PlotPoint> cdf1 = makeCdf(data1);
113  Array<PlotPoint> cdf2 = makeCdf(data2);
114 
115  Float D = 0._f;
116  for (Size i = 0, j = 0; i < cdf1.size() && j < cdf2.size();) {
117  if (cdf1[i].x <= cdf2[j].x) {
118  ++i;
119  }
120  if (cdf1[i].x >= cdf2[j].x) {
121  ++j;
122  }
123  D = max(D, abs(cdf1[i].y - cdf2[j].y));
124  }
125 
126  const Float sqrtNe = sqrt(Float(data1.size() * data2.size()) / (data1.size() + data2.size()));
127  Float prob = ksProb(sqrtNe, D);
128  SPH_ASSERT(prob >= 0._f && prob <= 1._f);
129  return { D, prob };
130 }
131 
132 
133 static StaticArray<Float, 4> countQuadrants(const PlotPoint origin, ArrayView<const PlotPoint> data) {
134  StaticArray<Float, 4> quadrants;
135  quadrants.fill(0._f);
136  for (PlotPoint p : data) {
137  if (p.y > origin.y) {
138  p.x > origin.x ? quadrants[0]++ : quadrants[1]++;
139  } else {
140  p.x > origin.x ? quadrants[3]++ : quadrants[2]++;
141  }
142  }
143  for (Float& q : quadrants) {
144  q /= data.size();
145  }
146  return quadrants;
147 }
148 
150  Float D = 0._f;
151  for (PlotPoint p : data) {
152  StaticArray<Float, 4> measuredQuadrants = countQuadrants(p, data);
153  StaticArray<Float, 4> expectedQuadrants = expected(p);
154  for (Size i = 0; i < 4; ++i) {
155  D = max(D, measuredQuadrants[i] - expectedQuadrants[i]);
156  }
157  }
158 
159  const Float sqrtNe = sqrt(Float(data.size()));
160  const Float r = correlationCoefficient(data);
161  const Float prob =
162  kolmogorovSmirnovDistribution(sqrtNe * D / (1._f + sqrt(1._f - sqr(r)) * (0.25_f - 0.75_f / sqrtNe)));
163  SPH_ASSERT(prob >= 0._f && prob <= 1._f);
164  return { D, prob };
165 }
166 
168  return [rangeX, rangeY](PlotPoint p) -> StaticArray<Float, 4> {
169  const Float x = clamp((p.x - rangeX.lower()) / rangeX.size(), 0._f, 1._f);
170  const Float y = clamp((p.y - rangeY.lower()) / rangeY.size(), 0._f, 1._f);
171  return { (1._f - x) * (1._f - y), x * (1._f - y), x * y, (1._f - x) * y };
172  };
173 }
174 
#define SPH_ASSERT(x,...)
Definition: Assert.h:94
NAMESPACE_SPH_BEGIN
Definition: BarnesHut.cpp:13
uint32_t Size
Integral type used to index arrays (by default).
Definition: Globals.h:16
double Float
Precision used withing the code. Use Float instead of float or double where precision is important.
Definition: Globals.h:13
constexpr INLINE T max(const T &f1, const T &f2)
Definition: MathBasic.h:20
constexpr INLINE bool isOdd(const T &f)
Definition: MathBasic.h:40
constexpr INLINE T clamp(const T &f, const T &f1, const T &f2)
Definition: MathBasic.h:35
constexpr INLINE T sqr(const T &f) noexcept
Return a squared value.
Definition: MathUtils.h:67
INLINE T sqrt(const T f)
Return a squared root of a value.
Definition: MathUtils.h:78
constexpr INLINE Float pow(const Float v)
Power for floats.
INLINE T exp(const T f)
Definition: MathUtils.h:269
INLINE auto abs(const T &f)
Definition: MathUtils.h:276
#define NAMESPACE_SPH_END
Definition: Object.h:12
Object providing safe access to continuous memory of data.
Definition: ArrayView.h:17
INLINE TCounter size() const
Definition: ArrayView.h:101
INLINE Iterator< StorageType > begin()
Definition: ArrayView.h:55
INLINE Iterator< StorageType > end()
Definition: ArrayView.h:63
INLINE Iterator< StorageType > end() noexcept
Definition: Array.h:462
INLINE TCounter size() const noexcept
Definition: Array.h:193
INLINE Iterator< StorageType > begin() noexcept
Definition: Array.h:450
void pushAll(const TIter first, const TIter last)
Definition: Array.h:312
Object representing a 1D interval of real numbers.
Definition: Interval.h:17
INLINE Float lower() const
Returns lower bound of the interval.
Definition: Interval.h:74
INLINE Float size() const
Returns the size of the interval.
Definition: Interval.h:89
Array with fixed number of allocated elements.
Definition: StaticArray.h:19
void fill(const T &value)
Assigns a value to all constructed elements of the array.
Definition: StaticArray.h:121
INLINE TCounter size() const
Returns the current size of the array (number of constructed elements).
Definition: StaticArray.h:147
Float kolmogorovSmirnovDistribution(const Float x)
Float chiSquareDistribution(const Float chiSqr, const Float dof)
Float correlationCoefficient(ArrayView< const PlotPoint > points)
KsFunction getUniformKsFunction(Interval rangeX, Interval rangeY)
Float chiSquareTest(ArrayView< const Float > measured, ArrayView< const Float > expected)
KsResult kolmogorovSmirnovTest(ArrayView< const Float > data, const Function< Float(Float)> &expectedCdf)
One-dimensional Kolmogorov-Smirnov test with given CDF of expected probability distribution.
Point in 2D plot.
Definition: Point.h:16
Float y
Definition: Point.h:17
Float x
Definition: Point.h:17