Add 'stdev' and 'pstdev' whisker modes to the box plot.

In both modes the whiskers reach the outermost data points lying within one
standard deviation of q2 (the median slot of the 7-tuple); values beyond
that band are returned as outliers.  'stdev' uses the sample standard
deviation (n - 1 denominator), 'pstdev' the population standard deviation
(n denominator).

Revision notes:
- dropped the two leftover `print s, sd` debug statements; the one in the
  'pstdev' branch read `sd`, which is never assigned on that path
- inline comments now say the band is centred on q2, matching the code
- added a regression test for mode='pstdev'
- NOTE(review): context-line indentation was reconstructed from a
  whitespace-collapsed copy, and the blob ids on the `index` lines predate
  this revision; regenerate the patch against the target tree (or apply
  with --ignore-whitespace) and confirm.

diff --git a/pygal/config.py b/pygal/config.py
index 733afa6..3e4122a 100644
--- a/pygal/config.py
+++ b/pygal/config.py
@@ -344,7 +344,8 @@ class Config(CommonConfig):
     mode = Key(
         None, str, "Value", "Sets the mode to be used. "
         "(Currently only supported on box plot)",
-        "May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey"]))
+        "May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey", "stdev",
+                                   "pstdev"]))
 
     order_min = Key(
         None, int, "Value", "Minimum order of scale, defaults to None")
diff --git a/pygal/graph/box.py b/pygal/graph/box.py
index e662675..1469bee 100644
--- a/pygal/graph/box.py
+++ b/pygal/graph/box.py
@@ -51,7 +51,7 @@ class Box(Graph):
                 if self.mode == "extremes":
                     return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' \
                         % tuple(map(sup, x[1:6]))
-                elif self.mode == "tukey":
+                elif self.mode in ("tukey", "stdev", "pstdev"):
                     return 'Min: %s Lower Whisker: %s Q1: %s Q2: %s Q3: %s '\
                         'Upper Whisker: %s Max: %s' % tuple(map(sup, x))
                 else:
@@ -195,8 +195,15 @@ class Box(Graph):
         Extremes mode: (mode='extremes')
             Return a 7-tuple of 2x minimum, Q1, Median, Q3,
         and 2x maximum for a list of numeric values.
-        Outliers (Tukey) mode: (mode='tukey')
+        Tukey mode: (mode='tukey')
             Return a 7-tuple of min, q[0..4], max and a list of outliers
+            Outliers are considered values x: x < q1 - IQR or x > q3 + IQR
+        SD mode: (mode='stdev')
+            Return a 7-tuple of min, q[0..4], max and a list of outliers
+            Outliers are values x: x < q2 - SD or x > q2 + SD (sample SD)
+        SDp mode: (mode='pstdev')
+            Return a 7-tuple of min, q[0..4], max and a list of outliers
+            Outliers are x: x < q2 - SDp or x > q2 + SDp (population SD)
 
         The iterator values may include None values.
 
@@ -212,6 +219,24 @@ class Box(Graph):
             else:  # seq has an odd length
                 return seq[n // 2]
 
+        def mean(seq):
+            # float() keeps this a true division even if the module does
+            # not enable `from __future__ import division` on Python 2
+            return sum(seq) / float(len(seq))
+
+        def stdev(seq):
+            # sample standard deviation (n - 1 denominator); raises
+            # ZeroDivisionError for a single-element sequence
+            m = mean(seq)
+            v = sum((x - m) ** 2 for x in seq) / (len(seq) - 1)  # variance
+            return v ** 0.5  # sqrt
+
+        def pstdev(seq):
+            # population standard deviation (n denominator)
+            m = mean(seq)
+            v = sum((x - m) ** 2 for x in seq) / len(seq)  # variance
+            return v ** 0.5  # sqrt
+
         outliers = []
         # sort the copy in case the originals must stay in original order
         s = sorted([x for x in values if x is not None])
@@ -252,7 +277,23 @@ class Box(Graph):
                 q0 = s[b0]
                 q4 = s[b4-1]
                 outliers = s[:b0] + s[b4:]
-
+            elif mode == 'stdev':
+                # whiskers: data within one sample standard deviation of q2
+                sd = stdev(s)
+                b0 = bisect_left(s, q2 - sd)
+                b4 = bisect_right(s, q2 + sd)
+                q0 = s[b0]
+                q4 = s[b4-1]
+                outliers = s[:b0] + s[b4:]
+            elif mode == 'pstdev':
+                # whiskers: data within one population standard deviation
+                # of q2
+                sdp = pstdev(s)
+                b0 = bisect_left(s, q2 - sdp)
+                b4 = bisect_right(s, q2 + sdp)
+                q0 = s[b0]
+                q4 = s[b4-1]
+                outliers = s[:b0] + s[b4:]
             else:
                 q0 = q1 - 1.5 * iqr
                 q4 = q3 + 1.5 * iqr
diff --git a/pygal/test/test_box.py b/pygal/test/test_box.py
index 95105d6..a3f98f7 100644
--- a/pygal/test/test_box.py
+++ b/pygal/test/test_box.py
@@ -113,7 +113,7 @@ def test_quartiles_tukey():
     assert max_s == 75
     assert outliers == [75]
 
-    # one more outlier, -30
+    # one more outlier, 77
     c = [6, 7, 15, 36, 39, 40, 41, 42, 43, 47, 49, 75, 77]
     (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
         c, mode='tukey')
@@ -125,6 +125,34 @@ def test_quartiles_tukey():
     assert 75 in outliers
     assert 77 in outliers
 
+
+def test_quartiles_stdev():
+    a = [35, 42, 35, 41, 36, 6, 12, 51, 33, 27, 46, 36, 44, 53, 75, 46, 16,
+         51, 45, 29, 25, 26, 54, 61, 27, 40, 23, 34, 51, 37]
+    SD = 14.67
+    (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
+        a, mode='stdev')
+    assert min_s == min(a)
+    assert max_s == max(a)
+    assert q2 == 36.5
+    assert q4 <= q2 + SD
+    assert q0 >= q2 - SD
+    assert all(n in outliers for n in [6, 12, 16, 53, 54, 61, 75])
+
+
+def test_quartiles_pstdev():
+    a = [35, 42, 35, 41, 36, 6, 12, 51, 33, 27, 46, 36, 44, 53, 75, 46, 16,
+         51, 45, 29, 25, 26, 54, 61, 27, 40, 23, 34, 51, 37]
+    (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
+        a, mode='pstdev')
+    assert min_s == min(a)
+    assert max_s == max(a)
+    assert q2 == 36.5
+    # population SD is ~14.42, so the three 51s lie above q2 + SDp (~50.92)
+    assert q0 == 23
+    assert q4 == 46
+    assert outliers == [6, 12, 16, 51, 51, 51, 53, 54, 61, 75]
+
 
 def test_simple_box():
     box = ghostedBox()