Browse Source

new stdev and pstdev modes of Box plot

pull/226/head
Jan Richter 9 years ago
parent
commit
04bc001793
  1. 3
      pygal/config.py
  2. 46
      pygal/graph/box.py
  3. 14
      pygal/test/test_box.py

3
pygal/config.py

@ -344,7 +344,8 @@ class Config(CommonConfig):
mode = Key(
None, str, "Value", "Sets the mode to be used. "
"(Currently only supported on box plot)",
"May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey"]))
"May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey", "stdev",\
"pstdev"]))
order_min = Key(
None, int, "Value", "Minimum order of scale, defaults to None")

46
pygal/graph/box.py

@ -51,7 +51,7 @@ class Box(Graph):
if self.mode == "extremes":
return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' \
% tuple(map(sup, x[1:6]))
elif self.mode == "tukey":
elif self.mode in ["tukey", "stdev", "pstdev"]:
return 'Min: %s Lower Whisker: %s Q1: %s Q2: %s Q3: %s '\
'Upper Whisker: %s Max: %s' % tuple(map(sup, x))
else:
@ -195,8 +195,15 @@ class Box(Graph):
Extremes mode: (mode='extremes')
Return a 7-tuple of 2x minimum, Q1, Median, Q3,
and 2x maximum for a list of numeric values.
Outliers (Tukey) mode: (mode='tukey')
Tukey mode: (mode='tukey')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q1 - IQR or x > q3 + IQR
SD mode: (mode='stdev')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q2 - SD or x > q2 + SD
SDp mode: (mode='pstdev')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q2 - SDp or x > q2 + SDp
The iterator values may include None values.
@ -212,6 +219,21 @@ class Box(Graph):
else: # seq has an odd length
return seq[n // 2]
def mean(seq):
    """Return the arithmetic mean of the numbers in ``seq`` as a float.

    Raises ZeroDivisionError when ``seq`` is empty.
    """
    # float() prevents Python 2 from truncating the mean of integer data.
    return sum(seq) / float(len(seq))

def stdev(seq):
    """Return the sample standard deviation of ``seq`` (n - 1 divisor).

    A single value has no spread, so 0.0 is returned for it instead of
    dividing by zero. Raises ZeroDivisionError when ``seq`` is empty.
    """
    count = len(seq)
    if count == 1:
        return 0.0
    centre = mean(seq)
    squared_error = sum((value - centre) ** 2 for value in seq)
    variance = squared_error / float(count - 1)
    return variance ** 0.5  # square root

def pstdev(seq):
    """Return the population standard deviation of ``seq`` (n divisor).

    Raises ZeroDivisionError when ``seq`` is empty.
    """
    centre = mean(seq)
    squared_error = sum((value - centre) ** 2 for value in seq)
    variance = squared_error / float(len(seq))
    return variance ** 0.5  # square root
outliers = []
# sort the copy in case the originals must stay in original order
s = sorted([x for x in values if x is not None])
@ -252,7 +274,25 @@ class Box(Graph):
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
elif mode == 'stdev':
# one standard deviation above and below the mean of the data
sd = stdev(s)
print s, sd
b0 = bisect_left(s, q2 - sd)
b4 = bisect_right(s, q2 + sd)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
elif mode == 'pstdev':
# one population standard deviation above and below
# the mean of the data
sdp = pstdev(s)
print s, sd
b0 = bisect_left(s, q2 - sdp)
b4 = bisect_right(s, q2 + sdp)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
else:
q0 = q1 - 1.5 * iqr
q4 = q3 + 1.5 * iqr

14
pygal/test/test_box.py

@ -113,7 +113,7 @@ def test_quartiles_tukey():
assert max_s == 75
assert outliers == [75]
# one more outlier, -30
# one more outlier, 77
c = [6, 7, 15, 36, 39, 40, 41, 42, 43, 47, 49, 75, 77]
(min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
c, mode='tukey')
@ -125,6 +125,18 @@ def test_quartiles_tukey():
assert 75 in outliers
assert 77 in outliers
def test_quartiles_stdev():
    """Check whiskers and outliers produced by the 'stdev' box mode."""
    samples = [
        35, 42, 35, 41, 36, 6, 12, 51, 33, 27, 46, 36, 44, 53, 75, 46, 16,
        51, 45, 29, 25, 26, 54, 61, 27, 40, 23, 34, 51, 37]
    # Sample standard deviation of the data above, rounded to two decimals.
    spread = 14.67
    points, outliers = Box._box_points(samples, mode='stdev')
    min_s, q0, q1, q2, q3, q4, max_s = points

    assert min_s == min(samples)
    assert max_s == max(samples)
    assert q2 == 36.5
    # Whiskers must stay within one standard deviation of the median.
    assert q4 <= q2 + spread
    assert q0 >= q2 - spread
    expected_outliers = [6, 12, 16, 53, 54, 61, 75]
    assert all(value in outliers for value in expected_outliers)
def test_simple_box():
box = ghostedBox()

Loading…
Cancel
Save