Browse Source

Merge branch 'feature-box-modes' of https://github.com/dzejar/pygal into dzejar-feature-box-modes

pull/219/merge
Florian Mounier 10 years ago
parent
commit
f103cbdb23
  1. 3
      pygal/config.py
  2. 108
      pygal/graph/box.py
  3. 81
      pygal/test/test_box.py

3
pygal/config.py

@ -344,7 +344,8 @@ class Config(CommonConfig):
mode = Key( mode = Key(
None, str, "Value", "Sets the mode to be used. " None, str, "Value", "Sets the mode to be used. "
"(Currently only supported on box plot)", "(Currently only supported on box plot)",
"May be %s" % ' or '.join(["1.5IQR", "extremes"])) "May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey", "stdev",\
"pstdev"]))
order_min = Key( order_min = Key(
None, int, "Value", "Minimum order of scale, defaults to None") None, int, "Value", "Minimum order of scale, defaults to None")

108
pygal/graph/box.py

@ -24,6 +24,7 @@ from __future__ import division
from pygal.graph.graph import Graph from pygal.graph.graph import Graph
from pygal.util import compute_scale, decorate from pygal.util import compute_scale, decorate
from pygal._compat import is_list_like from pygal._compat import is_list_like
from bisect import bisect_left, bisect_right
class Box(Graph): class Box(Graph):
@ -48,9 +49,14 @@ class Box(Graph):
def format_maybe_quartile(x): def format_maybe_quartile(x):
if is_list_like(x): if is_list_like(x):
if self.mode == "extremes": if self.mode == "extremes":
return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' % tuple(map(sup, x)) return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' \
% tuple(map(sup, x[1:6]))
elif self.mode in ["tukey", "stdev", "pstdev"]:
return 'Min: %s Lower Whisker: %s Q1: %s Q2: %s Q3: %s '\
'Upper Whisker: %s Max: %s' % tuple(map(sup, x))
else: else:
return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[1:4])) # 1.5IQR mode
return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[2:5]))
else: else:
return sup(x) return sup(x)
return format_maybe_quartile return format_maybe_quartile
@ -61,7 +67,9 @@ class Box(Graph):
within the rendering process within the rendering process
""" """
for serie in self.series: for serie in self.series:
serie.values = self._box_points(serie.values, self.mode) serie.values, serie.outliers = \
self._box_points(serie.values, self.mode)
if self._min: if self._min:
self._box.ymin = min(self._min, self.zero) self._box.ymin = min(self._min, self.zero)
@ -91,8 +99,8 @@ class Box(Graph):
@property @property
def _len(self): def _len(self):
"""Len is always 5 here""" """Len is always 7 here"""
return 5 return 7
def _boxf(self, serie): def _boxf(self, serie):
""" """
@ -112,11 +120,12 @@ class Box(Graph):
metadata) metadata)
val = self._format(serie.values) val = self._format(serie.values)
x_center, y_center = self._draw_box(box, serie.values, serie.index) x_center, y_center = self._draw_box(box, serie.values[1:6],
serie.outliers, serie.index)
self._tooltip_data(box, val, x_center, y_center, classes="centered") self._tooltip_data(box, val, x_center, y_center, classes="centered")
self._static_value(serie_node, val, x_center, y_center) self._static_value(serie_node, val, x_center, y_center)
def _draw_box(self, parent_node, quartiles, box_index): def _draw_box(self, parent_node, quartiles, outliers, box_index):
""" """
Return the center of a bounding box defined by a box plot. Return the center of a bounding box defined by a box plot.
Draws a box plot on self.svg. Draws a box plot on self.svg.
@ -164,6 +173,17 @@ class Box(Graph):
width=width, width=width,
class_='subtle-fill reactive tooltip-trigger') class_='subtle-fill reactive tooltip-trigger')
# draw outliers
for o in outliers:
self.svg.node(
parent_node,
tag='circle',
cx=left_edge+width/2,
cy=self.view.y(o),
r=3,
class_='subtle-fill reactive tooltip-trigger')
return (left_edge + width / 2, self.view.y( return (left_edge + width / 2, self.view.y(
sum(quartiles) / len(quartiles))) sum(quartiles) / len(quartiles)))
@ -171,11 +191,20 @@ class Box(Graph):
def _box_points(values, mode='1.5IQR'): def _box_points(values, mode='1.5IQR'):
""" """
Default mode: (mode='1.5IQR' or unset) Default mode: (mode='1.5IQR' or unset)
Return a 5-tuple of Q1 - 1.5 * IQR, Q1, Median, Q3, Return a 7-tuple of min, Q1 - 1.5 * IQR, Q1, Median, Q3,
and Q3 + 1.5 * IQR for a list of numeric values. Q3 + 1.5 * IQR and max for a list of numeric values.
Extremes mode: (mode='extremes') Extremes mode: (mode='extremes')
Return a 5-tuple of minimum, Q1, Median, Q3, Return a 7-tuple of 2x minimum, Q1, Median, Q3,
and maximum for a list of numeric values. and 2x maximum for a list of numeric values.
Tukey mode: (mode='tukey')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q1 - 1.5 * IQR or x > q3 + 1.5 * IQR
SD mode: (mode='stdev')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q2 - SD or x > q2 + SD
SDp mode: (mode='pstdev')
Return a 7-tuple of min, q[0..4], max and a list of outliers
Outliers are considered values x: x < q2 - SDp or x > q2 + SDp
The iterator values may include None values. The iterator values may include None values.
@ -191,11 +220,29 @@ class Box(Graph):
else: # seq has an odd length else: # seq has an odd length
return seq[n // 2] return seq[n // 2]
def mean(seq):
    """Return the arithmetic mean of the values in seq."""
    total = sum(seq)
    count = len(seq)
    return total / count
def stdev(seq):
    """Return the sample standard deviation of seq (divides by len - 1)."""
    centre = mean(seq)
    count = len(seq)
    squared_deviation = sum((value - centre) ** 2 for value in seq)
    variance = squared_deviation / (count - 1)
    # Square root of the variance.
    return variance ** 0.5
def pstdev(seq):
    """Return the population standard deviation of seq (divides by len)."""
    centre = mean(seq)
    count = len(seq)
    squared_deviation = sum((value - centre) ** 2 for value in seq)
    variance = squared_deviation / count
    # Square root of the variance.
    return variance ** 0.5
outliers = []
# sort the copy in case the originals must stay in original order # sort the copy in case the originals must stay in original order
s = sorted([x for x in values if x is not None]) s = sorted([x for x in values if x is not None])
n = len(s) n = len(s)
if not n: if not n:
return 0, 0, 0, 0, 0 return (0, 0, 0, 0, 0, 0, 0), []
elif n == 1:
return (s[0], s[0], s[0], s[0], s[0], s[0], s[0]), []
else: else:
q2 = median(s) q2 = median(s)
# See 'Method 3' in http://en.wikipedia.org/wiki/Quartile # See 'Method 3' in http://en.wikipedia.org/wiki/Quartile
@ -209,17 +256,46 @@ class Box(Graph):
elif n % 4 == 1: # n is of form 4n + 1 where n >= 1 elif n % 4 == 1: # n is of form 4n + 1 where n >= 1
m = (n - 1) // 4 m = (n - 1) // 4
q1 = 0.25 * s[m-1] + 0.75 * s[m] q1 = 0.25 * s[m-1] + 0.75 * s[m]
q3 = 0.75 * s[3*m] + 0.25 * s[3*m + 1] q3 = 0.75 * s[3*m] + 0.25 * s[3*m+1]
else: # n is of form 4n + 3 where n >= 1 else: # n is of form 4n + 3 where n >= 1
m = (n - 3) // 4 m = (n - 3) // 4
q1 = 0.75 * s[m] + 0.25 * s[m+1] q1 = 0.75 * s[m] + 0.25 * s[m+1]
q3 = 0.25 * s[3*m+1] + 0.75 * s[3*m+2] q3 = 0.25 * s[3*m+1] + 0.75 * s[3*m+2]
iqr = q3 - q1 iqr = q3 - q1
min_s = s[0]
max_s = s[-1]
if mode == 'extremes': if mode == 'extremes':
q0 = min(s) q0 = min_s
q4 = max(s) q4 = max_s
elif mode == 'tukey':
# the lowest datum still within 1.5 IQR of the lower quartile,
# and the highest datum still within 1.5 IQR of the upper
# quartile [Tukey box plot, Wikipedia ]
b0 = bisect_left(s, q1 - 1.5 * iqr)
b4 = bisect_right(s, q3 + 1.5 * iqr)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
elif mode == 'stdev':
# one standard deviation above and below the median (q2) of the data
sd = stdev(s)
b0 = bisect_left(s, q2 - sd)
b4 = bisect_right(s, q2 + sd)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
elif mode == 'pstdev':
# one population standard deviation above and below
# the median (q2) of the data
sdp = pstdev(s)
b0 = bisect_left(s, q2 - sdp)
b4 = bisect_right(s, q2 + sdp)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
else: else:
# 1.5IQR mode
q0 = q1 - 1.5 * iqr q0 = q1 - 1.5 * iqr
q4 = q3 + 1.5 * iqr q4 = q3 + 1.5 * iqr
return q0, q1, q2, q3, q4 return (min_s, q0, q1, q2, q3, q4, max_s), outliers

81
pygal/test/test_box.py

@ -22,7 +22,7 @@ from pygal import Box as ghostedBox
def test_quartiles(): def test_quartiles():
a = [-2.0, 3.0, 4.0, 5.0, 8.0] # odd test data a = [-2.0, 3.0, 4.0, 5.0, 8.0] # odd test data
q0, q1, q2, q3, q4 = Box._box_points(a) (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(a)
assert q1 == 7.0 / 4.0 assert q1 == 7.0 / 4.0
assert q2 == 4.0 assert q2 == 4.0
@ -31,17 +31,17 @@ def test_quartiles():
assert q4 == 23 / 4.0 + 6.0 # q3 + 1.5 * iqr assert q4 == 23 / 4.0 + 6.0 # q3 + 1.5 * iqr
b = [1.0, 4.0, 6.0, 8.0] # even test data b = [1.0, 4.0, 6.0, 8.0] # even test data
q0, q1, q2, q3, q4 = Box._box_points(b) (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(b)
assert q2 == 5.0 assert q2 == 5.0
c = [2.0, None, 4.0, 6.0, None] # odd with None elements c = [2.0, None, 4.0, 6.0, None] # odd with None elements
q0, q1, q2, q3, q4 = Box._box_points(c) (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(c)
assert q2 == 4.0 assert q2 == 4.0
d = [4] d = [4]
q0, q1, q2, q3, q4 = Box._box_points(d) (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(d)
assert q0 == 4 assert q0 == 4
assert q1 == 4 assert q1 == 4
@ -49,9 +49,11 @@ def test_quartiles():
assert q3 == 4 assert q3 == 4
assert q4 == 4 assert q4 == 4
def test_quartiles_min_extremes(): def test_quartiles_min_extremes():
a = [-2.0, 3.0, 4.0, 5.0, 8.0] # odd test data a = [-2.0, 3.0, 4.0, 5.0, 8.0] # odd test data
q0, q1, q2, q3, q4 = Box._box_points(a, mode='extremes') (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
a, mode='extremes')
assert q1 == 7.0 / 4.0 assert q1 == 7.0 / 4.0
assert q2 == 4.0 assert q2 == 4.0
@ -60,17 +62,20 @@ def test_quartiles_min_extremes():
assert q4 == 8.0 # max assert q4 == 8.0 # max
b = [1.0, 4.0, 6.0, 8.0] # even test data b = [1.0, 4.0, 6.0, 8.0] # even test data
q0, q1, q2, q3, q4 = Box._box_points(b, mode='extremes') (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
b, mode='extremes')
assert q2 == 5.0 assert q2 == 5.0
c = [2.0, None, 4.0, 6.0, None] # odd with None elements c = [2.0, None, 4.0, 6.0, None] # odd with None elements
q0, q1, q2, q3, q4 = Box._box_points(c, mode='extremes') (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
c, mode='extremes')
assert q2 == 4.0 assert q2 == 4.0
d = [4] d = [4]
q0, q1, q2, q3, q4 = Box._box_points(d, mode='extremes') (min_s, q0, q1, q2, q3, q4, max_s), outliers = Box._box_points(
d, mode='extremes')
assert q0 == 4 assert q0 == 4
assert q1 == 4 assert q1 == 4
@ -79,6 +84,66 @@ def test_quartiles_min_extremes():
assert q4 == 4 assert q4 == 4
def test_quartiles_tukey():
    """Check the 'tukey' mode box points and outliers of Box._box_points."""
    # Empty data: the checked points are all zero and nothing is an outlier.
    points, extras = Box._box_points([], mode='tukey')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert lowest == whisker_lo == q1 == q2 == q3 == whisker_hi == 0
    assert extras == []

    # https://en.wikipedia.org/wiki/Quartile example 1
    base = [6, 7, 15, 36, 39, 40, 41, 42, 43, 47, 49]
    points, extras = Box._box_points(base, mode='tukey')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert lowest == whisker_lo == 6
    assert q1 == 20.25
    assert q2 == 40
    assert q3 == 42.75
    assert highest == whisker_hi == 49
    assert extras == []

    # Same data with the outlier 75 appended.
    points, extras = Box._box_points(base + [75], mode='tukey')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert lowest == whisker_lo == 6
    assert q1 == 25.5
    assert q2 == (40 + 41) / 2.0
    assert q3 == 45
    assert highest == 75
    assert extras == [75]

    # One more outlier, 77.
    points, extras = Box._box_points(base + [75, 77], mode='tukey')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert lowest == whisker_lo == 6
    assert q1 == 30.75
    assert q2 == 41
    assert q3 == 47.5
    assert highest == 77
    assert 75 in extras
    assert 77 in extras
def test_quartiles_stdev():
    """Check the 'stdev' mode box points and outliers of Box._box_points."""
    sample = [35, 42, 35, 41, 36, 6, 12, 51, 33, 27, 46, 36, 44, 53, 75,
              46, 16, 51, 45, 29, 25, 26, 54, 61, 27, 40, 23, 34, 51, 37]
    approx_sd = 14.67
    points, extras = Box._box_points(sample, mode='stdev')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert lowest == min(sample)
    assert highest == max(sample)
    assert q2 == 36.5
    assert whisker_hi <= q2 + approx_sd
    assert whisker_lo >= q2 - approx_sd
    for expected in [6, 12, 16, 53, 54, 61, 75]:
        assert expected in extras

    # Lone value: test for a possible zero division.
    single = [5]
    points, extras = Box._box_points(single, mode='stdev')
    lowest, whisker_lo, q1, q2, q3, whisker_hi, highest = points
    assert (lowest == whisker_lo == q1 == q2 == q3 == whisker_hi
            == highest == single[0])
    assert extras == []
def test_simple_box(): def test_simple_box():
box = ghostedBox() box = ghostedBox()
box.add('test1', [-1, 2, 3, 3.1, 3.2, 4, 5]) box.add('test1', [-1, 2, 3, 3.1, 3.2, 4, 5])

Loading…
Cancel
Save