Browse Source

added 'tukey' Box plot mode and outliers plotting

Tukey box plot (introduced by John W. Tukey) takes for whiskers the lowest
datum still within 1.5 IQR of the lower quartile, and the highest datum
still within 1.5 IQR of the upper quartile.
(https://en.wikipedia.org/wiki/Box_plot)
pull/226/head
Jan Richter 10 years ago
parent
commit
8e7efa53bd
  1. 2
      pygal/config.py
  2. 66
      pygal/graph/box.py

2
pygal/config.py

@ -344,7 +344,7 @@ class Config(CommonConfig):
mode = Key(
None, str, "Value", "Sets the mode to be used. "
"(Currently only supported on box plot)",
"May be %s" % ' or '.join(["1.5IQR", "extremes"]))
"May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey"]))
order_min = Key(
None, int, "Value", "Minimum order of scale, defaults to None")

66
pygal/graph/box.py

@ -24,6 +24,7 @@ from __future__ import division
from pygal.graph.graph import Graph
from pygal.util import compute_scale, decorate
from pygal._compat import is_list_like
from bisect import bisect_left, bisect_right
class Box(Graph):
@ -48,9 +49,13 @@ class Box(Graph):
def format_maybe_quartile(x):
if is_list_like(x):
if self.mode == "extremes":
return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' % tuple(map(sup, x))
return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' \
% tuple(map(sup, x[1:6]))
elif self.mode == "tukey":
return 'Min: %s Lower Whisker: %s Q1: %s Q2: %s Q3: %s '\
'Upper Whisker: %s Max: %s' % tuple(map(sup, x))
else:
return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[1:4]))
return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[2:5]))
else:
return sup(x)
return format_maybe_quartile
@ -61,7 +66,9 @@ class Box(Graph):
within the rendering process
"""
for serie in self.series:
serie.values = self._box_points(serie.values, self.mode)
serie.values, serie.outliers = \
self._box_points(serie.values, self.mode)
if self._min:
self._box.ymin = min(self._min, self.zero)
@ -92,7 +99,7 @@ class Box(Graph):
@property
def _len(self):
"""Len is always 5 here"""
return 5
return 7
def _boxf(self, serie):
"""
@ -112,11 +119,12 @@ class Box(Graph):
metadata)
val = self._format(serie.values)
x_center, y_center = self._draw_box(box, serie.values, serie.index)
x_center, y_center = self._draw_box(box, serie.values[1:6],
serie.outliers, serie.index)
self._tooltip_data(box, val, x_center, y_center, classes="centered")
self._static_value(serie_node, val, x_center, y_center)
def _draw_box(self, parent_node, quartiles, box_index):
def _draw_box(self, parent_node, quartiles, outliers, box_index):
"""
Return the center of a bounding box defined by a box plot.
Draws a box plot on self.svg.
@ -164,6 +172,17 @@ class Box(Graph):
width=width,
class_='subtle-fill reactive tooltip-trigger')
# draw outliers
for o in outliers:
self.svg.node(
parent_node,
tag='circle',
cx=left_edge+width/2,
cy=self.view.y(o),
r=3,
class_='subtle-fill reactive tooltip-trigger')
return (left_edge + width / 2, self.view.y(
sum(quartiles) / len(quartiles)))
@ -171,11 +190,13 @@ class Box(Graph):
def _box_points(values, mode='1.5IQR'):
"""
Default mode: (mode='1.5IQR' or unset)
Return a 5-tuple of Q1 - 1.5 * IQR, Q1, Median, Q3,
and Q3 + 1.5 * IQR for a list of numeric values.
Return a 7-tuple of min, Q1 - 1.5 * IQR, Q1, Median, Q3,
Q3 + 1.5 * IQR and max for a list of numeric values.
Extremes mode: (mode='extremes')
Return a 5-tuple of minimum, Q1, Median, Q3,
and maximum for a list of numeric values.
Return a 7-tuple of 2x minimum, Q1, Median, Q3,
and 2x maximum for a list of numeric values.
Outliers (Tukey) mode: (mode='tukey')
Return a 7-tuple of min, q[0..4], max and a list of outliers
The iterator values may include None values.
@ -192,10 +213,11 @@ class Box(Graph):
return seq[n // 2]
# sort the copy in case the originals must stay in original order
outliers = []
s = sorted([x for x in values if x is not None])
n = len(s)
if not n:
return 0, 0, 0, 0, 0
return (0, 0, 0, 0, 0, 0, 0), []
else:
q2 = median(s)
# See 'Method 3' in http://en.wikipedia.org/wiki/Quartile
@ -209,17 +231,31 @@ class Box(Graph):
elif n % 4 == 1: # n is of form 4n + 1 where n >= 1
m = (n - 1) // 4
q1 = 0.25 * s[m-1] + 0.75 * s[m]
q3 = 0.75 * s[3*m] + 0.25 * s[3*m + 1]
q3 = 0.75 * s[3*m] + 0.25 * s[3*m+1]
else: # n is of form 4n + 3 where n >= 1
m = (n - 3) // 4
q1 = 0.75 * s[m] + 0.25 * s[m+1]
q3 = 0.25 * s[3*m+1] + 0.75 * s[3*m+2]
iqr = q3 - q1
min_s = s[0]
max_s = s[-1]
if mode == 'extremes':
q0 = min(s)
q4 = max(s)
q0 = min_s
q4 = max_s
elif mode == 'tukey':
# the lowest datum still within 1.5 IQR of the lower quartile,
# and the highest datum still within 1.5 IQR of the upper
# quartile [Tukey box plot, Wikipedia ]
b0 = bisect_left(s, q1 - 1.5 * iqr)
b4 = bisect_right(s, q3 + 1.5 * iqr)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
#print "Q: [%s,%s,%s,%s,%s] O: %s" \
# % (q0, q1, q2, q3, q4, outliers)
else:
q0 = q1 - 1.5 * iqr
q4 = q3 + 1.5 * iqr
return q0, q1, q2, q3, q4
return (min_s, q0, q1, q2, q3, q4, max_s), outliers

Loading…
Cancel
Save