Browse Source

Added 'tukey' box plot mode and outlier plotting

A Tukey box plot (introduced by John W. Tukey) draws its whiskers at the
lowest datum still within 1.5 IQR of the lower quartile and at the highest
datum still within 1.5 IQR of the upper quartile. Data points lying beyond
the whiskers are plotted individually as outliers.
(https://en.wikipedia.org/wiki/Box_plot)
pull/226/head
Jan Richter 10 years ago
parent
commit
8e7efa53bd
  1. 2
      pygal/config.py
  2. 66
      pygal/graph/box.py

2
pygal/config.py

@ -344,7 +344,7 @@ class Config(CommonConfig):
mode = Key( mode = Key(
None, str, "Value", "Sets the mode to be used. " None, str, "Value", "Sets the mode to be used. "
"(Currently only supported on box plot)", "(Currently only supported on box plot)",
"May be %s" % ' or '.join(["1.5IQR", "extremes"])) "May be %s" % ' or '.join(["1.5IQR", "extremes", "tukey"]))
order_min = Key( order_min = Key(
None, int, "Value", "Minimum order of scale, defaults to None") None, int, "Value", "Minimum order of scale, defaults to None")

66
pygal/graph/box.py

@ -24,6 +24,7 @@ from __future__ import division
from pygal.graph.graph import Graph from pygal.graph.graph import Graph
from pygal.util import compute_scale, decorate from pygal.util import compute_scale, decorate
from pygal._compat import is_list_like from pygal._compat import is_list_like
from bisect import bisect_left, bisect_right
class Box(Graph): class Box(Graph):
@ -48,9 +49,13 @@ class Box(Graph):
def format_maybe_quartile(x): def format_maybe_quartile(x):
if is_list_like(x): if is_list_like(x):
if self.mode == "extremes": if self.mode == "extremes":
return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' % tuple(map(sup, x)) return 'Min: %s Q1: %s Q2: %s Q3: %s Max: %s' \
% tuple(map(sup, x[1:6]))
elif self.mode == "tukey":
return 'Min: %s Lower Whisker: %s Q1: %s Q2: %s Q3: %s '\
'Upper Whisker: %s Max: %s' % tuple(map(sup, x))
else: else:
return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[1:4])) return 'Q1: %s Q2: %s Q3: %s' % tuple(map(sup, x[2:5]))
else: else:
return sup(x) return sup(x)
return format_maybe_quartile return format_maybe_quartile
@ -61,7 +66,9 @@ class Box(Graph):
within the rendering process within the rendering process
""" """
for serie in self.series: for serie in self.series:
serie.values = self._box_points(serie.values, self.mode) serie.values, serie.outliers = \
self._box_points(serie.values, self.mode)
if self._min: if self._min:
self._box.ymin = min(self._min, self.zero) self._box.ymin = min(self._min, self.zero)
@ -92,7 +99,7 @@ class Box(Graph):
@property @property
def _len(self): def _len(self):
"""Len is always 5 here""" """Len is always 5 here"""
return 5 return 7
def _boxf(self, serie): def _boxf(self, serie):
""" """
@ -112,11 +119,12 @@ class Box(Graph):
metadata) metadata)
val = self._format(serie.values) val = self._format(serie.values)
x_center, y_center = self._draw_box(box, serie.values, serie.index) x_center, y_center = self._draw_box(box, serie.values[1:6],
serie.outliers, serie.index)
self._tooltip_data(box, val, x_center, y_center, classes="centered") self._tooltip_data(box, val, x_center, y_center, classes="centered")
self._static_value(serie_node, val, x_center, y_center) self._static_value(serie_node, val, x_center, y_center)
def _draw_box(self, parent_node, quartiles, box_index): def _draw_box(self, parent_node, quartiles, outliers, box_index):
""" """
Return the center of a bounding box defined by a box plot. Return the center of a bounding box defined by a box plot.
Draws a box plot on self.svg. Draws a box plot on self.svg.
@ -164,6 +172,17 @@ class Box(Graph):
width=width, width=width,
class_='subtle-fill reactive tooltip-trigger') class_='subtle-fill reactive tooltip-trigger')
# draw outliers
for o in outliers:
self.svg.node(
parent_node,
tag='circle',
cx=left_edge+width/2,
cy=self.view.y(o),
r=3,
class_='subtle-fill reactive tooltip-trigger')
return (left_edge + width / 2, self.view.y( return (left_edge + width / 2, self.view.y(
sum(quartiles) / len(quartiles))) sum(quartiles) / len(quartiles)))
@ -171,11 +190,13 @@ class Box(Graph):
def _box_points(values, mode='1.5IQR'): def _box_points(values, mode='1.5IQR'):
""" """
Default mode: (mode='1.5IQR' or unset) Default mode: (mode='1.5IQR' or unset)
Return a 5-tuple of Q1 - 1.5 * IQR, Q1, Median, Q3, Return a 7-tuple of min, Q1 - 1.5 * IQR, Q1, Median, Q3,
and Q3 + 1.5 * IQR for a list of numeric values. Q3 + 1.5 * IQR and max for a list of numeric values.
Extremes mode: (mode='extremes') Extremes mode: (mode='extremes')
Return a 5-tuple of minimum, Q1, Median, Q3, Return a 7-tuple of 2x minimum, Q1, Median, Q3,
and maximum for a list of numeric values. and 2x maximum for a list of numeric values.
Outliers (Tukey) mode: (mode='tukey')
Return a 7-tuple of min, q[0..4], max and a list of outliers
The iterator values may include None values. The iterator values may include None values.
@ -192,10 +213,11 @@ class Box(Graph):
return seq[n // 2] return seq[n // 2]
# sort the copy in case the originals must stay in original order # sort the copy in case the originals must stay in original order
outliers = []
s = sorted([x for x in values if x is not None]) s = sorted([x for x in values if x is not None])
n = len(s) n = len(s)
if not n: if not n:
return 0, 0, 0, 0, 0 return (0, 0, 0, 0, 0, 0, 0), []
else: else:
q2 = median(s) q2 = median(s)
# See 'Method 3' in http://en.wikipedia.org/wiki/Quartile # See 'Method 3' in http://en.wikipedia.org/wiki/Quartile
@ -209,17 +231,31 @@ class Box(Graph):
elif n % 4 == 1: # n is of form 4n + 1 where n >= 1 elif n % 4 == 1: # n is of form 4n + 1 where n >= 1
m = (n - 1) // 4 m = (n - 1) // 4
q1 = 0.25 * s[m-1] + 0.75 * s[m] q1 = 0.25 * s[m-1] + 0.75 * s[m]
q3 = 0.75 * s[3*m] + 0.25 * s[3*m + 1] q3 = 0.75 * s[3*m] + 0.25 * s[3*m+1]
else: # n is of form 4n + 3 where n >= 1 else: # n is of form 4n + 3 where n >= 1
m = (n - 3) // 4 m = (n - 3) // 4
q1 = 0.75 * s[m] + 0.25 * s[m+1] q1 = 0.75 * s[m] + 0.25 * s[m+1]
q3 = 0.25 * s[3*m+1] + 0.75 * s[3*m+2] q3 = 0.25 * s[3*m+1] + 0.75 * s[3*m+2]
iqr = q3 - q1 iqr = q3 - q1
min_s = s[0]
max_s = s[-1]
if mode == 'extremes': if mode == 'extremes':
q0 = min(s) q0 = min_s
q4 = max(s) q4 = max_s
elif mode == 'tukey':
# the lowest datum still within 1.5 IQR of the lower quartile,
# and the highest datum still within 1.5 IQR of the upper
# quartile [Tukey box plot, Wikipedia ]
b0 = bisect_left(s, q1 - 1.5 * iqr)
b4 = bisect_right(s, q3 + 1.5 * iqr)
q0 = s[b0]
q4 = s[b4-1]
outliers = s[:b0] + s[b4:]
#print "Q: [%s,%s,%s,%s,%s] O: %s" \
# % (q0, q1, q2, q3, q4, outliers)
else: else:
q0 = q1 - 1.5 * iqr q0 = q1 - 1.5 * iqr
q4 = q3 + 1.5 * iqr q4 = q3 + 1.5 * iqr
return q0, q1, q2, q3, q4 return (min_s, q0, q1, q2, q3, q4, max_s), outliers

Loading…
Cancel
Save