Browse Source

Confidence intervals (thanks @chartique). Fixes #292. Uses metadata to specify the CI. Still missing: automatic viewport adjustment (computing ymin/ymax from the CI).

pull/293/head
Florian Mounier 9 years ago
parent
commit
5996d0569c
  1. 54
      demo/moulinrouge/tests.py
  2. 2
      docs/changelog.rst
  3. 6
      pygal/css/style.css
  4. 11
      pygal/graph/bar.py
  5. 19
      pygal/graph/graph.py
  6. 5
      pygal/graph/line.py
  7. 1
      pygal/graph/stackedbar.py
  8. 72
      pygal/stats.py
  9. 13
      pygal/style.py
  10. 24
      pygal/svg.py
  11. 1
      pygal/util.py

54
demo/moulinrouge/tests.py

@ -24,6 +24,7 @@ except ImportError:
from flask import abort from flask import abort
from pygal.style import styles, Style, RotateStyle from pygal.style import styles, Style, RotateStyle
from pygal.colors import rotate from pygal.colors import rotate
from pygal import stats
from pygal.graph.horizontal import HorizontalGraph from pygal.graph.horizontal import HorizontalGraph
from random import randint, choice from random import randint, choice
from datetime import datetime, date from datetime import datetime, date
@ -1023,6 +1024,59 @@ def get_test_routes(app):
chart.interpolate = 'cubic' chart.interpolate = 'cubic'
return chart.render_response() return chart.render_response()
@app.route('/test/erfinv/approx')
def test_erfinv():
    """Plot scipy's normal ppf next to pygal's elementary approximation.

    Both series are sampled at x/1000 for x in 1..998 so the two curves
    can be compared visually.
    """
    from scipy import stats as sstats

    # Float divisor: on Python 2 ``x / 1000`` would be integer division
    # and every sample would collapse to 0.
    probabilities = [x / 1000.0 for x in range(1, 999)]
    reference = [sstats.norm.ppf(p) for p in probabilities]

    chart = Line(show_dots=False)
    chart.add('scipy', reference)
    # ``stats.ppf`` requires a sample size argument; the function being
    # compared against scipy here is the pure python ``norm_ppf``.
    chart.add('approx', [stats.norm_ppf(p) for p in probabilities])
    # NOTE(review): the scipy serie was added a second time in the original
    # code; kept as is -- confirm whether this is intentional.
    chart.add('scipy', reference)
    return chart.render_response()
@app.route('/test/ci/<chart>')
def test_ci_for(chart):
    """Render the chart type named in the URL with confidence intervals.

    Three series are added: continuous intervals at several confidence
    levels, one dichotomous interval and a set of manual low/high bounds.
    """
    chart_class = CHARTS_BY_NAME[chart]
    ci_style = styles['default'](
        value_font_family='googlefont:Raleway',
        value_colors=(None, None, 'blue', 'red', 'green'),
        ci_colors=(None, 'magenta'))
    graph = chart_class(
        confidence_interval_proportion=True, style=ci_style)

    def continuous(**extra):
        # A fresh dict per point: every value gets its own 'ci' mapping
        ci = {'type': 'continuous', 'sample_size': 3534, 'stddev': 19}
        ci.update(extra)
        return {'value': 127.3, 'ci': ci}

    graph.add('Series 1', [
        continuous(confidence=.99),
        continuous(),
        continuous(confidence=.90),
        continuous(confidence=.75),
    ])
    graph.add('Series 2', [
        {'value': 34.5, 'ci': {
            'type': 'dichotomous', 'sample_size': 3532}},
    ])
    manual_bounds = (
        (100, 50, 150),
        (100, 75, 175),
        (50, 50, 100),
        (125, 120, 130),
    )
    graph.add('Series 3', [
        {'value': value, 'ci': {'low': low, 'high': high}}
        for value, low, high in manual_bounds
    ])
    graph.range = (30, 200)
    return graph.render_response()
return list(sorted(filter( return list(sorted(filter(
lambda x: x.startswith('test') and not x.endswith('_for'), locals())) lambda x: x.startswith('test') and not x.endswith('_for'), locals()))
) + list(sorted(filter( ) + list(sorted(filter(

2
docs/changelog.rst

@ -6,6 +6,8 @@ Changelog
====== ======
* Bar print value positioning with `print_values_position`. Can be `top`, `center` or `bottom` (thanks @chartique #291) * Bar print value positioning with `print_values_position`. Can be `top`, `center` or `bottom` (thanks @chartique #291)
* Confidence intervals (thanks @chartique #292)
2.0.12 2.0.12
====== ======

6
pygal/css/style.css

@ -94,6 +94,10 @@
fill-opacity: {{ style.opacity }}; fill-opacity: {{ style.opacity }};
} }
{{ id }}.ci {
stroke: {{ style.foreground }};
}
{{ id }}.reactive.active, {{ id }}.reactive.active,
{{ id }}.active .reactive { {{ id }}.active .reactive {
fill-opacity: {{ style.opacity_hover }}; fill-opacity: {{ style.opacity_hover }};
@ -151,5 +155,3 @@
{{ colors }} {{ colors }}
{{ strokes }} {{ strokes }}

11
pygal/graph/bar.py

@ -116,11 +116,18 @@ class Bar(Graph):
self.svg.node(bars, class_='bar'), self.svg.node(bars, class_='bar'),
metadata) metadata)
bounds = self._bar( x_, y_, width, height = self._bar(
serie, bar, x, y, i, self.zero, secondary=rescale) serie, bar, x, y, i, self.zero, secondary=rescale)
print(y_)
self._confidence_interval(
serie_node['overlay'], x_ + width / 2, y_, serie.values[i],
metadata)
self._tooltip_and_print_values( self._tooltip_and_print_values(
serie_node, serie, bar, i, val, metadata, *bounds) serie_node, serie, bar, i, val, metadata,
x_, y_, width, height)
def _compute(self): def _compute(self):
"""Compute y min and max and y scale and set labels""" """Compute y min and max and y scale and set labels"""

19
pygal/graph/graph.py

@ -27,6 +27,7 @@ from math import ceil, cos, sin, sqrt
from pygal._compat import is_list_like, is_str, to_str from pygal._compat import is_list_like, is_str, to_str
from pygal.graph.public import PublicApi from pygal.graph.public import PublicApi
from pygal.interpolate import INTERPOLATIONS from pygal.interpolate import INTERPOLATIONS
from pygal import stats
from pygal.util import ( from pygal.util import (
cached_property, compute_scale, cut, decorate, cached_property, compute_scale, cut, decorate,
get_text_box, get_texts_box, humanize, majorize, rad, reverse_text_len, get_text_box, get_texts_box, humanize, majorize, rad, reverse_text_len,
@ -684,7 +685,6 @@ class Graph(PublicApi):
# Inner margin # Inner margin
if self.print_values_position == 'top': if self.print_values_position == 'top':
gw = self.width - self.margin_box.x
gh = self.height - self.margin_box.y gh = self.height - self.margin_box.y
alpha = 1.1 * (self.style.value_font_size / gh) * self._box.height alpha = 1.1 * (self.style.value_font_size / gh) * self._box.height
if self._max > 0: if self._max > 0:
@ -692,6 +692,23 @@ class Graph(PublicApi):
if self._min < 0: if self._min < 0:
self._box.ymin -= alpha self._box.ymin -= alpha
def _confidence_interval(self, node, x, y, value, metadata):
    """Draw the confidence interval described by ``metadata['ci']``, if any.

    ``metadata['ci']`` is a mapping of keyword arguments for one of the
    ``pygal.stats.confidence_interval_<type>`` functions; its optional
    ``'type'`` key selects the function and defaults to ``'manual'``.
    Nothing is drawn when there is no metadata or no ``'ci'`` entry.

    :param node: svg node the interval is attached to
    :param x: x position of the interval in view coordinates
    :param y: y position at which ``value`` has been drawn
    :param value: the point estimate the interval is built around
    :param metadata: metadata mapping of the value, may be empty or None
    """
    if not metadata or 'ci' not in metadata:
        return

    # Work on a copy: the metadata belongs to the caller and must not
    # gain a 'point_estimate' key as a side effect of rendering.
    params = dict(metadata['ci'], point_estimate=value)
    # 'type' only selects the estimator, it is not one of its parameters
    # (passing it on would break an explicit ``'type': 'manual'``).
    kind = params.pop('type', 'manual')
    estimator = getattr(stats, 'confidence_interval_%s' % kind)
    low, high = estimator(**params)

    # Respect some charts y modifications (pyramid, stackbar): the bounds
    # are placed relative to where the value has actually been drawn.
    value_y = self.view.y(value)
    self.svg.confidence_interval(
        node, x,
        y + (self.view.y(low) - value_y),
        y + (self.view.y(high) - value_y))
@cached_property @cached_property
def _legends(self): def _legends(self):
"""Getter for series title""" """Getter for series title"""

5
pygal/graph/line.py

@ -111,10 +111,15 @@ class Line(Graph):
if y > self.view.height / 2: if y > self.view.height / 2:
classes.append('top') classes.append('top')
classes = ' '.join(classes) classes = ' '.join(classes)
self._confidence_interval(
serie_node['overlay'], x, y, serie.values[i], metadata)
dots = decorate( dots = decorate(
self.svg, self.svg,
self.svg.node(serie_node['overlay'], class_="dots"), self.svg.node(serie_node['overlay'], class_="dots"),
metadata) metadata)
val = self._get_value(serie.points, i) val = self._get_value(serie.points, i)
alter(self.svg.transposable_node( alter(self.svg.transposable_node(
dots, 'circle', cx=x, cy=y, r=serie.dots_size, dots, 'circle', cx=x, cy=y, r=serie.dots_size,

1
pygal/graph/stackedbar.py

@ -25,7 +25,6 @@ from __future__ import division
from pygal.adapters import none_to_zero from pygal.adapters import none_to_zero
from pygal.graph.bar import Bar from pygal.graph.bar import Bar
from pygal.util import ident, swap
class StackedBar(Bar): class StackedBar(Bar):

72
pygal/stats.py

@ -0,0 +1,72 @@
from math import log, sqrt, pi
try:
from scipy import stats
except ImportError:
stats = None
def erfinv(x, a=.147):
    """Approximate the inverse error function with elementary functions.

    See https://en.wikipedia.org/wiki/Error_function
    #Approximation_with_elementary_functions

    :param x: value to invert; ``log(1 - x * x)`` must be defined
    :param a: tuning constant of the approximation
    :return: the approximation, carrying the sign of ``x``
    """
    log_term = log(1 - x * x)
    head = 2 / (a * pi) + log_term / 2
    inner = sqrt(head * head - log_term / a) - head
    magnitude = sqrt(inner)
    return magnitude if x > 0 else -magnitude
def norm_ppf(x):
    """Approximate the percent point function of the normal distribution.

    Computed as ``sqrt(2) * erfinv(2 * x - 1)`` using the elementary
    approximation of the inverse error function.

    :param x: probability, strictly between 0 and 1
    :raises ValueError: if ``x`` is not strictly between 0 and 1
    """
    if not 0 < x < 1:
        # %r and not %d: %d truncates floats, so 1.5 was reported as "1"
        raise ValueError(
            "Can't compute the percentage point for value %r" % (x,))
    return sqrt(2) * erfinv(2 * x - 1)
def ppf(x, n):
    """Return the percent point of ``x`` for the given ``n``.

    With scipy available, Student's t distribution (``stats.t.ppf(x, n)``)
    is used when ``n`` is below 30 and the normal distribution otherwise.
    Without scipy only the pure python normal approximation is available.

    :raises ImportError: if scipy is missing and ``n`` is below 30
    """
    small_sample = n < 30
    if not stats:
        if small_sample:
            # TODO: implement power series:
            # http://eprints.maths.ox.ac.uk/184/1/tdist.pdf
            raise ImportError(
                'You must have scipy installed to use t-student '
                'when sample_size is below 30')
        return norm_ppf(x)
    return stats.t.ppf(x, n) if small_sample else stats.norm.ppf(x)
# According to http://sphweb.bumc.bu.edu/otlt/MPH-Modules/BS/
# BS704_Confidence_Intervals/BS704_Confidence_Intervals_print.html
def confidence_interval_continuous(
        point_estimate, stddev, sample_size, confidence=.95, **kwargs):
    """Continuous confidence interval from sample size and standard deviation

    The half width of the interval is the percent point for the requested
    confidence times ``stddev / sqrt(sample_size)``. Extra keyword
    arguments are accepted and ignored.

    :return: a ``(low, high)`` tuple centered on ``point_estimate``
    """
    critical_value = ppf((confidence + 1) / 2, sample_size - 1)
    standard_error = stddev / sqrt(sample_size)
    half_width = critical_value * standard_error
    return (point_estimate - half_width, point_estimate + half_width)
def confidence_interval_dichotomous(
        point_estimate, sample_size, confidence=.95, bias=False,
        percentage=True, **kwargs):
    """Dichotomous confidence interval from sample size and maybe a bias

    :param point_estimate: observed proportion, expressed on a 0-100 scale
        when ``percentage`` is true and used as is otherwise
    :param sample_size: number of observations
    :param confidence: confidence level of the interval
    :param bias: when true, ``.5 / sample_size`` is added to the margin
    :param percentage: whether ``point_estimate`` is a percentage
    :return: a ``(low, high)`` tuple on the same scale as ``point_estimate``

    Extra keyword arguments are accepted and ignored.
    """
    critical_value = ppf((confidence + 1) / 2, sample_size - 1)
    proportion = point_estimate
    if percentage:
        # Float divisor: this module does not enable true division, so an
        # int percentage (e.g. 54) would be floored to 0 on Python 2.
        proportion = point_estimate / 100.0
    margin = sqrt(proportion * (1 - proportion) / sample_size)
    if bias:
        # NOTE(review): this term ends up multiplied by the critical value
        # below -- confirm this is the intended correction.
        margin += .5 / sample_size
    if percentage:
        margin *= 100
    half_width = critical_value * margin
    return (point_estimate - half_width, point_estimate + half_width)
def confidence_interval_manual(point_estimate, low, high, **kwargs):
    """Manual confidence interval: return the given bounds unchanged

    ``point_estimate`` and any extra keyword argument are ignored; they are
    accepted so that this function can be called like the other
    ``confidence_interval_*`` functions (for instance with an explicit
    ``type='manual'`` entry coming from the value metadata).

    :return: the ``(low, high)`` tuple
    """
    return (low, high)

13
pygal/style.py

@ -88,6 +88,7 @@ class Style(object):
) )
value_colors = () value_colors = ()
ci_colors = ()
def __init__(self, **kwargs): def __init__(self, **kwargs):
"""Create the style""" """Create the style"""
@ -124,6 +125,15 @@ class Style(object):
' fill: {1};\n' ' fill: {1};\n'
'}}\n') % (prefix,)).format(*tupl) '}}\n') % (prefix,)).format(*tupl)
def ci_color(tupl):
    """Make a confidence interval color css

    ``tupl`` is an ``(index, color)`` pair; an empty string is returned
    when the color is falsy, otherwise a rule setting the stroke of the
    ``.ci`` elements of ``.color-<index>``.
    """
    if tupl[1]:
        template = (
            '%s .color-{0} .ci {{\n'
            ' stroke: {1};\n'
            '}}\n') % (prefix,)
        return template.format(*tupl)
    return ''
if len(self.colors) < len_: if len(self.colors) < len_:
missing = len_ - len(self.colors) missing = len_ - len(self.colors)
cycles = 1 + missing // len(self.colors) cycles = 1 + missing // len(self.colors)
@ -150,7 +160,8 @@ class Style(object):
return '\n'.join(chain( return '\n'.join(chain(
map(color, enumerate(colors)), map(color, enumerate(colors)),
map(value_color, enumerate(value_colors)))) map(value_color, enumerate(value_colors)),
map(ci_color, enumerate(self.ci_colors))))
def to_dict(self): def to_dict(self):
"""Convert instance to a serializable mapping.""" """Convert instance to a serializable mapping."""

24
pygal/svg.py

@ -232,7 +232,7 @@ class Svg(object):
def line(self, node, coords, close=False, **kwargs): def line(self, node, coords, close=False, **kwargs):
"""Draw a svg line""" """Draw a svg line"""
line_len = len(coords) line_len = len(coords)
if line_len < 2: if len([c for c in coords if c[1] is not None]) < 2:
return return
root = 'M%s L%s Z' if close else 'M%s L%s' root = 'M%s L%s Z' if close else 'M%s L%s'
origin_index = 0 origin_index = 0
@ -296,6 +296,28 @@ class Svg(object):
self.graph._static_value(serie_node, val, x, y, metadata) self.graph._static_value(serie_node, val, x, y, metadata)
return rv return rv
def confidence_interval(self, node, x, low, high, width=7):
    """Draw a confidence interval mark as a single svg path

    The path is a segment from ``(x, high)`` to ``(x, low)`` with a cap
    extending ``width`` on each side of both ends. Coordinates are swapped
    when the graph is horizontal.
    """
    swap_axes = self.graph.horizontal
    ci = self.node(node, class_="ci")

    def cap(level):
        # center, right end and left end of the cap drawn at ``level``
        return (x, level), (x + width, level), (x - width, level)

    top, top_right, top_left = cap(high)
    bottom, bottom_right, bottom_left = cap(low)
    points = (
        top, top_right, top, top_left, top,
        bottom, bottom_right, bottom, bottom_left)

    formatted = tuple(
        '%f %f' % ((py, px) if swap_axes else (px, py))
        for px, py in points)
    self.node(
        ci, 'path',
        d="M%s L%s M%s L%s M%s L%s L%s M%s L%s" % formatted,
        class_='nofill')
def pre_render(self): def pre_render(self):
"""Last things to do before rendering""" """Last things to do before rendering"""
self.add_styles() self.add_styles()

1
pygal/util.py

@ -250,7 +250,6 @@ def decorate(svg, node, metadata):
if 'label' in metadata: if 'label' in metadata:
svg.node(node, 'desc', class_='label').text = to_unicode( svg.node(node, 'desc', class_='label').text = to_unicode(
metadata['label']) metadata['label'])
return node return node

Loading…
Cancel
Save