From 0049922f2e690a6d58f335ca9196c95f1de84370 Mon Sep 17 00:00:00 2001 From: Antonio Larrosa Date: Thu, 23 Mar 2017 17:30:48 +0100 Subject: [PATCH 1/6] Fix send_file to work with non-ascii filenames This commit implements https://tools.ietf.org/html/rfc2231#section-4 in order to support sending unicode characters. Tested on both Firefox and Chromium under Linux. This adds unidecode as a dependency, which might be relaxed by using .encode('latin-1', 'ignore') but wouldn't be as useful. Also, added a test for the correct headers to be added. Previously, using a filename parameter to send_file with unicode characters, it failed with the next error since HTTP headers don't allow non latin-1 characters. Error on request: Traceback (most recent call last): File "/usr/lib/python3.6/site-packages/werkzeug/serving.py", line 193, in run_wsgi execute(self.server.app) File "/usr/lib/python3.6/site-packages/werkzeug/serving.py", line 186, in execute write(b'') File "/usr/lib/python3.6/site-packages/werkzeug/serving.py", line 152, in write self.send_header(key, value) File "/usr/lib64/python3.6/http/server.py", line 509, in send_header ("%s: %s\r\n" % (keyword, value)).encode('latin-1', 'strict')) UnicodeEncodeError: 'latin-1' codec can't encode character '\uff0f' in position 58: ordinal not in range(256) Fixes #1286 --- flask/helpers.py | 6 +++++- setup.py | 1 + tests/test_helpers.py | 11 +++++++++++ tox.ini | 1 + 4 files changed, 18 insertions(+), 1 deletion(-) diff --git a/flask/helpers.py b/flask/helpers.py index 2f446327..b2ea2ce9 100644 --- a/flask/helpers.py +++ b/flask/helpers.py @@ -41,6 +41,7 @@ from .signals import message_flashed from .globals import session, _request_ctx_stack, _app_ctx_stack, \ current_app, request from ._compat import string_types, text_type +from unidecode import unidecode # sentinel @@ -534,8 +535,11 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, if attachment_filename is None: raise TypeError('filename unavailable, required for ' 'sending as attachment') + filename_dict = { + 'filename': unidecode(attachment_filename), + 'filename*': "UTF-8''%s" % url_quote(attachment_filename)} headers.add('Content-Disposition', 'attachment', - filename=attachment_filename) + **filename_dict) if current_app.use_x_sendfile and filename: if file is not None: diff --git a/setup.py b/setup.py index 08995073..2ffe72ed 100644 --- a/setup.py +++ b/setup.py @@ -75,6 +75,7 @@ setup( 'Jinja2>=2.4', 'itsdangerous>=0.21', 'click>=2.0', + 'unidecode', ], classifiers=[ 'Development Status :: 4 - Beta', diff --git a/tests/test_helpers.py b/tests/test_helpers.py index fd448fb8..362b6cfa 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -560,6 +560,17 @@ class TestSendfile(object): assert options['filename'] == 'index.txt' rv.close() + def test_attachment_with_utf8_filename(self): + app = flask.Flask(__name__) + with app.test_request_context(): + with open(os.path.join(app.root_path, 'static/index.html')) as f: + rv = flask.send_file(f, as_attachment=True, + attachment_filename='Ñandú/pingüino.txt') + value, options = \ + parse_options_header(rv.headers['Content-Disposition']) + assert options == {'filename': 'Nandu/pinguino.txt', 'filename*': "UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt"} + rv.close() + def test_static_file(self): app = flask.Flask(__name__) # default cache timeout is 12 hours diff --git a/tox.ini b/tox.ini index 764b4030..3db7b91f 100644 --- a/tox.ini +++ b/tox.ini @@ -27,6 +27,7 @@ deps= devel: git+https://github.com/pallets/itsdangerous.git devel: git+https://github.com/jek/blinker.git simplejson: simplejson + unidecode [testenv:docs] deps = sphinx From 6ef45f30ab0e95d80ee2a29168f098e9037b9e0b Mon Sep 17 00:00:00 2001 From: Antonio Larrosa Date: Fri, 24 Mar 2017 20:05:01 +0100 Subject: [PATCH 2/6] Fix previous commits to work with python 2 and python 3 Also, parse_options_header seems to interpret filename* so we better test the actual value used in the headers (and since it's valid in any order, use a set to compare) --- flask/helpers.py | 2 +- tests/test_helpers.py | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/flask/helpers.py b/flask/helpers.py index b2ea2ce9..e4fb8c43 100644 --- a/flask/helpers.py +++ b/flask/helpers.py @@ -536,7 +536,7 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, raise TypeError('filename unavailable, required for ' 'sending as attachment') filename_dict = { - 'filename': unidecode(attachment_filename), + 'filename': unidecode(text_type(attachment_filename)), 'filename*': "UTF-8''%s" % url_quote(attachment_filename)} headers.add('Content-Disposition', 'attachment', **filename_dict) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 362b6cfa..f7affb2c 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -565,10 +565,11 @@ class TestSendfile(object): with app.test_request_context(): with open(os.path.join(app.root_path, 'static/index.html')) as f: rv = flask.send_file(f, as_attachment=True, - attachment_filename='Ñandú/pingüino.txt') - value, options = \ - parse_options_header(rv.headers['Content-Disposition']) - assert options == {'filename': 'Nandu/pinguino.txt', 'filename*': "UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt"} + attachment_filename=u'Ñandú/pingüino.txt') + content_disposition = set(rv.headers['Content-Disposition'].split(';')) + assert content_disposition == set(['attachment', + ' filename="Nandu/pinguino.txt"', + " filename*=UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt"]) rv.close() def test_static_file(self): From bf023e7dc0bae78ff0ab14dc9f10a87e2b56f676 Mon Sep 17 00:00:00 2001 From: Antonio Larrosa Date: Thu, 30 Mar 2017 17:32:21 +0200 Subject: [PATCH 3/6] Remove unidecode dependency and use unicodedata instead I found a way to remove the unidecode dependency without sacrificing much by using unicodedata.normalize . --- flask/helpers.py | 6 ++++-- setup.py | 1 - tox.ini | 1 - 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/flask/helpers.py b/flask/helpers.py index e4fb8c43..aa8be315 100644 --- a/flask/helpers.py +++ b/flask/helpers.py @@ -14,6 +14,7 @@ import sys import pkgutil import posixpath import mimetypes +import unicodedata from time import time from zlib import adler32 from threading import RLock @@ -41,7 +42,6 @@ from .signals import message_flashed from .globals import session, _request_ctx_stack, _app_ctx_stack, \ current_app, request from ._compat import string_types, text_type -from unidecode import unidecode # sentinel @@ -536,7 +536,9 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, raise TypeError('filename unavailable, required for ' 'sending as attachment') filename_dict = { - 'filename': unidecode(text_type(attachment_filename)), + 'filename': (unicodedata.normalize('NFKD', + text_type(attachment_filename)).encode('ascii', + 'ignore')), 'filename*': "UTF-8''%s" % url_quote(attachment_filename)} headers.add('Content-Disposition', 'attachment', **filename_dict) diff --git a/setup.py b/setup.py index 2ffe72ed..08995073 100644 --- a/setup.py +++ b/setup.py @@ -75,7 +75,6 @@ setup( 'Jinja2>=2.4', 'itsdangerous>=0.21', 'click>=2.0', - 'unidecode', ], classifiers=[ 'Development Status :: 4 - Beta', diff --git a/tox.ini b/tox.ini index 3db7b91f..764b4030 100644 --- a/tox.ini +++ b/tox.ini @@ -27,7 +27,6 @@ deps= devel: git+https://github.com/pallets/itsdangerous.git devel: git+https://github.com/jek/blinker.git simplejson: simplejson - unidecode [testenv:docs] deps = sphinx From d50a5db5ed07a14733b8cbfc0962b793209014dd Mon Sep 17 00:00:00 2001 From: Antonio Larrosa Date: Fri, 7 Apr 2017 20:34:52 +0200 Subject: [PATCH 4/6] Keep using only filename if it's valid ascii --- flask/helpers.py | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) diff --git a/flask/helpers.py b/flask/helpers.py index aa8be315..19d33420 100644 --- a/flask/helpers.py +++ b/flask/helpers.py @@ -14,7 +14,7 @@ import sys import pkgutil import posixpath import mimetypes -import unicodedata +from unicodedata import normalize from time import time from zlib import adler32 from threading import RLock @@ -535,13 +535,22 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, if attachment_filename is None: raise TypeError('filename unavailable, required for ' 'sending as attachment') - filename_dict = { - 'filename': (unicodedata.normalize('NFKD', - text_type(attachment_filename)).encode('ascii', - 'ignore')), - 'filename*': "UTF-8''%s" % url_quote(attachment_filename)} + normalized = normalize('NFKD', text_type(attachment_filename)) + + try: + normalized.encode('ascii') + except UnicodeEncodeError: + filenames = { + 'filename': normalized.encode('ascii', 'ignore'), + 'filename*': "UTF-8''%s" % url_quote(attachment_filename), + } + else: + filenames = { + 'filename': attachment_filename, + } + headers.add('Content-Disposition', 'attachment', - **filename_dict) + **filenames) if current_app.use_x_sendfile and filename: if file is not None: From c1973016eacfb22fdec837394d8b89c0ca35ad15 Mon Sep 17 00:00:00 2001 From: David Lord Date: Fri, 7 Apr 2017 18:02:31 -0700 Subject: [PATCH 5/6] style cleanup break out header parts in test test for no filename* parameter for ascii header --- flask/helpers.py | 19 ++++++++++++------- tests/test_helpers.py | 25 ++++++++++++++++--------- 2 files changed, 28 insertions(+), 16 deletions(-) diff --git a/flask/helpers.py b/flask/helpers.py index 19d33420..420467ad 100644 --- a/flask/helpers.py +++ b/flask/helpers.py @@ -14,10 +14,10 @@ import sys import pkgutil import posixpath import mimetypes -from unicodedata import normalize from time import time from zlib import adler32 from threading import RLock +import unicodedata from werkzeug.routing import BuildError from functools import update_wrapper @@ -478,6 +478,11 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, .. versionchanged:: 0.12 The `attachment_filename` is preferred over `filename` for MIME-type detection. + + .. versionchanged:: 0.13 + UTF-8 filenames, as specified in `RFC 2231`_, are supported. + + .. _RFC 2231: https://tools.ietf.org/html/rfc2231#section-4 :param filename_or_fp: the filename of the file to send in `latin-1`. This is relative to the :attr:`~Flask.root_path` @@ -535,7 +540,10 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, if attachment_filename is None: raise TypeError('filename unavailable, required for ' 'sending as attachment') - normalized = normalize('NFKD', text_type(attachment_filename)) + + normalized = unicodedata.normalize( + 'NFKD', text_type(attachment_filename) + ) try: normalized.encode('ascii') @@ -545,12 +553,9 @@ def send_file(filename_or_fp, mimetype=None, as_attachment=False, 'filename*': "UTF-8''%s" % url_quote(attachment_filename), } else: - filenames = { - 'filename': attachment_filename, - } + filenames = {'filename': attachment_filename} - headers.add('Content-Disposition', 'attachment', - **filenames) + headers.add('Content-Disposition', 'attachment', **filenames) if current_app.use_x_sendfile and filename: if file is not None: diff --git a/tests/test_helpers.py b/tests/test_helpers.py index f7affb2c..1aeff065 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -540,10 +540,11 @@ class TestSendfile(object): value, options = \ parse_options_header(rv.headers['Content-Disposition']) assert value == 'attachment' + assert options['filename'] == 'index.html' + assert 'filename*' not in options rv.close() with app.test_request_context(): - assert options['filename'] == 'index.html' rv = flask.send_file('static/index.html', as_attachment=True) value, options = parse_options_header(rv.headers['Content-Disposition']) assert value == 'attachment' @@ -562,15 +563,21 @@ class TestSendfile(object): def test_attachment_with_utf8_filename(self): app = flask.Flask(__name__) + with app.test_request_context(): - with open(os.path.join(app.root_path, 'static/index.html')) as f: - rv = flask.send_file(f, as_attachment=True, - attachment_filename=u'Ñandú/pingüino.txt') - content_disposition = set(rv.headers['Content-Disposition'].split(';')) - assert content_disposition == set(['attachment', - ' filename="Nandu/pinguino.txt"', - " filename*=UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt"]) - rv.close() + rv = flask.send_file( + 'static/index.html', as_attachment=True, + attachment_filename=u'Ñandú/pingüino.txt' + ) + value, options = parse_options_header( + rv.headers['Content-Disposition'] + ) + rv.close() + + assert value == 'attachment' + assert sorted(options.keys()) == ('filename', 'filename*') + assert options['filename'] == 'Nandu/pinguino.txt' + assert options['filename*'] == 'UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt' def test_static_file(self): app = flask.Flask(__name__) From f790ab7177c9959b560d238f4be9c124d635e70a Mon Sep 17 00:00:00 2001 From: David Lord Date: Sat, 8 Apr 2017 10:33:06 -0700 Subject: [PATCH 6/6] need to test against raw header parsing prefers the last value parsed for the option --- tests/test_helpers.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/tests/test_helpers.py b/tests/test_helpers.py index 1aeff065..d93b443e 100644 --- a/tests/test_helpers.py +++ b/tests/test_helpers.py @@ -541,7 +541,7 @@ class TestSendfile(object): parse_options_header(rv.headers['Content-Disposition']) assert value == 'attachment' assert options['filename'] == 'index.html' - assert 'filename*' not in options + assert 'filename*' not in rv.headers['Content-Disposition'] rv.close() with app.test_request_context(): @@ -565,20 +565,15 @@ class TestSendfile(object): app = flask.Flask(__name__) with app.test_request_context(): - rv = flask.send_file( - 'static/index.html', as_attachment=True, - attachment_filename=u'Ñandú/pingüino.txt' - ) - value, options = parse_options_header( - rv.headers['Content-Disposition'] - ) + rv = flask.send_file('static/index.html', as_attachment=True, attachment_filename=u'Ñandú/pingüino.txt') + content_disposition = set(rv.headers['Content-Disposition'].split('; ')) + assert content_disposition == set(( + 'attachment', + 'filename="Nandu/pinguino.txt"', + "filename*=UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt" + )) rv.close() - assert value == 'attachment' - assert sorted(options.keys()) == ('filename', 'filename*') - assert options['filename'] == 'Nandu/pinguino.txt' - assert options['filename*'] == 'UTF-8''%C3%91and%C3%BA%EF%BC%8Fping%C3%BCino.txt' - def test_static_file(self): app = flask.Flask(__name__) # default cache timeout is 12 hours