mirror of https://github.com/gogits/gogs.git
279 lines
6.2 KiB
279 lines
6.2 KiB
package quotedprintable |
|
|
|
import ( |
|
"bytes" |
|
"encoding/base64" |
|
"errors" |
|
"fmt" |
|
"io" |
|
"strings" |
|
"unicode" |
|
"unicode/utf8" |
|
) |
|
|
|
// WordEncoder selects the scheme used to produce RFC 2047 encoded-words.
// Its byte value is the encoding letter that appears inside the encoded-word.
type WordEncoder byte

// Supported encoded-word schemes.
const (
	// BEncoding is the Base64 scheme, as defined by RFC 2045.
	BEncoding WordEncoder = 'b'
	// QEncoding is the Q-encoding scheme, as defined by RFC 2047.
	QEncoding WordEncoder = 'q'
)

// errInvalidWord reports input that is not a well-formed RFC 2047
// encoded-word.
var errInvalidWord = errors.New("mime: invalid RFC 2047 encoded-word")
|
|
|
// Encode returns the encoded-word form of s. If s is ASCII without special |
|
// characters, it is returned unchanged. The provided charset is the IANA |
|
// charset name of s. It is case insensitive. |
|
func (e WordEncoder) Encode(charset, s string) string { |
|
if !needsEncoding(s) { |
|
return s |
|
} |
|
return e.encodeWord(charset, s) |
|
} |
|
|
|
// needsEncoding reports whether s contains anything other than printable
// ASCII (space through '~') and horizontal tabs.
func needsEncoding(s string) bool {
	for i := 0; i < len(s); i++ {
		c := s[i]
		if c == '\t' {
			continue
		}
		// Every byte of a multi-byte UTF-8 sequence is above '~', so a
		// byte-wise scan flags non-ASCII text as well as control characters.
		if c < ' ' || c > '~' {
			return true
		}
	}
	return false
}
|
|
|
// encodeWord encodes a string into an encoded-word. |
|
func (e WordEncoder) encodeWord(charset, s string) string { |
|
buf := getBuffer() |
|
defer putBuffer(buf) |
|
|
|
buf.WriteString("=?") |
|
buf.WriteString(charset) |
|
buf.WriteByte('?') |
|
buf.WriteByte(byte(e)) |
|
buf.WriteByte('?') |
|
|
|
if e == BEncoding { |
|
w := base64.NewEncoder(base64.StdEncoding, buf) |
|
io.WriteString(w, s) |
|
w.Close() |
|
} else { |
|
enc := make([]byte, 3) |
|
for i := 0; i < len(s); i++ { |
|
b := s[i] |
|
switch { |
|
case b == ' ': |
|
buf.WriteByte('_') |
|
case b <= '~' && b >= '!' && b != '=' && b != '?' && b != '_': |
|
buf.WriteByte(b) |
|
default: |
|
enc[0] = '=' |
|
enc[1] = upperhex[b>>4] |
|
enc[2] = upperhex[b&0x0f] |
|
buf.Write(enc) |
|
} |
|
} |
|
} |
|
buf.WriteString("?=") |
|
return buf.String() |
|
} |
|
|
|
const upperhex = "0123456789ABCDEF" |
|
|
|
// WordDecoder decodes RFC 2047 encoded-words found in MIME headers.
type WordDecoder struct {
	// CharsetReader, when non-nil, is called to obtain a reader that
	// converts input from the named charset into UTF-8. The charset name is
	// always passed in lower case.
	// The utf-8, iso-8859-1 and us-ascii charsets are handled by default.
	// At least one of CharsetReader's two result values must be non-nil.
	CharsetReader func(charset string, input io.Reader) (io.Reader, error)
}
|
|
|
// Decode decodes an encoded-word. If word is not a valid RFC 2047 encoded-word, |
|
// word is returned unchanged. |
|
func (d *WordDecoder) Decode(word string) (string, error) { |
|
fields := strings.Split(word, "?") // TODO: remove allocation? |
|
if len(fields) != 5 || fields[0] != "=" || fields[4] != "=" || len(fields[2]) != 1 { |
|
return "", errInvalidWord |
|
} |
|
|
|
content, err := decode(fields[2][0], fields[3]) |
|
if err != nil { |
|
return "", err |
|
} |
|
|
|
buf := getBuffer() |
|
defer putBuffer(buf) |
|
|
|
if err := d.convert(buf, fields[1], content); err != nil { |
|
return "", err |
|
} |
|
|
|
return buf.String(), nil |
|
} |
|
|
|
// DecodeHeader decodes all encoded-words of the given string. It returns an |
|
// error if and only if CharsetReader of d returns an error. |
|
func (d *WordDecoder) DecodeHeader(header string) (string, error) { |
|
// If there is no encoded-word, returns before creating a buffer. |
|
i := strings.Index(header, "=?") |
|
if i == -1 { |
|
return header, nil |
|
} |
|
|
|
buf := getBuffer() |
|
defer putBuffer(buf) |
|
|
|
buf.WriteString(header[:i]) |
|
header = header[i:] |
|
|
|
betweenWords := false |
|
for { |
|
start := strings.Index(header, "=?") |
|
if start == -1 { |
|
break |
|
} |
|
cur := start + len("=?") |
|
|
|
i := strings.Index(header[cur:], "?") |
|
if i == -1 { |
|
break |
|
} |
|
charset := header[cur : cur+i] |
|
cur += i + len("?") |
|
|
|
if len(header) < cur+len("Q??=") { |
|
break |
|
} |
|
encoding := header[cur] |
|
cur++ |
|
|
|
if header[cur] != '?' { |
|
break |
|
} |
|
cur++ |
|
|
|
j := strings.Index(header[cur:], "?=") |
|
if j == -1 { |
|
break |
|
} |
|
text := header[cur : cur+j] |
|
end := cur + j + len("?=") |
|
|
|
content, err := decode(encoding, text) |
|
if err != nil { |
|
betweenWords = false |
|
buf.WriteString(header[:start+2]) |
|
header = header[start+2:] |
|
continue |
|
} |
|
|
|
// Write characters before the encoded-word. White-space and newline |
|
// characters separating two encoded-words must be deleted. |
|
if start > 0 && (!betweenWords || hasNonWhitespace(header[:start])) { |
|
buf.WriteString(header[:start]) |
|
} |
|
|
|
if err := d.convert(buf, charset, content); err != nil { |
|
return "", err |
|
} |
|
|
|
header = header[end:] |
|
betweenWords = true |
|
} |
|
|
|
if len(header) > 0 { |
|
buf.WriteString(header) |
|
} |
|
|
|
return buf.String(), nil |
|
} |
|
|
|
func decode(encoding byte, text string) ([]byte, error) { |
|
switch encoding { |
|
case 'B', 'b': |
|
return base64.StdEncoding.DecodeString(text) |
|
case 'Q', 'q': |
|
return qDecode(text) |
|
} |
|
return nil, errInvalidWord |
|
} |
|
|
|
func (d *WordDecoder) convert(buf *bytes.Buffer, charset string, content []byte) error { |
|
switch { |
|
case strings.EqualFold("utf-8", charset): |
|
buf.Write(content) |
|
case strings.EqualFold("iso-8859-1", charset): |
|
for _, c := range content { |
|
buf.WriteRune(rune(c)) |
|
} |
|
case strings.EqualFold("us-ascii", charset): |
|
for _, c := range content { |
|
if c >= utf8.RuneSelf { |
|
buf.WriteRune(unicode.ReplacementChar) |
|
} else { |
|
buf.WriteByte(c) |
|
} |
|
} |
|
default: |
|
if d.CharsetReader == nil { |
|
return fmt.Errorf("mime: unhandled charset %q", charset) |
|
} |
|
r, err := d.CharsetReader(strings.ToLower(charset), bytes.NewReader(content)) |
|
if err != nil { |
|
return err |
|
} |
|
if _, err = buf.ReadFrom(r); err != nil { |
|
return err |
|
} |
|
} |
|
return nil |
|
} |
|
|
|
// hasNonWhitespace reports whether s (assumed to be ASCII) contains at least
// one character that is not linear white space. Only space, tab, line feed
// and carriage return count as white space here; vertical tab (\v) does not,
// since it cannot separate encoded-words.
func hasNonWhitespace(s string) bool {
	for _, r := range s {
		if r != ' ' && r != '\t' && r != '\n' && r != '\r' {
			return true
		}
	}
	return false
}
|
|
|
// qDecode decodes a Q encoded string. |
|
func qDecode(s string) ([]byte, error) { |
|
dec := make([]byte, len(s)) |
|
n := 0 |
|
for i := 0; i < len(s); i++ { |
|
switch c := s[i]; { |
|
case c == '_': |
|
dec[n] = ' ' |
|
case c == '=': |
|
if i+2 >= len(s) { |
|
return nil, errInvalidWord |
|
} |
|
b, err := readHexByte(s[i+1], s[i+2]) |
|
if err != nil { |
|
return nil, err |
|
} |
|
dec[n] = b |
|
i += 2 |
|
case (c <= '~' && c >= ' ') || c == '\n' || c == '\r' || c == '\t': |
|
dec[n] = c |
|
default: |
|
return nil, errInvalidWord |
|
} |
|
n++ |
|
} |
|
|
|
return dec[:n], nil |
|
}
|
|
|