mirror of https://github.com/gogits/gogs.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
648 lines
10 KiB
648 lines
10 KiB
// Copyright 2012 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
// +build ignore |
|
|
|
package main |
|
|
|
// This program generates table.go and table_test.go. |
|
// Invoke as |
|
// |
|
// go run gen.go |gofmt >table.go |
|
// go run gen.go -test |gofmt >table_test.go |
|
|
|
import ( |
|
"flag" |
|
"fmt" |
|
"math/rand" |
|
"os" |
|
"sort" |
|
"strings" |
|
) |
|
|
|
// identifier converts s to a Go exported identifier. |
|
// It converts "div" to "Div" and "accept-charset" to "AcceptCharset". |
|
func identifier(s string) string { |
|
b := make([]byte, 0, len(s)) |
|
cap := true |
|
for _, c := range s { |
|
if c == '-' { |
|
cap = true |
|
continue |
|
} |
|
if cap && 'a' <= c && c <= 'z' { |
|
c -= 'a' - 'A' |
|
} |
|
cap = false |
|
b = append(b, byte(c)) |
|
} |
|
return string(b) |
|
} |
|
|
|
var test = flag.Bool("test", false, "generate table_test.go") |
|
|
|
func main() { |
|
flag.Parse() |
|
|
|
var all []string |
|
all = append(all, elements...) |
|
all = append(all, attributes...) |
|
all = append(all, eventHandlers...) |
|
all = append(all, extra...) |
|
sort.Strings(all) |
|
|
|
if *test { |
|
fmt.Printf("// generated by go run gen.go -test; DO NOT EDIT\n\n") |
|
fmt.Printf("package atom\n\n") |
|
fmt.Printf("var testAtomList = []string{\n") |
|
for _, s := range all { |
|
fmt.Printf("\t%q,\n", s) |
|
} |
|
fmt.Printf("}\n") |
|
return |
|
} |
|
|
|
// uniq - lists have dups |
|
// compute max len too |
|
maxLen := 0 |
|
w := 0 |
|
for _, s := range all { |
|
if w == 0 || all[w-1] != s { |
|
if maxLen < len(s) { |
|
maxLen = len(s) |
|
} |
|
all[w] = s |
|
w++ |
|
} |
|
} |
|
all = all[:w] |
|
|
|
// Find hash that minimizes table size. |
|
var best *table |
|
for i := 0; i < 1000000; i++ { |
|
if best != nil && 1<<(best.k-1) < len(all) { |
|
break |
|
} |
|
h := rand.Uint32() |
|
for k := uint(0); k <= 16; k++ { |
|
if best != nil && k >= best.k { |
|
break |
|
} |
|
var t table |
|
if t.init(h, k, all) { |
|
best = &t |
|
break |
|
} |
|
} |
|
} |
|
if best == nil { |
|
fmt.Fprintf(os.Stderr, "failed to construct string table\n") |
|
os.Exit(1) |
|
} |
|
|
|
// Lay out strings, using overlaps when possible. |
|
layout := append([]string{}, all...) |
|
|
|
// Remove strings that are substrings of other strings |
|
for changed := true; changed; { |
|
changed = false |
|
for i, s := range layout { |
|
if s == "" { |
|
continue |
|
} |
|
for j, t := range layout { |
|
if i != j && t != "" && strings.Contains(s, t) { |
|
changed = true |
|
layout[j] = "" |
|
} |
|
} |
|
} |
|
} |
|
|
|
// Join strings where one suffix matches another prefix. |
|
for { |
|
// Find best i, j, k such that layout[i][len-k:] == layout[j][:k], |
|
// maximizing overlap length k. |
|
besti := -1 |
|
bestj := -1 |
|
bestk := 0 |
|
for i, s := range layout { |
|
if s == "" { |
|
continue |
|
} |
|
for j, t := range layout { |
|
if i == j { |
|
continue |
|
} |
|
for k := bestk + 1; k <= len(s) && k <= len(t); k++ { |
|
if s[len(s)-k:] == t[:k] { |
|
besti = i |
|
bestj = j |
|
bestk = k |
|
} |
|
} |
|
} |
|
} |
|
if bestk > 0 { |
|
layout[besti] += layout[bestj][bestk:] |
|
layout[bestj] = "" |
|
continue |
|
} |
|
break |
|
} |
|
|
|
text := strings.Join(layout, "") |
|
|
|
atom := map[string]uint32{} |
|
for _, s := range all { |
|
off := strings.Index(text, s) |
|
if off < 0 { |
|
panic("lost string " + s) |
|
} |
|
atom[s] = uint32(off<<8 | len(s)) |
|
} |
|
|
|
// Generate the Go code. |
|
fmt.Printf("// generated by go run gen.go; DO NOT EDIT\n\n") |
|
fmt.Printf("package atom\n\nconst (\n") |
|
for _, s := range all { |
|
fmt.Printf("\t%s Atom = %#x\n", identifier(s), atom[s]) |
|
} |
|
fmt.Printf(")\n\n") |
|
|
|
fmt.Printf("const hash0 = %#x\n\n", best.h0) |
|
fmt.Printf("const maxAtomLen = %d\n\n", maxLen) |
|
|
|
fmt.Printf("var table = [1<<%d]Atom{\n", best.k) |
|
for i, s := range best.tab { |
|
if s == "" { |
|
continue |
|
} |
|
fmt.Printf("\t%#x: %#x, // %s\n", i, atom[s], s) |
|
} |
|
fmt.Printf("}\n") |
|
datasize := (1 << best.k) * 4 |
|
|
|
fmt.Printf("const atomText =\n") |
|
textsize := len(text) |
|
for len(text) > 60 { |
|
fmt.Printf("\t%q +\n", text[:60]) |
|
text = text[60:] |
|
} |
|
fmt.Printf("\t%q\n\n", text) |
|
|
|
fmt.Fprintf(os.Stderr, "%d atoms; %d string bytes + %d tables = %d total data\n", len(all), textsize, datasize, textsize+datasize) |
|
} |
|
|
|
type byLen []string |
|
|
|
func (x byLen) Less(i, j int) bool { return len(x[i]) > len(x[j]) } |
|
func (x byLen) Swap(i, j int) { x[i], x[j] = x[j], x[i] } |
|
func (x byLen) Len() int { return len(x) } |
|
|
|
// fnv computes the FNV hash with an arbitrary starting value h. |
|
func fnv(h uint32, s string) uint32 { |
|
for i := 0; i < len(s); i++ { |
|
h ^= uint32(s[i]) |
|
h *= 16777619 |
|
} |
|
return h |
|
} |
|
|
|
// A table represents an attempt at constructing the lookup table. |
|
// The lookup table uses cuckoo hashing, meaning that each string |
|
// can be found in one of two positions. |
|
type table struct { |
|
h0 uint32 |
|
k uint |
|
mask uint32 |
|
tab []string |
|
} |
|
|
|
// hash returns the two hashes for s. |
|
func (t *table) hash(s string) (h1, h2 uint32) { |
|
h := fnv(t.h0, s) |
|
h1 = h & t.mask |
|
h2 = (h >> 16) & t.mask |
|
return |
|
} |
|
|
|
// init initializes the table with the given parameters. |
|
// h0 is the initial hash value, |
|
// k is the number of bits of hash value to use, and |
|
// x is the list of strings to store in the table. |
|
// init returns false if the table cannot be constructed. |
|
func (t *table) init(h0 uint32, k uint, x []string) bool { |
|
t.h0 = h0 |
|
t.k = k |
|
t.tab = make([]string, 1<<k) |
|
t.mask = 1<<k - 1 |
|
for _, s := range x { |
|
if !t.insert(s) { |
|
return false |
|
} |
|
} |
|
return true |
|
} |
|
|
|
// insert inserts s in the table. |
|
func (t *table) insert(s string) bool { |
|
h1, h2 := t.hash(s) |
|
if t.tab[h1] == "" { |
|
t.tab[h1] = s |
|
return true |
|
} |
|
if t.tab[h2] == "" { |
|
t.tab[h2] = s |
|
return true |
|
} |
|
if t.push(h1, 0) { |
|
t.tab[h1] = s |
|
return true |
|
} |
|
if t.push(h2, 0) { |
|
t.tab[h2] = s |
|
return true |
|
} |
|
return false |
|
} |
|
|
|
// push attempts to push aside the entry in slot i. |
|
func (t *table) push(i uint32, depth int) bool { |
|
if depth > len(t.tab) { |
|
return false |
|
} |
|
s := t.tab[i] |
|
h1, h2 := t.hash(s) |
|
j := h1 + h2 - i |
|
if t.tab[j] != "" && !t.push(j, depth+1) { |
|
return false |
|
} |
|
t.tab[j] = s |
|
return true |
|
} |
|
|
|
// The lists of element names and attribute keys were taken from |
|
// https://html.spec.whatwg.org/multipage/indices.html#index |
|
// as of the "HTML Living Standard - Last Updated 21 February 2015" version. |
|
|
|
var elements = []string{ |
|
"a", |
|
"abbr", |
|
"address", |
|
"area", |
|
"article", |
|
"aside", |
|
"audio", |
|
"b", |
|
"base", |
|
"bdi", |
|
"bdo", |
|
"blockquote", |
|
"body", |
|
"br", |
|
"button", |
|
"canvas", |
|
"caption", |
|
"cite", |
|
"code", |
|
"col", |
|
"colgroup", |
|
"command", |
|
"data", |
|
"datalist", |
|
"dd", |
|
"del", |
|
"details", |
|
"dfn", |
|
"dialog", |
|
"div", |
|
"dl", |
|
"dt", |
|
"em", |
|
"embed", |
|
"fieldset", |
|
"figcaption", |
|
"figure", |
|
"footer", |
|
"form", |
|
"h1", |
|
"h2", |
|
"h3", |
|
"h4", |
|
"h5", |
|
"h6", |
|
"head", |
|
"header", |
|
"hgroup", |
|
"hr", |
|
"html", |
|
"i", |
|
"iframe", |
|
"img", |
|
"input", |
|
"ins", |
|
"kbd", |
|
"keygen", |
|
"label", |
|
"legend", |
|
"li", |
|
"link", |
|
"map", |
|
"mark", |
|
"menu", |
|
"menuitem", |
|
"meta", |
|
"meter", |
|
"nav", |
|
"noscript", |
|
"object", |
|
"ol", |
|
"optgroup", |
|
"option", |
|
"output", |
|
"p", |
|
"param", |
|
"pre", |
|
"progress", |
|
"q", |
|
"rp", |
|
"rt", |
|
"ruby", |
|
"s", |
|
"samp", |
|
"script", |
|
"section", |
|
"select", |
|
"small", |
|
"source", |
|
"span", |
|
"strong", |
|
"style", |
|
"sub", |
|
"summary", |
|
"sup", |
|
"table", |
|
"tbody", |
|
"td", |
|
"template", |
|
"textarea", |
|
"tfoot", |
|
"th", |
|
"thead", |
|
"time", |
|
"title", |
|
"tr", |
|
"track", |
|
"u", |
|
"ul", |
|
"var", |
|
"video", |
|
"wbr", |
|
} |
|
|
|
// https://html.spec.whatwg.org/multipage/indices.html#attributes-3 |
|
|
|
var attributes = []string{ |
|
"abbr", |
|
"accept", |
|
"accept-charset", |
|
"accesskey", |
|
"action", |
|
"alt", |
|
"async", |
|
"autocomplete", |
|
"autofocus", |
|
"autoplay", |
|
"challenge", |
|
"charset", |
|
"checked", |
|
"cite", |
|
"class", |
|
"cols", |
|
"colspan", |
|
"command", |
|
"content", |
|
"contenteditable", |
|
"contextmenu", |
|
"controls", |
|
"coords", |
|
"crossorigin", |
|
"data", |
|
"datetime", |
|
"default", |
|
"defer", |
|
"dir", |
|
"dirname", |
|
"disabled", |
|
"download", |
|
"draggable", |
|
"dropzone", |
|
"enctype", |
|
"for", |
|
"form", |
|
"formaction", |
|
"formenctype", |
|
"formmethod", |
|
"formnovalidate", |
|
"formtarget", |
|
"headers", |
|
"height", |
|
"hidden", |
|
"high", |
|
"href", |
|
"hreflang", |
|
"http-equiv", |
|
"icon", |
|
"id", |
|
"inputmode", |
|
"ismap", |
|
"itemid", |
|
"itemprop", |
|
"itemref", |
|
"itemscope", |
|
"itemtype", |
|
"keytype", |
|
"kind", |
|
"label", |
|
"lang", |
|
"list", |
|
"loop", |
|
"low", |
|
"manifest", |
|
"max", |
|
"maxlength", |
|
"media", |
|
"mediagroup", |
|
"method", |
|
"min", |
|
"minlength", |
|
"multiple", |
|
"muted", |
|
"name", |
|
"novalidate", |
|
"open", |
|
"optimum", |
|
"pattern", |
|
"ping", |
|
"placeholder", |
|
"poster", |
|
"preload", |
|
"radiogroup", |
|
"readonly", |
|
"rel", |
|
"required", |
|
"reversed", |
|
"rows", |
|
"rowspan", |
|
"sandbox", |
|
"spellcheck", |
|
"scope", |
|
"scoped", |
|
"seamless", |
|
"selected", |
|
"shape", |
|
"size", |
|
"sizes", |
|
"sortable", |
|
"sorted", |
|
"span", |
|
"src", |
|
"srcdoc", |
|
"srclang", |
|
"start", |
|
"step", |
|
"style", |
|
"tabindex", |
|
"target", |
|
"title", |
|
"translate", |
|
"type", |
|
"typemustmatch", |
|
"usemap", |
|
"value", |
|
"width", |
|
"wrap", |
|
} |
|
|
|
var eventHandlers = []string{ |
|
"onabort", |
|
"onautocomplete", |
|
"onautocompleteerror", |
|
"onafterprint", |
|
"onbeforeprint", |
|
"onbeforeunload", |
|
"onblur", |
|
"oncancel", |
|
"oncanplay", |
|
"oncanplaythrough", |
|
"onchange", |
|
"onclick", |
|
"onclose", |
|
"oncontextmenu", |
|
"oncuechange", |
|
"ondblclick", |
|
"ondrag", |
|
"ondragend", |
|
"ondragenter", |
|
"ondragleave", |
|
"ondragover", |
|
"ondragstart", |
|
"ondrop", |
|
"ondurationchange", |
|
"onemptied", |
|
"onended", |
|
"onerror", |
|
"onfocus", |
|
"onhashchange", |
|
"oninput", |
|
"oninvalid", |
|
"onkeydown", |
|
"onkeypress", |
|
"onkeyup", |
|
"onlanguagechange", |
|
"onload", |
|
"onloadeddata", |
|
"onloadedmetadata", |
|
"onloadstart", |
|
"onmessage", |
|
"onmousedown", |
|
"onmousemove", |
|
"onmouseout", |
|
"onmouseover", |
|
"onmouseup", |
|
"onmousewheel", |
|
"onoffline", |
|
"ononline", |
|
"onpagehide", |
|
"onpageshow", |
|
"onpause", |
|
"onplay", |
|
"onplaying", |
|
"onpopstate", |
|
"onprogress", |
|
"onratechange", |
|
"onreset", |
|
"onresize", |
|
"onscroll", |
|
"onseeked", |
|
"onseeking", |
|
"onselect", |
|
"onshow", |
|
"onsort", |
|
"onstalled", |
|
"onstorage", |
|
"onsubmit", |
|
"onsuspend", |
|
"ontimeupdate", |
|
"ontoggle", |
|
"onunload", |
|
"onvolumechange", |
|
"onwaiting", |
|
} |
|
|
|
// extra are ad-hoc values not covered by any of the lists above. |
|
var extra = []string{ |
|
"align", |
|
"annotation", |
|
"annotation-xml", |
|
"applet", |
|
"basefont", |
|
"bgsound", |
|
"big", |
|
"blink", |
|
"center", |
|
"color", |
|
"desc", |
|
"face", |
|
"font", |
|
"foreignObject", // HTML is case-insensitive, but SVG-embedded-in-HTML is case-sensitive. |
|
"foreignobject", |
|
"frame", |
|
"frameset", |
|
"image", |
|
"isindex", |
|
"listing", |
|
"malignmark", |
|
"marquee", |
|
"math", |
|
"mglyph", |
|
"mi", |
|
"mn", |
|
"mo", |
|
"ms", |
|
"mtext", |
|
"nobr", |
|
"noembed", |
|
"noframes", |
|
"plaintext", |
|
"prompt", |
|
"public", |
|
"spacer", |
|
"strike", |
|
"svg", |
|
"system", |
|
"tt", |
|
"xmp", |
|
}
|
|
|