mirror of https://github.com/gogits/gogs.git
You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
701 lines
18 KiB
701 lines
18 KiB
// Copyright 2009 The Go Authors. All rights reserved. |
|
// Use of this source code is governed by a BSD-style |
|
// license that can be found in the LICENSE file. |
|
|
|
package flate |
|
|
|
import ( |
|
"io" |
|
) |
|
|
|
const ( |
|
// The largest offset code. |
|
offsetCodeCount = 30 |
|
|
|
// The special code used to mark the end of a block. |
|
endBlockMarker = 256 |
|
|
|
// The first length code. |
|
lengthCodesStart = 257 |
|
|
|
// The number of codegen codes. |
|
codegenCodeCount = 19 |
|
badCode = 255 |
|
|
|
// bufferFlushSize indicates the buffer size |
|
// after which bytes are flushed to the writer. |
|
// Should preferably be a multiple of 6, since |
|
// we accumulate 6 bytes between writes to the buffer. |
|
bufferFlushSize = 240 |
|
|
|
// bufferSize is the actual output byte buffer size. |
|
// It must have additional headroom for a flush |
|
// which can contain up to 8 bytes. |
|
bufferSize = bufferFlushSize + 8 |
|
) |
|
|
|
// The number of extra bits needed by length code X - LENGTH_CODES_START. |
|
var lengthExtraBits = []int8{ |
|
/* 257 */ 0, 0, 0, |
|
/* 260 */ 0, 0, 0, 0, 0, 1, 1, 1, 1, 2, |
|
/* 270 */ 2, 2, 2, 3, 3, 3, 3, 4, 4, 4, |
|
/* 280 */ 4, 5, 5, 5, 5, 0, |
|
} |
|
|
|
// The length indicated by length code X - LENGTH_CODES_START. |
|
var lengthBase = []uint32{ |
|
0, 1, 2, 3, 4, 5, 6, 7, 8, 10, |
|
12, 14, 16, 20, 24, 28, 32, 40, 48, 56, |
|
64, 80, 96, 112, 128, 160, 192, 224, 255, |
|
} |
|
|
|
// offset code word extra bits. |
|
var offsetExtraBits = []int8{ |
|
0, 0, 0, 0, 1, 1, 2, 2, 3, 3, |
|
4, 4, 5, 5, 6, 6, 7, 7, 8, 8, |
|
9, 9, 10, 10, 11, 11, 12, 12, 13, 13, |
|
/* extended window */ |
|
14, 14, 15, 15, 16, 16, 17, 17, 18, 18, 19, 19, 20, 20, |
|
} |
|
|
|
var offsetBase = []uint32{ |
|
/* normal deflate */ |
|
0x000000, 0x000001, 0x000002, 0x000003, 0x000004, |
|
0x000006, 0x000008, 0x00000c, 0x000010, 0x000018, |
|
0x000020, 0x000030, 0x000040, 0x000060, 0x000080, |
|
0x0000c0, 0x000100, 0x000180, 0x000200, 0x000300, |
|
0x000400, 0x000600, 0x000800, 0x000c00, 0x001000, |
|
0x001800, 0x002000, 0x003000, 0x004000, 0x006000, |
|
|
|
/* extended window */ |
|
0x008000, 0x00c000, 0x010000, 0x018000, 0x020000, |
|
0x030000, 0x040000, 0x060000, 0x080000, 0x0c0000, |
|
0x100000, 0x180000, 0x200000, 0x300000, |
|
} |
|
|
|
// The odd order in which the codegen code sizes are written. |
|
var codegenOrder = []uint32{16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15} |
|
|
|
type huffmanBitWriter struct { |
|
// writer is the underlying writer. |
|
// Do not use it directly; use the write method, which ensures |
|
// that Write errors are sticky. |
|
writer io.Writer |
|
|
|
// Data waiting to be written is bytes[0:nbytes] |
|
// and then the low nbits of bits. |
|
bits uint64 |
|
nbits uint |
|
bytes [bufferSize]byte |
|
codegenFreq [codegenCodeCount]int32 |
|
nbytes int |
|
literalFreq []int32 |
|
offsetFreq []int32 |
|
codegen []uint8 |
|
literalEncoding *huffmanEncoder |
|
offsetEncoding *huffmanEncoder |
|
codegenEncoding *huffmanEncoder |
|
err error |
|
} |
|
|
|
func newHuffmanBitWriter(w io.Writer) *huffmanBitWriter { |
|
return &huffmanBitWriter{ |
|
writer: w, |
|
literalFreq: make([]int32, maxNumLit), |
|
offsetFreq: make([]int32, offsetCodeCount), |
|
codegen: make([]uint8, maxNumLit+offsetCodeCount+1), |
|
literalEncoding: newHuffmanEncoder(maxNumLit), |
|
codegenEncoding: newHuffmanEncoder(codegenCodeCount), |
|
offsetEncoding: newHuffmanEncoder(offsetCodeCount), |
|
} |
|
} |
|
|
|
func (w *huffmanBitWriter) reset(writer io.Writer) { |
|
w.writer = writer |
|
w.bits, w.nbits, w.nbytes, w.err = 0, 0, 0, nil |
|
w.bytes = [bufferSize]byte{} |
|
} |
|
|
|
func (w *huffmanBitWriter) flush() { |
|
if w.err != nil { |
|
w.nbits = 0 |
|
return |
|
} |
|
n := w.nbytes |
|
for w.nbits != 0 { |
|
w.bytes[n] = byte(w.bits) |
|
w.bits >>= 8 |
|
if w.nbits > 8 { // Avoid underflow |
|
w.nbits -= 8 |
|
} else { |
|
w.nbits = 0 |
|
} |
|
n++ |
|
} |
|
w.bits = 0 |
|
w.write(w.bytes[:n]) |
|
w.nbytes = 0 |
|
} |
|
|
|
func (w *huffmanBitWriter) write(b []byte) { |
|
if w.err != nil { |
|
return |
|
} |
|
_, w.err = w.writer.Write(b) |
|
} |
|
|
|
func (w *huffmanBitWriter) writeBits(b int32, nb uint) { |
|
if w.err != nil { |
|
return |
|
} |
|
w.bits |= uint64(b) << w.nbits |
|
w.nbits += nb |
|
if w.nbits >= 48 { |
|
bits := w.bits |
|
w.bits >>= 48 |
|
w.nbits -= 48 |
|
n := w.nbytes |
|
bytes := w.bytes[n : n+6] |
|
bytes[0] = byte(bits) |
|
bytes[1] = byte(bits >> 8) |
|
bytes[2] = byte(bits >> 16) |
|
bytes[3] = byte(bits >> 24) |
|
bytes[4] = byte(bits >> 32) |
|
bytes[5] = byte(bits >> 40) |
|
n += 6 |
|
if n >= bufferFlushSize { |
|
w.write(w.bytes[:n]) |
|
n = 0 |
|
} |
|
w.nbytes = n |
|
} |
|
} |
|
|
|
func (w *huffmanBitWriter) writeBytes(bytes []byte) { |
|
if w.err != nil { |
|
return |
|
} |
|
n := w.nbytes |
|
if w.nbits&7 != 0 { |
|
w.err = InternalError("writeBytes with unfinished bits") |
|
return |
|
} |
|
for w.nbits != 0 { |
|
w.bytes[n] = byte(w.bits) |
|
w.bits >>= 8 |
|
w.nbits -= 8 |
|
n++ |
|
} |
|
if n != 0 { |
|
w.write(w.bytes[:n]) |
|
} |
|
w.nbytes = 0 |
|
w.write(bytes) |
|
} |
|
|
|
// RFC 1951 3.2.7 specifies a special run-length encoding for specifying |
|
// the literal and offset lengths arrays (which are concatenated into a single |
|
// array). This method generates that run-length encoding. |
|
// |
|
// The result is written into the codegen array, and the frequencies |
|
// of each code is written into the codegenFreq array. |
|
// Codes 0-15 are single byte codes. Codes 16-18 are followed by additional |
|
// information. Code badCode is an end marker |
|
// |
|
// numLiterals The number of literals in literalEncoding |
|
// numOffsets The number of offsets in offsetEncoding |
|
// litenc, offenc The literal and offset encoder to use |
|
func (w *huffmanBitWriter) generateCodegen(numLiterals int, numOffsets int, litEnc, offEnc *huffmanEncoder) { |
|
for i := range w.codegenFreq { |
|
w.codegenFreq[i] = 0 |
|
} |
|
// Note that we are using codegen both as a temporary variable for holding |
|
// a copy of the frequencies, and as the place where we put the result. |
|
// This is fine because the output is always shorter than the input used |
|
// so far. |
|
codegen := w.codegen // cache |
|
// Copy the concatenated code sizes to codegen. Put a marker at the end. |
|
cgnl := codegen[:numLiterals] |
|
for i := range cgnl { |
|
cgnl[i] = uint8(litEnc.codes[i].len) |
|
} |
|
|
|
cgnl = codegen[numLiterals : numLiterals+numOffsets] |
|
for i := range cgnl { |
|
cgnl[i] = uint8(offEnc.codes[i].len) |
|
} |
|
codegen[numLiterals+numOffsets] = badCode |
|
|
|
size := codegen[0] |
|
count := 1 |
|
outIndex := 0 |
|
for inIndex := 1; size != badCode; inIndex++ { |
|
// INVARIANT: We have seen "count" copies of size that have not yet |
|
// had output generated for them. |
|
nextSize := codegen[inIndex] |
|
if nextSize == size { |
|
count++ |
|
continue |
|
} |
|
// We need to generate codegen indicating "count" of size. |
|
if size != 0 { |
|
codegen[outIndex] = size |
|
outIndex++ |
|
w.codegenFreq[size]++ |
|
count-- |
|
for count >= 3 { |
|
n := 6 |
|
if n > count { |
|
n = count |
|
} |
|
codegen[outIndex] = 16 |
|
outIndex++ |
|
codegen[outIndex] = uint8(n - 3) |
|
outIndex++ |
|
w.codegenFreq[16]++ |
|
count -= n |
|
} |
|
} else { |
|
for count >= 11 { |
|
n := 138 |
|
if n > count { |
|
n = count |
|
} |
|
codegen[outIndex] = 18 |
|
outIndex++ |
|
codegen[outIndex] = uint8(n - 11) |
|
outIndex++ |
|
w.codegenFreq[18]++ |
|
count -= n |
|
} |
|
if count >= 3 { |
|
// count >= 3 && count <= 10 |
|
codegen[outIndex] = 17 |
|
outIndex++ |
|
codegen[outIndex] = uint8(count - 3) |
|
outIndex++ |
|
w.codegenFreq[17]++ |
|
count = 0 |
|
} |
|
} |
|
count-- |
|
for ; count >= 0; count-- { |
|
codegen[outIndex] = size |
|
outIndex++ |
|
w.codegenFreq[size]++ |
|
} |
|
// Set up invariant for next time through the loop. |
|
size = nextSize |
|
count = 1 |
|
} |
|
// Marker indicating the end of the codegen. |
|
codegen[outIndex] = badCode |
|
} |
|
|
|
// dynamicSize returns the size of dynamically encoded data in bits. |
|
func (w *huffmanBitWriter) dynamicSize(litEnc, offEnc *huffmanEncoder, extraBits int) (size, numCodegens int) { |
|
numCodegens = len(w.codegenFreq) |
|
for numCodegens > 4 && w.codegenFreq[codegenOrder[numCodegens-1]] == 0 { |
|
numCodegens-- |
|
} |
|
header := 3 + 5 + 5 + 4 + (3 * numCodegens) + |
|
w.codegenEncoding.bitLength(w.codegenFreq[:]) + |
|
int(w.codegenFreq[16])*2 + |
|
int(w.codegenFreq[17])*3 + |
|
int(w.codegenFreq[18])*7 |
|
size = header + |
|
litEnc.bitLength(w.literalFreq) + |
|
offEnc.bitLength(w.offsetFreq) + |
|
extraBits |
|
|
|
return size, numCodegens |
|
} |
|
|
|
// fixedSize returns the size of dynamically encoded data in bits. |
|
func (w *huffmanBitWriter) fixedSize(extraBits int) int { |
|
return 3 + |
|
fixedLiteralEncoding.bitLength(w.literalFreq) + |
|
fixedOffsetEncoding.bitLength(w.offsetFreq) + |
|
extraBits |
|
} |
|
|
|
// storedSize calculates the stored size, including header. |
|
// The function returns the size in bits and whether the block |
|
// fits inside a single block. |
|
func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) { |
|
if in == nil { |
|
return 0, false |
|
} |
|
if len(in) <= maxStoreBlockSize { |
|
return (len(in) + 5) * 8, true |
|
} |
|
return 0, false |
|
} |
|
|
|
func (w *huffmanBitWriter) writeCode(c hcode) { |
|
if w.err != nil { |
|
return |
|
} |
|
w.bits |= uint64(c.code) << w.nbits |
|
w.nbits += uint(c.len) |
|
if w.nbits >= 48 { |
|
bits := w.bits |
|
w.bits >>= 48 |
|
w.nbits -= 48 |
|
n := w.nbytes |
|
bytes := w.bytes[n : n+6] |
|
bytes[0] = byte(bits) |
|
bytes[1] = byte(bits >> 8) |
|
bytes[2] = byte(bits >> 16) |
|
bytes[3] = byte(bits >> 24) |
|
bytes[4] = byte(bits >> 32) |
|
bytes[5] = byte(bits >> 40) |
|
n += 6 |
|
if n >= bufferFlushSize { |
|
w.write(w.bytes[:n]) |
|
n = 0 |
|
} |
|
w.nbytes = n |
|
} |
|
} |
|
|
|
// Write the header of a dynamic Huffman block to the output stream. |
|
// |
|
// numLiterals The number of literals specified in codegen |
|
// numOffsets The number of offsets specified in codegen |
|
// numCodegens The number of codegens used in codegen |
|
func (w *huffmanBitWriter) writeDynamicHeader(numLiterals int, numOffsets int, numCodegens int, isEof bool) { |
|
if w.err != nil { |
|
return |
|
} |
|
var firstBits int32 = 4 |
|
if isEof { |
|
firstBits = 5 |
|
} |
|
w.writeBits(firstBits, 3) |
|
w.writeBits(int32(numLiterals-257), 5) |
|
w.writeBits(int32(numOffsets-1), 5) |
|
w.writeBits(int32(numCodegens-4), 4) |
|
|
|
for i := 0; i < numCodegens; i++ { |
|
value := uint(w.codegenEncoding.codes[codegenOrder[i]].len) |
|
w.writeBits(int32(value), 3) |
|
} |
|
|
|
i := 0 |
|
for { |
|
var codeWord int = int(w.codegen[i]) |
|
i++ |
|
if codeWord == badCode { |
|
break |
|
} |
|
w.writeCode(w.codegenEncoding.codes[uint32(codeWord)]) |
|
|
|
switch codeWord { |
|
case 16: |
|
w.writeBits(int32(w.codegen[i]), 2) |
|
i++ |
|
break |
|
case 17: |
|
w.writeBits(int32(w.codegen[i]), 3) |
|
i++ |
|
break |
|
case 18: |
|
w.writeBits(int32(w.codegen[i]), 7) |
|
i++ |
|
break |
|
} |
|
} |
|
} |
|
|
|
func (w *huffmanBitWriter) writeStoredHeader(length int, isEof bool) { |
|
if w.err != nil { |
|
return |
|
} |
|
var flag int32 |
|
if isEof { |
|
flag = 1 |
|
} |
|
w.writeBits(flag, 3) |
|
w.flush() |
|
w.writeBits(int32(length), 16) |
|
w.writeBits(int32(^uint16(length)), 16) |
|
} |
|
|
|
func (w *huffmanBitWriter) writeFixedHeader(isEof bool) { |
|
if w.err != nil { |
|
return |
|
} |
|
// Indicate that we are a fixed Huffman block |
|
var value int32 = 2 |
|
if isEof { |
|
value = 3 |
|
} |
|
w.writeBits(value, 3) |
|
} |
|
|
|
// writeBlock will write a block of tokens with the smallest encoding. |
|
// The original input can be supplied, and if the huffman encoded data |
|
// is larger than the original bytes, the data will be written as a |
|
// stored block. |
|
// If the input is nil, the tokens will always be Huffman encoded. |
|
func (w *huffmanBitWriter) writeBlock(tokens []token, eof bool, input []byte) { |
|
if w.err != nil { |
|
return |
|
} |
|
|
|
tokens = append(tokens, endBlockMarker) |
|
numLiterals, numOffsets := w.indexTokens(tokens) |
|
|
|
var extraBits int |
|
storedSize, storable := w.storedSize(input) |
|
if storable { |
|
// We only bother calculating the costs of the extra bits required by |
|
// the length of offset fields (which will be the same for both fixed |
|
// and dynamic encoding), if we need to compare those two encodings |
|
// against stored encoding. |
|
for lengthCode := lengthCodesStart + 8; lengthCode < numLiterals; lengthCode++ { |
|
// First eight length codes have extra size = 0. |
|
extraBits += int(w.literalFreq[lengthCode]) * int(lengthExtraBits[lengthCode-lengthCodesStart]) |
|
} |
|
for offsetCode := 4; offsetCode < numOffsets; offsetCode++ { |
|
// First four offset codes have extra size = 0. |
|
extraBits += int(w.offsetFreq[offsetCode]) * int(offsetExtraBits[offsetCode]) |
|
} |
|
} |
|
|
|
// Figure out smallest code. |
|
// Fixed Huffman baseline. |
|
var literalEncoding = fixedLiteralEncoding |
|
var offsetEncoding = fixedOffsetEncoding |
|
var size = w.fixedSize(extraBits) |
|
|
|
// Dynamic Huffman? |
|
var numCodegens int |
|
|
|
// Generate codegen and codegenFrequencies, which indicates how to encode |
|
// the literalEncoding and the offsetEncoding. |
|
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) |
|
w.codegenEncoding.generate(w.codegenFreq[:], 7) |
|
dynamicSize, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, extraBits) |
|
|
|
if dynamicSize < size { |
|
size = dynamicSize |
|
literalEncoding = w.literalEncoding |
|
offsetEncoding = w.offsetEncoding |
|
} |
|
|
|
// Stored bytes? |
|
if storable && storedSize < size { |
|
w.writeStoredHeader(len(input), eof) |
|
w.writeBytes(input) |
|
return |
|
} |
|
|
|
// Huffman. |
|
if literalEncoding == fixedLiteralEncoding { |
|
w.writeFixedHeader(eof) |
|
} else { |
|
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) |
|
} |
|
|
|
// Write the tokens. |
|
w.writeTokens(tokens, literalEncoding.codes, offsetEncoding.codes) |
|
} |
|
|
|
// writeBlockDynamic encodes a block using a dynamic Huffman table. |
|
// This should be used if the symbols used have a disproportionate |
|
// histogram distribution. |
|
// If input is supplied and the compression savings are below 1/16th of the |
|
// input size the block is stored. |
|
func (w *huffmanBitWriter) writeBlockDynamic(tokens []token, eof bool, input []byte) { |
|
if w.err != nil { |
|
return |
|
} |
|
|
|
tokens = append(tokens, endBlockMarker) |
|
numLiterals, numOffsets := w.indexTokens(tokens) |
|
|
|
// Generate codegen and codegenFrequencies, which indicates how to encode |
|
// the literalEncoding and the offsetEncoding. |
|
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, w.offsetEncoding) |
|
w.codegenEncoding.generate(w.codegenFreq[:], 7) |
|
size, numCodegens := w.dynamicSize(w.literalEncoding, w.offsetEncoding, 0) |
|
|
|
// Store bytes, if we don't get a reasonable improvement. |
|
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { |
|
w.writeStoredHeader(len(input), eof) |
|
w.writeBytes(input) |
|
return |
|
} |
|
|
|
// Write Huffman table. |
|
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) |
|
|
|
// Write the tokens. |
|
w.writeTokens(tokens, w.literalEncoding.codes, w.offsetEncoding.codes) |
|
} |
|
|
|
// indexTokens indexes a slice of tokens, and updates |
|
// literalFreq and offsetFreq, and generates literalEncoding |
|
// and offsetEncoding. |
|
// The number of literal and offset tokens is returned. |
|
func (w *huffmanBitWriter) indexTokens(tokens []token) (numLiterals, numOffsets int) { |
|
for i := range w.literalFreq { |
|
w.literalFreq[i] = 0 |
|
} |
|
for i := range w.offsetFreq { |
|
w.offsetFreq[i] = 0 |
|
} |
|
|
|
for _, t := range tokens { |
|
if t < matchType { |
|
w.literalFreq[t.literal()]++ |
|
continue |
|
} |
|
length := t.length() |
|
offset := t.offset() |
|
w.literalFreq[lengthCodesStart+lengthCode(length)]++ |
|
w.offsetFreq[offsetCode(offset)]++ |
|
} |
|
|
|
// get the number of literals |
|
numLiterals = len(w.literalFreq) |
|
for w.literalFreq[numLiterals-1] == 0 { |
|
numLiterals-- |
|
} |
|
// get the number of offsets |
|
numOffsets = len(w.offsetFreq) |
|
for numOffsets > 0 && w.offsetFreq[numOffsets-1] == 0 { |
|
numOffsets-- |
|
} |
|
if numOffsets == 0 { |
|
// We haven't found a single match. If we want to go with the dynamic encoding, |
|
// we should count at least one offset to be sure that the offset huffman tree could be encoded. |
|
w.offsetFreq[0] = 1 |
|
numOffsets = 1 |
|
} |
|
w.literalEncoding.generate(w.literalFreq, 15) |
|
w.offsetEncoding.generate(w.offsetFreq, 15) |
|
return |
|
} |
|
|
|
// writeTokens writes a slice of tokens to the output. |
|
// codes for literal and offset encoding must be supplied. |
|
func (w *huffmanBitWriter) writeTokens(tokens []token, leCodes, oeCodes []hcode) { |
|
if w.err != nil { |
|
return |
|
} |
|
for _, t := range tokens { |
|
if t < matchType { |
|
w.writeCode(leCodes[t.literal()]) |
|
continue |
|
} |
|
// Write the length |
|
length := t.length() |
|
lengthCode := lengthCode(length) |
|
w.writeCode(leCodes[lengthCode+lengthCodesStart]) |
|
extraLengthBits := uint(lengthExtraBits[lengthCode]) |
|
if extraLengthBits > 0 { |
|
extraLength := int32(length - lengthBase[lengthCode]) |
|
w.writeBits(extraLength, extraLengthBits) |
|
} |
|
// Write the offset |
|
offset := t.offset() |
|
offsetCode := offsetCode(offset) |
|
w.writeCode(oeCodes[offsetCode]) |
|
extraOffsetBits := uint(offsetExtraBits[offsetCode]) |
|
if extraOffsetBits > 0 { |
|
extraOffset := int32(offset - offsetBase[offsetCode]) |
|
w.writeBits(extraOffset, extraOffsetBits) |
|
} |
|
} |
|
} |
|
|
|
// huffOffset is a static offset encoder used for huffman only encoding. |
|
// It can be reused since we will not be encoding offset values. |
|
var huffOffset *huffmanEncoder |
|
|
|
func init() { |
|
w := newHuffmanBitWriter(nil) |
|
w.offsetFreq[0] = 1 |
|
huffOffset = newHuffmanEncoder(offsetCodeCount) |
|
huffOffset.generate(w.offsetFreq, 15) |
|
} |
|
|
|
// writeBlockHuff encodes a block of bytes as either |
|
// Huffman encoded literals or uncompressed bytes if the |
|
// results only gains very little from compression. |
|
func (w *huffmanBitWriter) writeBlockHuff(eof bool, input []byte) { |
|
if w.err != nil { |
|
return |
|
} |
|
|
|
// Clear histogram |
|
for i := range w.literalFreq { |
|
w.literalFreq[i] = 0 |
|
} |
|
|
|
// Add everything as literals |
|
histogram(input, w.literalFreq) |
|
|
|
w.literalFreq[endBlockMarker] = 1 |
|
|
|
const numLiterals = endBlockMarker + 1 |
|
const numOffsets = 1 |
|
|
|
w.literalEncoding.generate(w.literalFreq, 15) |
|
|
|
// Figure out smallest code. |
|
// Always use dynamic Huffman or Store |
|
var numCodegens int |
|
|
|
// Generate codegen and codegenFrequencies, which indicates how to encode |
|
// the literalEncoding and the offsetEncoding. |
|
w.generateCodegen(numLiterals, numOffsets, w.literalEncoding, huffOffset) |
|
w.codegenEncoding.generate(w.codegenFreq[:], 7) |
|
size, numCodegens := w.dynamicSize(w.literalEncoding, huffOffset, 0) |
|
|
|
// Store bytes, if we don't get a reasonable improvement. |
|
if ssize, storable := w.storedSize(input); storable && ssize < (size+size>>4) { |
|
w.writeStoredHeader(len(input), eof) |
|
w.writeBytes(input) |
|
return |
|
} |
|
|
|
// Huffman. |
|
w.writeDynamicHeader(numLiterals, numOffsets, numCodegens, eof) |
|
encoding := w.literalEncoding.codes[:257] |
|
n := w.nbytes |
|
for _, t := range input { |
|
// Bitwriting inlined, ~30% speedup |
|
c := encoding[t] |
|
w.bits |= uint64(c.code) << w.nbits |
|
w.nbits += uint(c.len) |
|
if w.nbits < 48 { |
|
continue |
|
} |
|
// Store 6 bytes |
|
bits := w.bits |
|
w.bits >>= 48 |
|
w.nbits -= 48 |
|
bytes := w.bytes[n : n+6] |
|
bytes[0] = byte(bits) |
|
bytes[1] = byte(bits >> 8) |
|
bytes[2] = byte(bits >> 16) |
|
bytes[3] = byte(bits >> 24) |
|
bytes[4] = byte(bits >> 32) |
|
bytes[5] = byte(bits >> 40) |
|
n += 6 |
|
if n < bufferFlushSize { |
|
continue |
|
} |
|
w.write(w.bytes[:n]) |
|
if w.err != nil { |
|
return // Return early in the event of write failures |
|
} |
|
n = 0 |
|
} |
|
w.nbytes = n |
|
w.writeCode(encoding[endBlockMarker]) |
|
}
|
|
|