vendor: update buildkit to 2f99651

Signed-off-by: CrazyMax <crazy-max@users.noreply.github.com>
This commit is contained in:
CrazyMax
2022-02-09 21:53:27 +01:00
parent 60a025b227
commit 307c94e5c7
360 changed files with 13218 additions and 6961 deletions

View File

@ -3,6 +3,7 @@
before:
hooks:
- ./gen.sh
- go install mvdan.cc/garble@latest
builds:
-
@ -31,6 +32,7 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2d"
binary: s2d
@ -57,6 +59,7 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
-
id: "s2sx"
binary: s2sx
@ -84,6 +87,7 @@ builds:
- mips64le
goarm:
- 7
gobinary: garble
archives:
-

View File

@ -17,6 +17,13 @@ This package provides various compression algorithms.
# changelog
* Jan 11, 2022 (v1.14.1)
* s2: Add stream index in [#462](https://github.com/klauspost/compress/pull/462)
* flate: Speed and efficiency improvements in [#439](https://github.com/klauspost/compress/pull/439) [#461](https://github.com/klauspost/compress/pull/461) [#455](https://github.com/klauspost/compress/pull/455) [#452](https://github.com/klauspost/compress/pull/452) [#458](https://github.com/klauspost/compress/pull/458)
* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
* Aug 30, 2021 (v1.13.5)
* gz/zlib/flate: Alias stdlib errors [#425](https://github.com/klauspost/compress/pull/425)
* s2: Add block support to commandline tools [#413](https://github.com/klauspost/compress/pull/413)
@ -432,6 +439,13 @@ For more information see my blog post on [Fast Linear Time Compression](http://b
This is implemented on Go 1.7 as "Huffman Only" mode, though not exposed for gzip.
# Other packages
Here are other packages of good quality and pure Go (no cgo wrappers or autoconverted code):
* [github.com/pierrec/lz4](https://github.com/pierrec/lz4) - strong multithreaded LZ4 compression.
* [github.com/cosnicolaou/pbzip2](https://github.com/cosnicolaou/pbzip2) - multithreaded bzip2 decompression.
* [github.com/dsnet/compress](https://github.com/dsnet/compress) - brotli decompression, bzip2 writer.
# license

View File

@ -20,7 +20,7 @@ type dEntrySingle struct {
// double-symbols decoding
type dEntryDouble struct {
seq uint16
seq [4]byte
nBits uint8
len uint8
}
@ -753,23 +753,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
@ -780,23 +778,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
br[stream2].fillFast()
val := br[stream].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
val2 := br[stream2].peekBitsFast(d.actualTableLog)
v := single[val&tlMask]
v2 := single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream] = uint8(v.entry >> 8)
buf[off+bufoff*stream2] = uint8(v2.entry >> 8)
val = br[stream].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
br[stream].advance(uint8(v.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
val2 = br[stream2].peekBitsFast(d.actualTableLog)
v = single[val&tlMask]
v2 = single[val2&tlMask]
br[stream].advance(uint8(v.entry))
br[stream2].advance(uint8(v2.entry))
buf[off+bufoff*stream+1] = uint8(v.entry >> 8)
buf[off+bufoff*stream2+1] = uint8(v2.entry >> 8)
}
@ -914,7 +910,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
out := dst
dstEvery := (dstSize + 3) / 4
shift := (8 - d.actualTableLog) & 7
shift := (56 + (8 - d.actualTableLog)) & 63
const tlSize = 1 << 8
single := d.dt.single[:tlSize]
@ -935,79 +931,91 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
// Interleave 2 decodes.
const stream = 0
const stream2 = 1
br[stream].fillFast()
br[stream2].fillFast()
br1 := &br[stream]
br2 := &br[stream2]
br1.fillFast()
br2.fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br1.value>>shift)].entry
v2 := single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
buf[off+bufoff*stream+3] = uint8(v >> 8)
}
{
const stream = 2
const stream2 = 3
br[stream].fillFast()
br[stream2].fillFast()
br1 := &br[stream]
br2 := &br[stream2]
br1.fillFast()
br2.fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br1.value>>shift)].entry
v2 := single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
v = single[uint8(br1.value>>shift)].entry
v2 = single[uint8(br2.value>>shift)].entry
br1.bitsRead += uint8(v)
br1.value <<= v & 63
br2.bitsRead += uint8(v2)
br2.value <<= v2 & 63
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
buf[off+bufoff*stream+3] = uint8(v >> 8)
}
off += 4
@ -1073,7 +1081,7 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
v := single[uint8(br.value>>shift)].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)
@ -1121,7 +1129,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
out := dst
dstEvery := (dstSize + 3) / 4
const shift = 0
const shift = 56
const tlSize = 1 << 8
const tlMask = tlSize - 1
single := d.dt.single[:tlSize]
@ -1145,37 +1153,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br[stream].value>>shift)].entry
v2 := single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
{
@ -1184,37 +1196,41 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
br[stream].fillFast()
br[stream2].fillFast()
v := single[br[stream].peekByteFast()>>shift].entry
v := single[uint8(br[stream].value>>shift)].entry
v2 := single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 := single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+1] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+1] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+2] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+2] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
v = single[br[stream].peekByteFast()>>shift].entry
v = single[uint8(br[stream].value>>shift)].entry
v2 = single[uint8(br[stream2].value>>shift)].entry
br[stream].bitsRead += uint8(v)
br[stream].value <<= v & 63
br[stream2].bitsRead += uint8(v2)
br[stream2].value <<= v2 & 63
buf[off+bufoff*stream+3] = uint8(v >> 8)
br[stream].advance(uint8(v))
v2 = single[br[stream2].peekByteFast()>>shift].entry
buf[off+bufoff*stream2+3] = uint8(v2 >> 8)
br[stream2].advance(uint8(v2))
}
off += 4
@ -1280,7 +1296,7 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
}
// Read value and increment offset.
v := single[br.peekByteFast()>>shift].entry
v := single[br.peekByteFast()].entry
nBits := uint8(v)
br.advance(nBits)
bitsLeft -= int(nBits)

View File

@ -50,16 +50,23 @@ func (b *bitReader) getBits(n uint8) int {
if n == 0 /*|| b.bitsRead >= 64 */ {
return 0
}
return b.getBitsFast(n)
return int(b.get32BitsFast(n))
}
// getBitsFast requires that at least one bit is requested every time.
// get32BitsFast requires that at least one bit is requested every time.
// There are no checks if the buffer is filled.
func (b *bitReader) getBitsFast(n uint8) int {
func (b *bitReader) get32BitsFast(n uint8) uint32 {
const regMask = 64 - 1
v := uint32((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return int(v)
return v
}
func (b *bitReader) get16BitsFast(n uint8) uint16 {
const regMask = 64 - 1
v := uint16((b.value << (b.bitsRead & regMask)) >> ((regMask + 1 - n) & regMask))
b.bitsRead += n
return v
}
// fillFast() will make sure at least 32 bits are available.

View File

@ -38,7 +38,7 @@ func (b *bitWriter) addBits16NC(value uint16, bits uint8) {
b.nBits += bits
}
// addBits32NC will add up to 32 bits.
// addBits32NC will add up to 31 bits.
// It will not check if there is space for them,
// so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
@ -46,6 +46,26 @@ func (b *bitWriter) addBits32NC(value uint32, bits uint8) {
b.nBits += bits
}
// addBits64NC will add up to 64 bits.
// There must be space for 32 bits.
func (b *bitWriter) addBits64NC(value uint64, bits uint8) {
if bits <= 31 {
b.addBits32Clean(uint32(value), bits)
return
}
b.addBits32Clean(uint32(value), 32)
b.flush32()
b.addBits32Clean(uint32(value>>32), bits-32)
}
// addBits32Clean will add up to 32 bits.
// It will not check if there is space for them.
// The input must not contain more bits than specified.
func (b *bitWriter) addBits32Clean(value uint32, bits uint8) {
b.bitContainer |= uint64(value) << (b.nBits & 63)
b.nBits += bits
}
// addBits16Clean will add up to 16 bits. value may not contain more set bits than indicated.
// It will not check if there is space for them, so the caller must ensure that it has flushed recently.
func (b *bitWriter) addBits16Clean(value uint16, bits uint8) {

View File

@ -76,12 +76,11 @@ type blockDec struct {
// Window size of the block.
WindowSize uint64
history chan *history
input chan struct{}
result chan decodeOutput
sequenceBuf []seq
err error
decWG sync.WaitGroup
history chan *history
input chan struct{}
result chan decodeOutput
err error
decWG sync.WaitGroup
// Frame to use for singlethreaded decoding.
// Should not be used by the decoder itself since parent may be another frame.
@ -512,18 +511,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
nSeqs = 0x7f00 + int(in[1]) + (int(in[2]) << 8)
in = in[3:]
}
// Allocate sequences
if cap(b.sequenceBuf) < nSeqs {
if b.lowMem {
b.sequenceBuf = make([]seq, nSeqs)
} else {
// Allocate max
b.sequenceBuf = make([]seq, nSeqs, maxSequences)
}
} else {
// Reuse buffer
b.sequenceBuf = b.sequenceBuf[:nSeqs]
}
var seqs = &sequenceDecs{}
if nSeqs > 0 {
if len(in) < 1 {

View File

@ -51,7 +51,7 @@ func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
const defSeqs = 200
const defSeqs = 2000
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
@ -426,7 +426,7 @@ func fuzzFseEncoder(data []byte) int {
return 0
}
enc := fseEncoder{}
hist := enc.Histogram()[:256]
hist := enc.Histogram()
maxSym := uint8(0)
for i, v := range data {
v = v & 63
@ -722,52 +722,53 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
println("Encoded seq", seq, s, "codes:", s.llCode, s.mlCode, s.ofCode, "states:", ll.state, ml.state, of.state, "bits:", llB, mlB, ofB)
}
seq--
if llEnc.maxBits+mlEnc.maxBits+ofEnc.maxBits <= 32 {
// No need to flush (common)
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tabelog max is 8 for all.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// Store sequences in reverse...
for seq >= 0 {
s = b.sequences[seq]
// We checked that all can stay within 32 bits
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.addBits32NC(s.offset, ofB.outBits)
ofB := ofTT[s.ofCode]
wr.flush32() // tablelog max is below 8 for each, so it will fill max 24 bits.
//of.encode(ofB)
nbBitsOut := (uint32(of.state) + ofB.deltaNbBits) >> 16
dstState := int32(of.state>>(nbBitsOut&15)) + int32(ofB.deltaFindState)
wr.addBits16NC(of.state, uint8(nbBitsOut))
of.state = of.stateTable[dstState]
if debugSequences {
println("Encoded seq", seq, s)
}
// Accumulate extra bits.
outBits := ofB.outBits & 31
extraBits := uint64(s.offset & bitMask32[outBits])
extraBitsN := outBits
seq--
mlB := mlTT[s.mlCode]
//ml.encode(mlB)
nbBitsOut = (uint32(ml.state) + mlB.deltaNbBits) >> 16
dstState = int32(ml.state>>(nbBitsOut&15)) + int32(mlB.deltaFindState)
wr.addBits16NC(ml.state, uint8(nbBitsOut))
ml.state = ml.stateTable[dstState]
outBits = mlB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.matchLen&bitMask32[outBits])
extraBitsN += outBits
llB := llTT[s.llCode]
//ll.encode(llB)
nbBitsOut = (uint32(ll.state) + llB.deltaNbBits) >> 16
dstState = int32(ll.state>>(nbBitsOut&15)) + int32(llB.deltaFindState)
wr.addBits16NC(ll.state, uint8(nbBitsOut))
ll.state = ll.stateTable[dstState]
outBits = llB.outBits & 31
extraBits = extraBits<<outBits | uint64(s.litLen&bitMask32[outBits])
extraBitsN += outBits
wr.flush32()
wr.addBits64NC(extraBits, extraBitsN)
if debugSequences {
println("Encoded seq", seq, s)
}
} else {
for seq >= 0 {
s = b.sequences[seq]
wr.flush32()
llB, ofB, mlB := llTT[s.llCode], ofTT[s.ofCode], mlTT[s.mlCode]
// tabelog max is below 8 for each.
of.encode(ofB)
ml.encode(mlB)
ll.encode(llB)
wr.flush32()
// ml+ll = max 32 bits total
wr.addBits32NC(s.litLen, llB.outBits)
wr.addBits32NC(s.matchLen, mlB.outBits)
wr.flush32()
wr.addBits32NC(s.offset, ofB.outBits)
if debugSequences {
println("Encoded seq", seq, s)
}
seq--
}
seq--
}
ml.flush(mlEnc.actualTableLog)
of.flush(ofEnc.actualTableLog)
@ -801,14 +802,13 @@ func (b *blockEnc) genCodes() {
// nothing to do
return
}
if len(b.sequences) > math.MaxUint16 {
panic("can only encode up to 64K sequences")
}
// No bounds checks after here:
llH := b.coders.llEnc.Histogram()[:256]
ofH := b.coders.ofEnc.Histogram()[:256]
mlH := b.coders.mlEnc.Histogram()[:256]
llH := b.coders.llEnc.Histogram()
ofH := b.coders.ofEnc.Histogram()
mlH := b.coders.mlEnc.Histogram()
for i := range llH {
llH[i] = 0
}
@ -820,7 +820,8 @@ func (b *blockEnc) genCodes() {
}
var llMax, ofMax, mlMax uint8
for i, seq := range b.sequences {
for i := range b.sequences {
seq := &b.sequences[i]
v := llCode(seq.litLen)
seq.llCode = v
llH[v]++
@ -844,7 +845,6 @@ func (b *blockEnc) genCodes() {
panic(fmt.Errorf("mlMax > maxMatchLengthSymbol (%d), matchlen: %d", mlMax, seq.matchLen))
}
}
b.sequences[i] = seq
}
maxCount := func(a []uint32) int {
var max uint32

View File

@ -5,6 +5,7 @@ package zstd
import (
"bytes"
"encoding/binary"
"errors"
"io"
)
@ -15,18 +16,50 @@ const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
type Header struct {
// Window Size the window of data to keep while decoding.
// Will only be set if HasFCS is false.
WindowSize uint64
// SingleSegment specifies whether the data is to be decompressed into a
// single contiguous memory segment.
// It implies that WindowSize is invalid and that FrameContentSize is valid.
SingleSegment bool
// Frame content size.
// Expected size of the entire frame.
FrameContentSize uint64
// WindowSize is the window of data to keep while decoding.
// Will only be set if SingleSegment is false.
WindowSize uint64
// Dictionary ID.
// If 0, no dictionary.
DictionaryID uint32
// HasFCS specifies whether FrameContentSize has a valid value.
HasFCS bool
// FrameContentSize is the expected uncompressed size of the entire frame.
FrameContentSize uint64
// Skippable will be true if the frame is meant to be skipped.
// This implies that FirstBlock.OK is false.
Skippable bool
// SkippableID is the user-specific ID for the skippable frame.
// Valid values are between 0 to 15, inclusive.
SkippableID int
// SkippableSize is the length of the user data to skip following
// the header.
SkippableSize uint32
// HeaderSize is the raw size of the frame header.
//
// For normal frames, it includes the size of the magic number and
// the size of the header (per section 3.1.1.1).
// It does not include the size for any data blocks (section 3.1.1.2) nor
// the size for the trailing content checksum.
//
// For skippable frames, this counts the size of the magic number
// along with the size of the size field of the payload.
// It does not include the size of the skippable payload itself.
// The total frame size is the HeaderSize plus the SkippableSize.
HeaderSize int
// First block information.
FirstBlock struct {
// OK will be set if first block could be decoded.
@ -51,17 +84,9 @@ type Header struct {
CompressedSize int
}
// Skippable will be true if the frame is meant to be skipped.
// No other information will be populated.
Skippable bool
// If set there is a checksum present for the block content.
// The checksum field at the end is always 4 bytes long.
HasCheckSum bool
// If this is true FrameContentSize will have a valid value
HasFCS bool
SingleSegment bool
}
// Decode the header from the beginning of the stream.
@ -71,39 +96,46 @@ type Header struct {
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
*h = Header{}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
b, in := in[:4], in[4:]
if !bytes.Equal(b, frameMagic) {
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
*h = Header{Skippable: true}
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
h.HeaderSize += 4
h.Skippable = true
h.SkippableID = int(b[0] & 0xf)
h.SkippableSize = binary.LittleEndian.Uint32(in)
return nil
}
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
// Clear output
*h = Header{}
fhd, in := in[0], in[1:]
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
fhd, in := in[0], in[1:]
h.HeaderSize++
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
h.HeaderSize++
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
@ -120,9 +152,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(size)
switch size {
case 1:
h.DictionaryID = uint32(b[0])
@ -152,9 +182,7 @@ func (h *Header) Decode(in []byte) error {
return io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
if b == nil {
return io.ErrUnexpectedEOF
}
h.HeaderSize += int(fcsSize)
switch fcsSize {
case 1:
h.FrameContentSize = uint64(b[0])

View File

@ -108,11 +108,6 @@ func (e *fastBase) UseBlock(enc *blockEnc) {
e.blk = enc
}
func (e *fastBase) matchlenNoHist(s, t int32, src []byte) int32 {
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
}
func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
if debugAsserts {
if s < 0 {
@ -131,9 +126,24 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
panic(fmt.Sprintf("len(src)-s (%d) > maxCompressedBlockSize (%d)", len(src)-int(s), maxCompressedBlockSize))
}
}
a := src[s:]
b := src[t:]
b = b[:len(a)]
end := int32((len(a) >> 3) << 3)
for i := int32(0); i < end; i += 8 {
if diff := load6432(a, i) ^ load6432(b, i); diff != 0 {
return i + int32(bits.TrailingZeros64(diff)>>3)
}
}
// Extend the match to be as long as possible.
return int32(matchLen(src[s:], src[t:]))
a = a[end:]
b = b[end:]
for i := range a {
if a[i] != b[i] {
return int32(i) + end
}
}
return int32(len(a)) + end
}
// Reset the encoding table.

View File

@ -6,8 +6,6 @@ package zstd
import (
"fmt"
"math"
"math/bits"
)
const (
@ -136,20 +134,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
@ -236,20 +221,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -286,20 +258,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
@ -418,21 +377,7 @@ encodeLoop:
if len(blk.sequences) > 2 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
// length := 4 + e.matchlen(s+6, repIndex+4, src)
// length := 4 + int32(matchLen(src[s+6:], src[repIndex+4:]))
var length int32
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length := 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@ -522,21 +467,7 @@ encodeLoop:
panic(fmt.Sprintf("t (%d) < 0 ", t))
}
// Extend the 4-byte match as long as possible.
//l := e.matchlenNoHist(s+4, t+4, src) + 4
// l := int32(matchLen(src[s+4:], src[t+4:])) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -573,21 +504,7 @@ encodeLoop:
if o2 := s - offset2; len(blk.sequences) > 2 && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlenNoHist(s+4, o2+4, src)
// l := 4 + int32(matchLen(src[s+4:], src[o2+4:]))
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)
@ -731,19 +648,7 @@ encodeLoop:
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
length = 4 + e.matchlen(s+6, repIndex+4, src)
seq.matchLen = uint32(length - zstdMinMatch)
@ -831,20 +736,7 @@ encodeLoop:
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
@ -881,20 +773,7 @@ encodeLoop:
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
l := 4 + e.matchlen(s+4, o2+4, src)
// Store this, since we have it.
nextHash := hashLen(cv, hashLog, tableFastHashLen)

View File

@ -24,6 +24,7 @@ type encoderOptions struct {
allLitEntropy bool
customWindow bool
customALEntropy bool
customBlockSize bool
lowMem bool
dict *dict
}
@ -33,7 +34,7 @@ func (o *encoderOptions) setDefault() {
concurrent: runtime.GOMAXPROCS(0),
crc: true,
single: nil,
blockSize: 1 << 16,
blockSize: maxCompressedBlockSize,
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
@ -106,6 +107,7 @@ func WithWindowSize(n int) EOption {
o.customWindow = true
if o.blockSize > o.windowSize {
o.blockSize = o.windowSize
o.customBlockSize = true
}
return nil
}
@ -188,10 +190,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
return SpeedDefault
case level >= 6 && level < 10:
return SpeedBetterCompression
case level >= 10:
default:
return SpeedBestCompression
}
return SpeedDefault
}
// String provides a string representation of the compression level.
@ -222,6 +223,9 @@ func WithEncoderLevel(l EncoderLevel) EOption {
switch o.level {
case SpeedFastest:
o.windowSize = 4 << 20
if !o.customBlockSize {
o.blockSize = 1 << 16
}
case SpeedDefault:
o.windowSize = 8 << 20
case SpeedBetterCompression:

View File

@ -379,7 +379,7 @@ func (s decSymbol) final() (int, uint8) {
// This can only be used if no symbols are 0 bits.
// At least tablelog bits must be available in the bit reader.
func (s *fseState) nextFast(br *bitReader) (uint32, uint8) {
lowBits := uint16(br.getBitsFast(s.state.nbBits()))
lowBits := br.get16BitsFast(s.state.nbBits())
s.state = s.dt[s.state.newState()+lowBits]
return s.state.baseline(), s.state.addBits()
}

View File

@ -62,9 +62,8 @@ func (s symbolTransform) String() string {
// To indicate that you have populated the histogram call HistogramFinished
// with the value of the highest populated symbol, as well as the number of entries
// in the most populated entry. These are accepted at face value.
// The returned slice will always be length 256.
func (s *fseEncoder) Histogram() []uint32 {
return s.count[:]
func (s *fseEncoder) Histogram() *[256]uint32 {
return &s.count
}
// HistogramFinished can be called to indicate that the histogram has been populated.

View File

@ -1,6 +1,7 @@
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
#include "textflag.h"

View File

@ -0,0 +1,186 @@
// +build gc,!purego,!noasm
#include "textflag.h"
// Register allocation.
#define digest R1
#define h R2 // Return value.
#define p R3 // Input pointer.
#define len R4
#define nblocks R5 // len / 32.
#define prime1 R7
#define prime2 R8
#define prime3 R9
#define prime4 R10
#define prime5 R11
#define v1 R12
#define v2 R13
#define v3 R14
#define v4 R15
#define x1 R20
#define x2 R21
#define x3 R22
#define x4 R23
#define round(acc, x) \
MADD prime2, acc, x, acc \
ROR $64-31, acc \
MUL prime1, acc \
// x = round(0, x).
#define round0(x) \
MUL prime2, x \
ROR $64-31, x \
MUL prime1, x \
#define mergeRound(x) \
round0(x) \
EOR x, h \
MADD h, prime4, prime1, h \
// Update v[1-4] with 32-byte blocks. Assumes len >= 32.
#define blocksLoop() \
LSR $5, len, nblocks \
PCALIGN $16 \
loop: \
LDP.P 32(p), (x1, x2) \
round(v1, x1) \
LDP -16(p), (x3, x4) \
round(v2, x2) \
SUB $1, nblocks \
round(v3, x3) \
round(v4, x4) \
CBNZ nblocks, loop \
// The primes are repeated here to ensure that they're stored
// in a contiguous array, so we can load them with LDP.
DATA primes<> +0(SB)/8, $11400714785074694791
DATA primes<> +8(SB)/8, $14029467366897019727
DATA primes<>+16(SB)/8, $1609587929392839161
DATA primes<>+24(SB)/8, $9650029242287828579
DATA primes<>+32(SB)/8, $2870177450012600261
GLOBL primes<>(SB), NOPTR+RODATA, $40
// func Sum64(b []byte) uint64
TEXT ·Sum64(SB), NOFRAME+NOSPLIT, $0-32
LDP b_base+0(FP), (p, len)
LDP primes<> +0(SB), (prime1, prime2)
LDP primes<>+16(SB), (prime3, prime4)
MOVD primes<>+32(SB), prime5
CMP $32, len
CSEL LO, prime5, ZR, h // if len < 32 { h = prime5 } else { h = 0 }
BLO afterLoop
ADD prime1, prime2, v1
MOVD prime2, v2
MOVD $0, v3
NEG prime1, v4
blocksLoop()
ROR $64-1, v1, x1
ROR $64-7, v2, x2
ADD x1, x2
ROR $64-12, v3, x3
ROR $64-18, v4, x4
ADD x3, x4
ADD x2, x4, h
mergeRound(v1)
mergeRound(v2)
mergeRound(v3)
mergeRound(v4)
afterLoop:
ADD len, h
TBZ $4, len, try8
LDP.P 16(p), (x1, x2)
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h, h
MADD h, prime4, prime1, h
round0(x2)
ROR $64-27, h
EOR x2 @> 64-27, h
MADD h, prime4, prime1, h
try8:
TBZ $3, len, try4
MOVD.P 8(p), x1
round0(x1)
ROR $64-27, h
EOR x1 @> 64-27, h
MADD h, prime4, prime1, h
try4:
TBZ $2, len, try2
MOVWU.P 4(p), x2
MUL prime1, x2
ROR $64-23, h
EOR x2 @> 64-23, h
MADD h, prime3, prime2, h
try2:
TBZ $1, len, try1
MOVHU.P 2(p), x3
AND $255, x3, x1
LSR $8, x3, x2
MUL prime5, x1
ROR $64-11, h
EOR x1 @> 64-11, h
MUL prime1, h
MUL prime5, x2
ROR $64-11, h
EOR x2 @> 64-11, h
MUL prime1, h
try1:
TBZ $0, len, end
MOVBU (p), x4
MUL prime5, x4
ROR $64-11, h
EOR x4 @> 64-11, h
MUL prime1, h
end:
EOR h >> 33, h
MUL prime2, h
EOR h >> 29, h
MUL prime3, h
EOR h >> 32, h
MOVD h, ret+24(FP)
RET
// func writeBlocks(d *Digest, b []byte) int
//
// Assumes len(b) >= 32.
TEXT ·writeBlocks(SB), NOFRAME+NOSPLIT, $0-40
LDP primes<>(SB), (prime1, prime2)
// Load state. Assume v[1-4] are stored contiguously.
MOVD d+0(FP), digest
LDP 0(digest), (v1, v2)
LDP 16(digest), (v3, v4)
LDP b_base+8(FP), (p, len)
blocksLoop()
// Store updated state.
STP (v1, v2), 0(digest)
STP (v3, v4), 16(digest)
BIC $31, len
MOVD len, ret+32(FP)
RET

View File

@ -1,5 +1,9 @@
//go:build !appengine && gc && !purego
// +build !appengine,gc,!purego
//go:build (amd64 || arm64) && !appengine && gc && !purego && !noasm
// +build amd64 arm64
// +build !appengine
// +build gc
// +build !purego
// +build !noasm
package xxhash

View File

@ -1,5 +1,5 @@
//go:build !amd64 || appengine || !gc || purego
// +build !amd64 appengine !gc purego
//go:build (!amd64 && !arm64) || appengine || !gc || purego || noasm
// +build !amd64,!arm64 appengine !gc purego noasm
package xxhash

View File

@ -278,7 +278,7 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
mlState = mlTable[mlState.newState()&maxTableMask]
ofState = ofTable[ofState.newState()&maxTableMask]
} else {
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((ofState.nbBits() + mlState.nbBits()) & 31))
llState = llTable[(llState.newState()+lowBits)&maxTableMask]
@ -326,7 +326,7 @@ func (s *sequenceDecs) updateAlt(br *bitReader) {
s.offsets.state.state = s.offsets.state.dt[c.newState()]
return
}
bits := br.getBitsFast(nBits)
bits := br.get32BitsFast(nBits)
lowBits := uint16(bits >> ((c.nbBits() + b.nbBits()) & 31))
s.litLengths.state.state = s.litLengths.state.dt[a.newState()+lowBits]