vendor: update buildkit to opentelemetry support

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi
2021-06-15 21:02:39 -07:00
parent 6ba080d337
commit 334c93fbbe
829 changed files with 89541 additions and 24438 deletions

View File

@ -92,7 +92,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, tableLog uint8, first symbolTra
im := int32((nbBitsOut << 16) - first.deltaNbBits)
lu := (im >> nbBitsOut) + first.deltaFindState
c.state = c.stateTable[lu]
return
}
// encode the output symbol provided and write it to the bitstream.
@ -301,7 +300,7 @@ func (s *Scratch) writeCount() error {
out[outP+1] = byte(bitStream >> 8)
outP += (bitCount + 7) / 8
if uint16(charnum) > s.symbolLen {
if charnum > s.symbolLen {
return errors.New("internal error: charnum > s.symbolLen")
}
s.Out = out[:outP]
@ -331,7 +330,7 @@ type cTable struct {
func (s *Scratch) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
if cap(s.ct.tableSymbol) < int(tableSize) {
if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@ -565,8 +564,8 @@ func (s *Scratch) normalizeCount2() error {
distributed uint32
total = uint32(s.br.remain())
tableLog = s.actualTableLog
lowThreshold = uint32(total >> tableLog)
lowOne = uint32((total * 3) >> (tableLog + 1))
lowThreshold = total >> tableLog
lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@ -591,7 +590,7 @@ func (s *Scratch) normalizeCount2() error {
if (total / toDistribute) > lowOne {
// risk of rounding to zero
lowOne = uint32((total * 3) / (toDistribute * 2))
lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1

View File

@ -172,7 +172,7 @@ type decSymbol struct {
// allocDtable will allocate decoding tables if they are not big enough.
func (s *Scratch) allocDtable() {
tableSize := 1 << s.actualTableLog
if cap(s.decTable) < int(tableSize) {
if cap(s.decTable) < tableSize {
s.decTable = make([]decSymbol, tableSize)
}
s.decTable = s.decTable[:tableSize]
@ -340,7 +340,7 @@ type decoder struct {
func (d *decoder) init(in *bitReader, dt []decSymbol, tableLog uint8) {
d.dt = dt
d.br = in
d.state = uint16(in.getBits(tableLog))
d.state = in.getBits(tableLog)
}
// next returns the next symbol and sets the next state.

View File

@ -14,7 +14,9 @@ but it can be used as a secondary step to compressors (like Snappy) that does no
## News
* Mar 2018: First implementation released. Consider this beta software for now.
This is used as part of the [zstandard](https://github.com/klauspost/compress/tree/master/zstd#zstd) compression and decompression package.
This ensures that most functionality is well tested.
# Usage

View File

@ -403,7 +403,7 @@ func (s *Scratch) buildCTable() error {
var startNode = int16(s.symbolLen)
nonNullRank := s.symbolLen - 1
nodeNb := int16(startNode)
nodeNb := startNode
huffNode := s.nodes[1 : huffNodesLen+1]
// This overlays the slice above, but allows "-1" index lookups.
@ -536,7 +536,6 @@ func (s *Scratch) huffSort() {
}
nodes[pos&huffNodesMask] = nodeElt{count: c, symbol: byte(n)}
}
return
}
func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
@ -580,7 +579,7 @@ func (s *Scratch) setMaxHeight(lastNonNull int) uint8 {
// Get pos of last (smallest) symbol per rank
{
currentNbBits := uint8(maxNbBits)
currentNbBits := maxNbBits
for pos := int(n); pos >= 0; pos-- {
if huffNode[pos].nbBits >= currentNbBits {
continue

View File

@ -1,16 +0,0 @@
cmd/snappytool/snappytool
testdata/bench
# These explicitly listed benchmark data files are for an obsolete version of
# snappy_test.go.
testdata/alice29.txt
testdata/asyoulik.txt
testdata/fireworks.jpeg
testdata/geo.protodata
testdata/html
testdata/html_x_4
testdata/kppkn.gtb
testdata/lcet10.txt
testdata/paper-100k.pdf
testdata/plrabn12.txt
testdata/urls.10K

View File

@ -1,15 +0,0 @@
# This is the official list of Snappy-Go authors for copyright purposes.
# This file is distinct from the CONTRIBUTORS files.
# See the latter for an explanation.
# Names should be added to this file as
# Name or Organization <email address>
# The email address is not required for organizations.
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Google Inc.
Jan Mercl <0xjnml@gmail.com>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Sebastien Binet <seb.binet@gmail.com>

View File

@ -1,37 +0,0 @@
# This is the official list of people who can contribute
# (and typically have contributed) code to the Snappy-Go repository.
# The AUTHORS file lists the copyright holders; this file
# lists people. For example, Google employees are listed here
# but not in AUTHORS, because Google holds the copyright.
#
# The submission process automatically checks to make sure
# that people submitting code are listed in this file (by email address).
#
# Names should be added to this file only after verifying that
# the individual or the individual's organization has agreed to
# the appropriate Contributor License Agreement, found here:
#
# http://code.google.com/legal/individual-cla-v1.0.html
# http://code.google.com/legal/corporate-cla-v1.0.html
#
# The agreement for individuals can be filled out on the web.
#
# When adding J Random Contributor's name to this file,
# either J's name or J's organization's name should be
# added to the AUTHORS file, depending on whether the
# individual or corporate CLA was used.
# Names should be added to this file like so:
# Name <email address>
# Please keep the list sorted.
Damian Gryski <dgryski@gmail.com>
Jan Mercl <0xjnml@gmail.com>
Kai Backman <kaib@golang.org>
Marc-Antoine Ruel <maruel@chromium.org>
Nigel Tao <nigeltao@golang.org>
Rob Pike <r@golang.org>
Rodolfo Carvalho <rhcarvalho@gmail.com>
Russ Cox <rsc@golang.org>
Sebastien Binet <seb.binet@gmail.com>

View File

@ -1,27 +0,0 @@
Copyright (c) 2011 The Snappy-Go Authors. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are
met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above
copyright notice, this list of conditions and the following disclaimer
in the documentation and/or other materials provided with the
distribution.
* Neither the name of Google Inc. nor the names of its
contributors may be used to endorse or promote products derived from
this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,107 +0,0 @@
The Snappy compression format in the Go programming language.
To download and install from source:
$ go get github.com/golang/snappy
Unless otherwise noted, the Snappy-Go source files are distributed
under the BSD-style license found in the LICENSE file.
Benchmarks.
The golang/snappy benchmarks include compressing (Z) and decompressing (U) ten
or so files, the same set used by the C++ Snappy code (github.com/google/snappy
and note the "google", not "golang"). On an "Intel(R) Core(TM) i7-3770 CPU @
3.40GHz", Go's GOARCH=amd64 numbers as of 2016-05-29:
"go test -test.bench=."
_UFlat0-8 2.19GB/s ± 0% html
_UFlat1-8 1.41GB/s ± 0% urls
_UFlat2-8 23.5GB/s ± 2% jpg
_UFlat3-8 1.91GB/s ± 0% jpg_200
_UFlat4-8 14.0GB/s ± 1% pdf
_UFlat5-8 1.97GB/s ± 0% html4
_UFlat6-8 814MB/s ± 0% txt1
_UFlat7-8 785MB/s ± 0% txt2
_UFlat8-8 857MB/s ± 0% txt3
_UFlat9-8 719MB/s ± 1% txt4
_UFlat10-8 2.84GB/s ± 0% pb
_UFlat11-8 1.05GB/s ± 0% gaviota
_ZFlat0-8 1.04GB/s ± 0% html
_ZFlat1-8 534MB/s ± 0% urls
_ZFlat2-8 15.7GB/s ± 1% jpg
_ZFlat3-8 740MB/s ± 3% jpg_200
_ZFlat4-8 9.20GB/s ± 1% pdf
_ZFlat5-8 991MB/s ± 0% html4
_ZFlat6-8 379MB/s ± 0% txt1
_ZFlat7-8 352MB/s ± 0% txt2
_ZFlat8-8 396MB/s ± 1% txt3
_ZFlat9-8 327MB/s ± 1% txt4
_ZFlat10-8 1.33GB/s ± 1% pb
_ZFlat11-8 605MB/s ± 1% gaviota
"go test -test.bench=. -tags=noasm"
_UFlat0-8 621MB/s ± 2% html
_UFlat1-8 494MB/s ± 1% urls
_UFlat2-8 23.2GB/s ± 1% jpg
_UFlat3-8 1.12GB/s ± 1% jpg_200
_UFlat4-8 4.35GB/s ± 1% pdf
_UFlat5-8 609MB/s ± 0% html4
_UFlat6-8 296MB/s ± 0% txt1
_UFlat7-8 288MB/s ± 0% txt2
_UFlat8-8 309MB/s ± 1% txt3
_UFlat9-8 280MB/s ± 1% txt4
_UFlat10-8 753MB/s ± 0% pb
_UFlat11-8 400MB/s ± 0% gaviota
_ZFlat0-8 409MB/s ± 1% html
_ZFlat1-8 250MB/s ± 1% urls
_ZFlat2-8 12.3GB/s ± 1% jpg
_ZFlat3-8 132MB/s ± 0% jpg_200
_ZFlat4-8 2.92GB/s ± 0% pdf
_ZFlat5-8 405MB/s ± 1% html4
_ZFlat6-8 179MB/s ± 1% txt1
_ZFlat7-8 170MB/s ± 1% txt2
_ZFlat8-8 189MB/s ± 1% txt3
_ZFlat9-8 164MB/s ± 1% txt4
_ZFlat10-8 479MB/s ± 1% pb
_ZFlat11-8 270MB/s ± 1% gaviota
For comparison (Go's encoded output is byte-for-byte identical to C++'s), here
are the numbers from C++ Snappy's
make CXXFLAGS="-O2 -DNDEBUG -g" clean snappy_unittest.log && cat snappy_unittest.log
BM_UFlat/0 2.4GB/s html
BM_UFlat/1 1.4GB/s urls
BM_UFlat/2 21.8GB/s jpg
BM_UFlat/3 1.5GB/s jpg_200
BM_UFlat/4 13.3GB/s pdf
BM_UFlat/5 2.1GB/s html4
BM_UFlat/6 1.0GB/s txt1
BM_UFlat/7 959.4MB/s txt2
BM_UFlat/8 1.0GB/s txt3
BM_UFlat/9 864.5MB/s txt4
BM_UFlat/10 2.9GB/s pb
BM_UFlat/11 1.2GB/s gaviota
BM_ZFlat/0 944.3MB/s html (22.31 %)
BM_ZFlat/1 501.6MB/s urls (47.78 %)
BM_ZFlat/2 14.3GB/s jpg (99.95 %)
BM_ZFlat/3 538.3MB/s jpg_200 (73.00 %)
BM_ZFlat/4 8.3GB/s pdf (83.30 %)
BM_ZFlat/5 903.5MB/s html4 (22.52 %)
BM_ZFlat/6 336.0MB/s txt1 (57.88 %)
BM_ZFlat/7 312.3MB/s txt2 (61.91 %)
BM_ZFlat/8 353.1MB/s txt3 (54.99 %)
BM_ZFlat/9 289.9MB/s txt4 (66.26 %)
BM_ZFlat/10 1.2GB/s pb (19.68 %)
BM_ZFlat/11 527.4MB/s gaviota (37.72 %)

View File

@ -1,237 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
var (
// ErrCorrupt reports that the input is invalid.
ErrCorrupt = errors.New("snappy: corrupt input")
// ErrTooLarge reports that the uncompressed length is too large.
ErrTooLarge = errors.New("snappy: decoded block is too large")
// ErrUnsupported reports that the input isn't supported.
ErrUnsupported = errors.New("snappy: unsupported input")
errUnsupportedLiteralLength = errors.New("snappy: unsupported literal length")
)
// DecodedLen returns the length of the decoded block.
func DecodedLen(src []byte) (int, error) {
v, _, err := decodedLen(src)
return v, err
}
// decodedLen returns the length of the decoded block and the number of bytes
// that the length header occupied.
func decodedLen(src []byte) (blockLen, headerLen int, err error) {
v, n := binary.Uvarint(src)
if n <= 0 || v > 0xffffffff {
return 0, 0, ErrCorrupt
}
const wordSize = 32 << (^uint(0) >> 32 & 1)
if wordSize == 32 && v > 0x7fffffff {
return 0, 0, ErrTooLarge
}
return int(v), n, nil
}
const (
decodeErrCodeCorrupt = 1
decodeErrCodeUnsupportedLiteralLength = 2
)
// Decode returns the decoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire decoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func Decode(dst, src []byte) ([]byte, error) {
dLen, s, err := decodedLen(src)
if err != nil {
return nil, err
}
if dLen <= len(dst) {
dst = dst[:dLen]
} else {
dst = make([]byte, dLen)
}
switch decode(dst, src[s:]) {
case 0:
return dst, nil
case decodeErrCodeUnsupportedLiteralLength:
return nil, errUnsupportedLiteralLength
}
return nil, ErrCorrupt
}
// NewReader returns a new Reader that decompresses from r, using the framing
// format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
func NewReader(r io.Reader) *Reader {
return &Reader{
r: r,
decoded: make([]byte, maxBlockSize),
buf: make([]byte, maxEncodedLenOfMaxBlockSize+checksumSize),
}
}
// Reader is an io.Reader that can read Snappy-compressed bytes.
type Reader struct {
r io.Reader
err error
decoded []byte
buf []byte
// decoded[i:j] contains decoded bytes that have not yet been passed on.
i, j int
readHeader bool
}
// Reset discards any buffered data, resets all state, and switches the Snappy
// reader to read from r. This permits reusing a Reader rather than allocating
// a new one.
func (r *Reader) Reset(reader io.Reader) {
r.r = reader
r.err = nil
r.i = 0
r.j = 0
r.readHeader = false
}
func (r *Reader) readFull(p []byte, allowEOF bool) (ok bool) {
if _, r.err = io.ReadFull(r.r, p); r.err != nil {
if r.err == io.ErrUnexpectedEOF || (r.err == io.EOF && !allowEOF) {
r.err = ErrCorrupt
}
return false
}
return true
}
// Read satisfies the io.Reader interface.
func (r *Reader) Read(p []byte) (int, error) {
if r.err != nil {
return 0, r.err
}
for {
if r.i < r.j {
n := copy(p, r.decoded[r.i:r.j])
r.i += n
return n, nil
}
if !r.readFull(r.buf[:4], true) {
return 0, r.err
}
chunkType := r.buf[0]
if !r.readHeader {
if chunkType != chunkTypeStreamIdentifier {
r.err = ErrCorrupt
return 0, r.err
}
r.readHeader = true
}
chunkLen := int(r.buf[1]) | int(r.buf[2])<<8 | int(r.buf[3])<<16
if chunkLen > len(r.buf) {
r.err = ErrUnsupported
return 0, r.err
}
// The chunk types are specified at
// https://github.com/google/snappy/blob/master/framing_format.txt
switch chunkType {
case chunkTypeCompressedData:
// Section 4.2. Compressed data (chunk type 0x00).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:chunkLen]
if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
buf = buf[checksumSize:]
n, err := DecodedLen(buf)
if err != nil {
r.err = err
return 0, r.err
}
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
}
if _, err := Decode(r.decoded, buf); err != nil {
r.err = err
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeUncompressedData:
// Section 4.3. Uncompressed data (chunk type 0x01).
if chunkLen < checksumSize {
r.err = ErrCorrupt
return 0, r.err
}
buf := r.buf[:checksumSize]
if !r.readFull(buf, false) {
return 0, r.err
}
checksum := uint32(buf[0]) | uint32(buf[1])<<8 | uint32(buf[2])<<16 | uint32(buf[3])<<24
// Read directly into r.decoded instead of via r.buf.
n := chunkLen - checksumSize
if n > len(r.decoded) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.decoded[:n], false) {
return 0, r.err
}
if crc(r.decoded[:n]) != checksum {
r.err = ErrCorrupt
return 0, r.err
}
r.i, r.j = 0, n
continue
case chunkTypeStreamIdentifier:
// Section 4.1. Stream identifier (chunk type 0xff).
if chunkLen != len(magicBody) {
r.err = ErrCorrupt
return 0, r.err
}
if !r.readFull(r.buf[:len(magicBody)], false) {
return 0, r.err
}
for i := 0; i < len(magicBody); i++ {
if r.buf[i] != magicBody[i] {
r.err = ErrCorrupt
return 0, r.err
}
}
continue
}
if chunkType <= 0x7f {
// Section 4.5. Reserved unskippable chunks (chunk types 0x02-0x7f).
r.err = ErrUnsupported
return 0, r.err
}
// Section 4.4 Padding (chunk type 0xfe).
// Section 4.6. Reserved skippable chunks (chunk types 0x80-0xfd).
if !r.readFull(r.buf[:chunkLen], false) {
return 0, r.err
}
}
}

View File

@ -1,14 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
package snappy
// decode has the same semantics as in decode_other.go.
//
//go:noescape
func decode(dst, src []byte) int

View File

@ -1,482 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The asm code generally follows the pure Go code in decode_other.go, except
// where marked with a "!!!".
// func decode(dst, src []byte) int
//
// All local variables fit into registers. The non-zero stack size is only to
// spill registers and push args when issuing a CALL. The register allocation:
// - AX scratch
// - BX scratch
// - CX length or x
// - DX offset
// - SI &src[s]
// - DI &dst[d]
// + R8 dst_base
// + R9 dst_len
// + R10 dst_base + dst_len
// + R11 src_base
// + R12 src_len
// + R13 src_base + src_len
// - R14 used by doCopy
// - R15 used by doCopy
//
// The registers R8-R13 (marked with a "+") are set at the start of the
// function, and after a CALL returns, and are not otherwise modified.
//
// The d variable is implicitly DI - R8, and len(dst)-d is R10 - DI.
// The s variable is implicitly SI - R11, and len(src)-s is R13 - SI.
TEXT ·decode(SB), NOSPLIT, $48-56
// Initialize SI, DI and R8-R13.
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, DI
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, SI
MOVQ R11, R13
ADDQ R12, R13
loop:
// for s < len(src)
CMPQ SI, R13
JEQ end
// CX = uint32(src[s])
//
// switch src[s] & 0x03
MOVBLZX (SI), CX
MOVL CX, BX
ANDL $3, BX
CMPL BX, $1
JAE tagCopy
// ----------------------------------------
// The code below handles literal tags.
// case tagLiteral:
// x := uint32(src[s] >> 2)
// switch
SHRL $2, CX
CMPL CX, $60
JAE tagLit60Plus
// case x < 60:
// s++
INCQ SI
doLit:
// This is the end of the inner "switch", when we have a literal tag.
//
// We assume that CX == x and x fits in a uint32, where x is the variable
// used in the pure Go decode_other.go code.
// length = int(x) + 1
//
// Unlike the pure Go code, we don't need to check if length <= 0 because
// CX can hold 64 bits, so the increment cannot overflow.
INCQ CX
// Prepare to check if copying length bytes will run past the end of dst or
// src.
//
// AX = len(dst) - d
// BX = len(src) - s
MOVQ R10, AX
SUBQ DI, AX
MOVQ R13, BX
SUBQ SI, BX
// !!! Try a faster technique for short (16 or fewer bytes) copies.
//
// if length > 16 || len(dst)-d < 16 || len(src)-s < 16 {
// goto callMemmove // Fall back on calling runtime·memmove.
// }
//
// The C++ snappy code calls this TryFastAppend. It also checks len(src)-s
// against 21 instead of 16, because it cannot assume that all of its input
// is contiguous in memory and so it needs to leave enough source bytes to
// read the next tag without refilling buffers, but Go's Decode assumes
// contiguousness (the src argument is a []byte).
CMPQ CX, $16
JGT callMemmove
CMPQ AX, $16
JLT callMemmove
CMPQ BX, $16
JLT callMemmove
// !!! Implement the copy from src to dst as a 16-byte load and store.
// (Decode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only length bytes, but that's
// OK. If the input is a valid Snappy encoding then subsequent iterations
// will fix up the overrun. Otherwise, Decode returns a nil []byte (and a
// non-nil error), so the overrun will be ignored.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(SI), X0
MOVOU X0, 0(DI)
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
callMemmove:
// if length > len(dst)-d || length > len(src)-s { etc }
CMPQ CX, AX
JGT errCorrupt
CMPQ CX, BX
JGT errCorrupt
// copy(dst[d:], src[s:s+length])
//
// This means calling runtime·memmove(&dst[d], &src[s], length), so we push
// DI, SI and CX as arguments. Coincidentally, we also need to spill those
// three registers to the stack, to save local variables across the CALL.
MOVQ DI, 0(SP)
MOVQ SI, 8(SP)
MOVQ CX, 16(SP)
MOVQ DI, 24(SP)
MOVQ SI, 32(SP)
MOVQ CX, 40(SP)
CALL runtime·memmove(SB)
// Restore local variables: unspill registers from the stack and
// re-calculate R8-R13.
MOVQ 24(SP), DI
MOVQ 32(SP), SI
MOVQ 40(SP), CX
MOVQ dst_base+0(FP), R8
MOVQ dst_len+8(FP), R9
MOVQ R8, R10
ADDQ R9, R10
MOVQ src_base+24(FP), R11
MOVQ src_len+32(FP), R12
MOVQ R11, R13
ADDQ R12, R13
// d += length
// s += length
ADDQ CX, DI
ADDQ CX, SI
JMP loop
tagLit60Plus:
// !!! This fragment does the
//
// s += x - 58; if uint(s) > uint(len(src)) { etc }
//
// checks. In the asm version, we code it once instead of once per switch case.
ADDQ CX, SI
SUBQ $58, SI
CMPQ SI, R13
JA errCorrupt
// case x == 60:
CMPL CX, $61
JEQ tagLit61
JA tagLit62Plus
// x = uint32(src[s-1])
MOVBLZX -1(SI), CX
JMP doLit
tagLit61:
// case x == 61:
// x = uint32(src[s-2]) | uint32(src[s-1])<<8
MOVWLZX -2(SI), CX
JMP doLit
tagLit62Plus:
CMPL CX, $62
JA tagLit63
// case x == 62:
// x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
MOVWLZX -3(SI), CX
MOVBLZX -1(SI), BX
SHLL $16, BX
ORL BX, CX
JMP doLit
tagLit63:
// case x == 63:
// x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
MOVL -4(SI), CX
JMP doLit
// The code above handles literal tags.
// ----------------------------------------
// The code below handles copy tags.
tagCopy4:
// case tagCopy4:
// s += 5
ADDQ $5, SI
// if uint(s) > uint(len(src)) { etc }
CMPQ SI, R13
JA errCorrupt
// length = 1 + int(src[s-5])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
MOVLQZX -4(SI), DX
JMP doCopy
tagCopy2:
// case tagCopy2:
// s += 3
ADDQ $3, SI
// if uint(s) > uint(len(src)) { etc }
CMPQ SI, R13
JA errCorrupt
// length = 1 + int(src[s-3])>>2
SHRQ $2, CX
INCQ CX
// offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
MOVWQZX -2(SI), DX
JMP doCopy
tagCopy:
// We have a copy tag. We assume that:
// - BX == src[s] & 0x03
// - CX == src[s]
CMPQ BX, $2
JEQ tagCopy2
JA tagCopy4
// case tagCopy1:
// s += 2
ADDQ $2, SI
// if uint(s) > uint(len(src)) { etc }
CMPQ SI, R13
JA errCorrupt
// offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
MOVQ CX, DX
ANDQ $0xe0, DX
SHLQ $3, DX
MOVBQZX -1(SI), BX
ORQ BX, DX
// length = 4 + int(src[s-2])>>2&0x7
SHRQ $2, CX
ANDQ $7, CX
ADDQ $4, CX
doCopy:
// This is the end of the outer "switch", when we have a copy tag.
//
// We assume that:
// - CX == length && CX > 0
// - DX == offset
// if offset <= 0 { etc }
CMPQ DX, $0
JLE errCorrupt
// if d < offset { etc }
MOVQ DI, BX
SUBQ R8, BX
CMPQ BX, DX
JLT errCorrupt
// if length > len(dst)-d { etc }
MOVQ R10, BX
SUBQ DI, BX
CMPQ CX, BX
JGT errCorrupt
// forwardCopy(dst[d:d+length], dst[d-offset:]); d += length
//
// Set:
// - R14 = len(dst)-d
// - R15 = &dst[d-offset]
MOVQ R10, R14
SUBQ DI, R14
MOVQ DI, R15
SUBQ DX, R15
// !!! Try a faster technique for short (16 or fewer bytes) forward copies.
//
// First, try using two 8-byte load/stores, similar to the doLit technique
// above. Even if dst[d:d+length] and dst[d-offset:] can overlap, this is
// still OK if offset >= 8. Note that this has to be two 8-byte load/stores
// and not one 16-byte load/store, and the first store has to be before the
// second load, due to the overlap if offset is in the range [8, 16).
//
// if length > 16 || offset < 8 || len(dst)-d < 16 {
// goto slowForwardCopy
// }
// copy 16 bytes
// d += length
CMPQ CX, $16
JGT slowForwardCopy
CMPQ DX, $8
JLT slowForwardCopy
CMPQ R14, $16
JLT slowForwardCopy
MOVQ 0(R15), AX
MOVQ AX, 0(DI)
MOVQ 8(R15), BX
MOVQ BX, 8(DI)
ADDQ CX, DI
JMP loop
slowForwardCopy:
// !!! If the forward copy is longer than 16 bytes, or if offset < 8, we
// can still try 8-byte load stores, provided we can overrun up to 10 extra
// bytes. As above, the overrun will be fixed up by subsequent iterations
// of the outermost loop.
//
// The C++ snappy code calls this technique IncrementalCopyFastPath. Its
// commentary says:
//
// ----
//
// The main part of this loop is a simple copy of eight bytes at a time
// until we've copied (at least) the requested amount of bytes. However,
// if d and d-offset are less than eight bytes apart (indicating a
// repeating pattern of length < 8), we first need to expand the pattern in
// order to get the correct results. For instance, if the buffer looks like
// this, with the eight-byte <d-offset> and <d> patterns marked as
// intervals:
//
// abxxxxxxxxxxxx
// [------] d-offset
// [------] d
//
// a single eight-byte copy from <d-offset> to <d> will repeat the pattern
// once, after which we can move <d> two bytes without moving <d-offset>:
//
// ababxxxxxxxxxx
// [------] d-offset
// [------] d
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// offset == 1 and length == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.
//
// ----
//
// That "10 byte overrun" worst case is confirmed by Go's
// TestSlowForwardCopyOverrun, which also tests the fixUpSlowForwardCopy
// and finishSlowForwardCopy algorithm.
//
// if length > len(dst)-d-10 {
// goto verySlowForwardCopy
// }
SUBQ $10, R14
CMPQ CX, R14
JGT verySlowForwardCopy
makeOffsetAtLeast8:
// !!! As above, expand the pattern so that offset >= 8 and we can use
// 8-byte load/stores.
//
// for offset < 8 {
// copy 8 bytes from dst[d-offset:] to dst[d:]
// length -= offset
// d += offset
// offset += offset
// // The two previous lines together means that d-offset, and therefore
// // R15, is unchanged.
// }
CMPQ DX, $8
JGE fixUpSlowForwardCopy
MOVQ (R15), BX
MOVQ BX, (DI)
SUBQ DX, CX
ADDQ DX, DI
ADDQ DX, DX
JMP makeOffsetAtLeast8
fixUpSlowForwardCopy:
// !!! Add length (which might be negative now) to d (implied by DI being
// &dst[d]) so that d ends up at the right place when we jump back to the
// top of the loop. Before we do that, though, we save DI to AX so that, if
// length is positive, copying the remaining length bytes will write to the
// right place.
MOVQ DI, AX
ADDQ CX, DI
finishSlowForwardCopy:
// !!! Repeat 8-byte load/stores until length <= 0. Ending with a negative
// length means that we overrun, but as above, that will be fixed up by
// subsequent iterations of the outermost loop.
CMPQ CX, $0
JLE loop
MOVQ (R15), BX
MOVQ BX, (AX)
ADDQ $8, R15
ADDQ $8, AX
SUBQ $8, CX
JMP finishSlowForwardCopy
verySlowForwardCopy:
// verySlowForwardCopy is a simple implementation of forward copy. In C
// parlance, this is a do/while loop instead of a while loop, since we know
// that length > 0. In Go syntax:
//
// for {
// dst[d] = dst[d - offset]
// d++
// length--
// if length == 0 {
// break
// }
// }
MOVB (R15), BX
MOVB BX, (DI)
INCQ R15
INCQ DI
DECQ CX
JNZ verySlowForwardCopy
JMP loop
// The code above handles copy tags.
// ----------------------------------------
end:
// This is the end of the "for s < len(src)".
//
// if d != len(dst) { etc }
CMPQ DI, R10
JNE errCorrupt
// return 0
MOVQ $0, ret+48(FP)
RET
errCorrupt:
// return decodeErrCodeCorrupt
MOVQ $1, ret+48(FP)
RET

View File

@ -1,115 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine !gc noasm
package snappy
// decode writes the decoding of src to dst. It assumes that the varint-encoded
// length of the decompressed bytes has already been read, and that len(dst)
// equals that length.
//
// It returns 0 on success or a decodeErrCodeXxx error code on failure.
func decode(dst, src []byte) int {
var d, s, offset, length int
for s < len(src) {
switch src[s] & 0x03 {
case tagLiteral:
x := uint32(src[s] >> 2)
switch {
case x < 60:
s++
case x == 60:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-1])
case x == 61:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-2]) | uint32(src[s-1])<<8
case x == 62:
s += 4
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-3]) | uint32(src[s-2])<<8 | uint32(src[s-1])<<16
case x == 63:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
x = uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24
}
length = int(x) + 1
if length <= 0 {
return decodeErrCodeUnsupportedLiteralLength
}
if length > len(dst)-d || length > len(src)-s {
return decodeErrCodeCorrupt
}
copy(dst[d:], src[s:s+length])
d += length
s += length
continue
case tagCopy1:
s += 2
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 4 + int(src[s-2])>>2&0x7
offset = int(uint32(src[s-2])&0xe0<<3 | uint32(src[s-1]))
case tagCopy2:
s += 3
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-3])>>2
offset = int(uint32(src[s-2]) | uint32(src[s-1])<<8)
case tagCopy4:
s += 5
if uint(s) > uint(len(src)) { // The uint conversions catch overflow from the previous line.
return decodeErrCodeCorrupt
}
length = 1 + int(src[s-5])>>2
offset = int(uint32(src[s-4]) | uint32(src[s-3])<<8 | uint32(src[s-2])<<16 | uint32(src[s-1])<<24)
}
if offset <= 0 || d < offset || length > len(dst)-d {
return decodeErrCodeCorrupt
}
// Copy from an earlier sub-slice of dst to a later sub-slice.
// If no overlap, use the built-in copy:
if offset > length {
copy(dst[d:d+length], dst[d-offset:])
d += length
continue
}
// Unlike the built-in copy function, this byte-by-byte copy always runs
// forwards, even if the slices overlap. Conceptually, this is:
//
// d += forwardCopy(dst[d:d+length], dst[d-offset:])
//
// We align the slices into a and b and show the compiler they are the same size.
// This allows the loop to run without bounds checks.
a := dst[d : d+length]
b := dst[d-offset:]
b = b[:len(a)]
for i := range a {
a[i] = b[i]
}
d += length
}
if d != len(dst) {
return decodeErrCodeCorrupt
}
return 0
}

View File

@ -1,285 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package snappy
import (
"encoding/binary"
"errors"
"io"
)
// Encode returns the encoded form of src. The returned slice may be a sub-
// slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
//
// The dst and src must not overlap. It is valid to pass a nil dst.
func Encode(dst, src []byte) []byte {
if n := MaxEncodedLen(len(src)); n < 0 {
panic(ErrTooLarge)
} else if len(dst) < n {
dst = make([]byte, n)
}
// The block starts with the varint-encoded length of the decompressed bytes.
d := binary.PutUvarint(dst, uint64(len(src)))
for len(src) > 0 {
p := src
src = nil
if len(p) > maxBlockSize {
p, src = p[:maxBlockSize], p[maxBlockSize:]
}
if len(p) < minNonLiteralBlockSize {
d += emitLiteral(dst[d:], p)
} else {
d += encodeBlock(dst[d:], p)
}
}
return dst[:d]
}
// inputMargin is the minimum number of extra input bytes to keep, inside
// encodeBlock's inner loop. On some architectures, this margin lets us
// implement a fast path for emitLiteral, where the copy of short (<= 16 byte)
// literals can be implemented as a single load to and store from a 16-byte
// register. That literal's actual length can be as short as 1 byte, so this
// can copy up to 15 bytes too much, but that's OK as subsequent iterations of
// the encoding loop will fix up the copy overrun, and this inputMargin ensures
// that we don't overrun the dst and src buffers.
const inputMargin = 16 - 1
// minNonLiteralBlockSize is the minimum size of the input to encodeBlock that
// could be encoded with a copy tag. This is the minimum with respect to the
// algorithm used by encodeBlock, not a minimum enforced by the file format.
//
// The encoded output must start with at least a 1 byte literal, as there are
// no previous bytes to copy. A minimal (1 byte) copy after that, generated
// from an emitCopy call in encodeBlock's main loop, would require at least
// another inputMargin bytes, for the reason above: we want any emitLiteral
// calls inside encodeBlock's main loop to use the fast path if possible, which
// requires being able to overrun by inputMargin bytes. Thus,
// minNonLiteralBlockSize equals 1 + 1 + inputMargin.
//
// The C++ code doesn't use this exact threshold, but it could, as discussed at
// https://groups.google.com/d/topic/snappy-compression/oGbhsdIJSJ8/discussion
// The difference between Go (2+inputMargin) and C++ (inputMargin) is purely an
// optimization. It should not affect the encoded form. This is tested by
// TestSameEncodingAsCppShortCopies.
const minNonLiteralBlockSize = 1 + 1 + inputMargin
// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
n := uint64(srcLen)
if n > 0xffffffff {
return -1
}
// Compressed data can be defined as:
// compressed := item* literal*
// item := literal* copy
//
// The trailing literal sequence has a space blowup of at most 62/60
// since a literal of length 60 needs one tag byte + one extra byte
// for length information.
//
// Item blowup is trickier to measure. Suppose the "copy" op copies
// 4 bytes of data. Because of a special check in the encoding code,
// we produce a 4-byte copy only if the offset is < 65536. Therefore
// the copy op takes 3 bytes to encode, and this type of item leads
// to at most the 62/60 blowup for representing literals.
//
// Suppose the "copy" op copies 5 bytes of data. If the offset is big
// enough, it will take 5 bytes to encode the copy op. Therefore the
// worst case here is a one-byte literal followed by a five-byte copy.
// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
//
// This last factor dominates the blowup, so the final estimate is:
n = 32 + n + n/6
if n > 0xffffffff {
return -1
}
return int(n)
}
var errClosed = errors.New("snappy: Writer is closed")
// NewWriter returns a new Writer that compresses to w.
//
// The Writer returned does not buffer writes. There is no need to Flush or
// Close such a Writer.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
return &Writer{
w: w,
obuf: make([]byte, obufLen),
}
}
// NewBufferedWriter returns a new Writer that compresses to w, using the
// framing format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Writer returned buffers writes. Users must call Close to guarantee all
// data has been forwarded to the underlying io.Writer. They may also call
// Flush zero or more times before calling Close.
func NewBufferedWriter(w io.Writer) *Writer {
return &Writer{
w: w,
ibuf: make([]byte, 0, maxBlockSize),
obuf: make([]byte, obufLen),
}
}
// Writer is an io.Writer that can write Snappy-compressed bytes.
type Writer struct {
w io.Writer
err error
// ibuf is a buffer for the incoming (uncompressed) bytes.
//
// Its use is optional. For backwards compatibility, Writers created by the
// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
// therefore do not need to be Flush'ed or Close'd.
ibuf []byte
// obuf is a buffer for the outgoing (compressed) bytes.
obuf []byte
// wroteStreamHeader is whether we have written the stream header.
wroteStreamHeader bool
}
// Reset discards the writer's state and switches the Snappy writer to write to
// w. This permits reusing a Writer rather than allocating a new one.
func (w *Writer) Reset(writer io.Writer) {
w.w = writer
w.err = nil
if w.ibuf != nil {
w.ibuf = w.ibuf[:0]
}
w.wroteStreamHeader = false
}
// Write satisfies the io.Writer interface.
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
if w.ibuf == nil {
// Do not buffer incoming bytes. This does not perform or compress well
// if the caller of Writer.Write writes many small slices. This
// behavior is therefore deprecated, but still supported for backwards
// compatibility with code that doesn't explicitly Flush or Close.
return w.write(p)
}
// The remainder of this method is based on bufio.Writer.Write from the
// standard library.
for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
var n int
if len(w.ibuf) == 0 {
// Large write, empty buffer.
// Write directly from p to avoid copy.
n, _ = w.write(p)
} else {
n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
w.ibuf = w.ibuf[:len(w.ibuf)+n]
w.Flush()
}
nRet += n
p = p[n:]
}
if w.err != nil {
return nRet, w.err
}
n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
w.ibuf = w.ibuf[:len(w.ibuf)+n]
nRet += n
return nRet, nil
}
func (w *Writer) write(p []byte) (nRet int, errRet error) {
if w.err != nil {
return 0, w.err
}
for len(p) > 0 {
obufStart := len(magicChunk)
if !w.wroteStreamHeader {
w.wroteStreamHeader = true
copy(w.obuf, magicChunk)
obufStart = 0
}
var uncompressed []byte
if len(p) > maxBlockSize {
uncompressed, p = p[:maxBlockSize], p[maxBlockSize:]
} else {
uncompressed, p = p, nil
}
checksum := crc(uncompressed)
// Compress the buffer, discarding the result if the improvement
// isn't at least 12.5%.
compressed := Encode(w.obuf[obufHeaderLen:], uncompressed)
chunkType := uint8(chunkTypeCompressedData)
chunkLen := 4 + len(compressed)
obufEnd := obufHeaderLen + len(compressed)
if len(compressed) >= len(uncompressed)-len(uncompressed)/8 {
chunkType = chunkTypeUncompressedData
chunkLen = 4 + len(uncompressed)
obufEnd = obufHeaderLen
}
// Fill in the per-chunk header that comes before the body.
w.obuf[len(magicChunk)+0] = chunkType
w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)
if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
w.err = err
return nRet, err
}
if chunkType == chunkTypeUncompressedData {
if _, err := w.w.Write(uncompressed); err != nil {
w.err = err
return nRet, err
}
}
nRet += len(uncompressed)
}
return nRet, nil
}
// Flush flushes the Writer to its underlying io.Writer.
func (w *Writer) Flush() error {
if w.err != nil {
return w.err
}
if len(w.ibuf) == 0 {
return nil
}
w.write(w.ibuf)
w.ibuf = w.ibuf[:0]
return w.err
}
// Close calls Flush and then closes the Writer.
func (w *Writer) Close() error {
w.Flush()
ret := w.err
if w.err == nil {
w.err = errClosed
}
return ret
}

View File

@ -1,29 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
package snappy
// emitLiteral has the same semantics as in encode_other.go.
//
//go:noescape
func emitLiteral(dst, lit []byte) int
// emitCopy has the same semantics as in encode_other.go.
//
//go:noescape
func emitCopy(dst []byte, offset, length int) int
// extendMatch has the same semantics as in encode_other.go.
//
//go:noescape
func extendMatch(src []byte, i, j int) int
// encodeBlock has the same semantics as in encode_other.go.
//
//go:noescape
func encodeBlock(dst, src []byte) (d int)

View File

@ -1,730 +0,0 @@
// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !appengine
// +build gc
// +build !noasm
#include "textflag.h"
// The XXX lines assemble on Go 1.4, 1.5 and 1.7, but not 1.6, due to a
// Go toolchain regression. See https://github.com/golang/go/issues/15426 and
// https://github.com/golang/snappy/issues/29
//
// As a workaround, the package was built with a known good assembler, and
// those instructions were disassembled by "objdump -d" to yield the
// 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
// style comments, in AT&T asm syntax. Note that rsp here is a physical
// register, not Go/asm's SP pseudo-register (see https://golang.org/doc/asm).
// The instructions were then encoded as "BYTE $0x.." sequences, which assemble
// fine on Go 1.6.
// The asm code generally follows the pure Go code in encode_other.go, except
// where marked with a "!!!".
// ----------------------------------------------------------------------------
// func emitLiteral(dst, lit []byte) int
//
// All local variables fit into registers. The register allocation:
// - AX len(lit)
// - BX n
// - DX return value
// - DI &dst[i]
// - R10 &lit[0]
//
// The 24 bytes of stack space is to call runtime·memmove.
//
// The unusual register allocation of local variables, such as R10 for the
// source pointer, matches the allocation used at the call site in encodeBlock,
// which makes it easier to manually inline this function.
TEXT ·emitLiteral(SB), NOSPLIT, $24-56
MOVQ dst_base+0(FP), DI
MOVQ lit_base+24(FP), R10
MOVQ lit_len+32(FP), AX
MOVQ AX, DX
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT oneByte
CMPL BX, $256
JLT twoBytes
threeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
ADDQ $3, DX
JMP memmove
twoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
ADDQ $2, DX
JMP memmove
oneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
ADDQ $1, DX
memmove:
MOVQ DX, ret+48(FP)
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
CALL runtime·memmove(SB)
RET
// ----------------------------------------------------------------------------
// func emitCopy(dst []byte, offset, length int) int
//
// All local variables fit into registers. The register allocation:
// - AX length
// - SI &dst[0]
// - DI &dst[i]
// - R11 offset
//
// The unusual register allocation of local variables, such as R11 for the
// offset, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·emitCopy(SB), NOSPLIT, $0-48
MOVQ dst_base+0(FP), DI
MOVQ DI, SI
MOVQ offset+24(FP), R11
MOVQ length+32(FP), AX
loop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT step1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP loop0
step1:
// if length > 64 { etc }
CMPL AX, $64
JLE step2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
step2:
// if length >= 12 || offset >= 2048 { goto step3 }
CMPL AX, $12
JGE step3
CMPL R11, $2048
JGE step3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
step3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
// Return the number of bytes written.
SUBQ SI, DI
MOVQ DI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func extendMatch(src []byte, i, j int) int
//
// All local variables fit into registers. The register allocation:
// - DX &src[0]
// - SI &src[j]
// - R13 &src[len(src) - 8]
// - R14 &src[len(src)]
// - R15 &src[i]
//
// The unusual register allocation of local variables, such as R15 for a source
// pointer, matches the allocation used at the call site in encodeBlock, which
// makes it easier to manually inline this function.
TEXT ·extendMatch(SB), NOSPLIT, $0-48
MOVQ src_base+0(FP), DX
MOVQ src_len+8(FP), R14
MOVQ i+24(FP), R15
MOVQ j+32(FP), SI
ADDQ DX, R14
ADDQ DX, R15
ADDQ DX, SI
MOVQ R14, R13
SUBQ $8, R13
cmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA cmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE bsf
ADDQ $8, R15
ADDQ $8, SI
JMP cmp8
bsf:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
cmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE extendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE extendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP cmp1
extendMatchEnd:
// Convert from &src[ret] to ret.
SUBQ DX, SI
MOVQ SI, ret+40(FP)
RET
// ----------------------------------------------------------------------------
// func encodeBlock(dst, src []byte) (d int)
//
// All local variables fit into registers, other than "var table". The register
// allocation:
// - AX . .
// - BX . .
// - CX 56 shift (note that amd64 shifts by non-immediates must use CX).
// - DX 64 &src[0], tableSize
// - SI 72 &src[s]
// - DI 80 &dst[d]
// - R9 88 sLimit
// - R10 . &src[nextEmit]
// - R11 96 prevHash, currHash, nextHash, offset
// - R12 104 &src[base], skip
// - R13 . &src[nextS], &src[len(src) - 8]
// - R14 . len(src), bytesBetweenHashLookups, &src[len(src)], x
// - R15 112 candidate
//
// The second column (56, 64, etc) is the stack offset to spill the registers
// when calling other functions. We could pack this slightly tighter, but it's
// simpler to have a dedicated spill map independent of the function called.
//
// "var table [maxTableSize]uint16" takes up 32768 bytes of stack space. An
// extra 56 bytes, to call other functions, and an extra 64 bytes, to spill
// local variables (registers) during calls gives 32768 + 56 + 64 = 32888.
TEXT ·encodeBlock(SB), 0, $32888-56
MOVQ dst_base+0(FP), DI
MOVQ src_base+24(FP), SI
MOVQ src_len+32(FP), R14
// shift, tableSize := uint32(32-8), 1<<8
MOVQ $24, CX
MOVQ $256, DX
calcShift:
// for ; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
// shift--
// }
CMPQ DX, $16384
JGE varTable
CMPQ DX, R14
JGE varTable
SUBQ $1, CX
SHLQ $1, DX
JMP calcShift
varTable:
// var table [maxTableSize]uint16
//
// In the asm code, unlike the Go code, we can zero-initialize only the
// first tableSize elements. Each uint16 element is 2 bytes and each MOVOU
// writes 16 bytes, so we can do only tableSize/8 writes instead of the
// 2048 writes that would zero-initialize all of table's 32768 bytes.
SHRQ $3, DX
LEAQ table-32768(SP), BX
PXOR X0, X0
memclr:
MOVOU X0, 0(BX)
ADDQ $16, BX
SUBQ $1, DX
JNZ memclr
// !!! DX = &src[0]
MOVQ SI, DX
// sLimit := len(src) - inputMargin
MOVQ R14, R9
SUBQ $15, R9
// !!! Pre-emptively spill CX, DX and R9 to the stack. Their values don't
// change for the rest of the function.
MOVQ CX, 56(SP)
MOVQ DX, 64(SP)
MOVQ R9, 88(SP)
// nextEmit := 0
MOVQ DX, R10
// s := 1
ADDQ $1, SI
// nextHash := hash(load32(src, s), shift)
MOVL 0(SI), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
outer:
// for { etc }
// skip := 32
MOVQ $32, R12
// nextS := s
MOVQ SI, R13
// candidate := 0
MOVQ $0, R15
inner0:
// for { etc }
// s := nextS
MOVQ R13, SI
// bytesBetweenHashLookups := skip >> 5
MOVQ R12, R14
SHRQ $5, R14
// nextS = s + bytesBetweenHashLookups
ADDQ R14, R13
// skip += bytesBetweenHashLookups
ADDQ R14, R12
// if nextS > sLimit { goto emitRemainder }
MOVQ R13, AX
SUBQ DX, AX
CMPQ AX, R9
JA emitRemainder
// candidate = int(table[nextHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[nextHash] = uint16(s)
MOVQ SI, AX
SUBQ DX, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// nextHash = hash(load32(src, nextS), shift)
MOVL 0(R13), R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// if load32(src, s) != load32(src, candidate) { continue } break
MOVL 0(SI), AX
MOVL (DX)(R15*1), BX
CMPL AX, BX
JNE inner0
fourByteMatch:
// As per the encode_other.go code:
//
// A 4-byte match has been found. We'll later see etc.
// !!! Jump to a fast path for short (<= 16 byte) literals. See the comment
// on inputMargin in encode.go.
MOVQ SI, AX
SUBQ R10, AX
CMPQ AX, $16
JLE emitLiteralFastPath
// ----------------------------------------
// Begin inline of the emitLiteral call.
//
// d += emitLiteral(dst[d:], src[nextEmit:s])
MOVL AX, BX
SUBL $1, BX
CMPL BX, $60
JLT inlineEmitLiteralOneByte
CMPL BX, $256
JLT inlineEmitLiteralTwoBytes
inlineEmitLiteralThreeBytes:
MOVB $0xf4, 0(DI)
MOVW BX, 1(DI)
ADDQ $3, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralTwoBytes:
MOVB $0xf0, 0(DI)
MOVB BX, 1(DI)
ADDQ $2, DI
JMP inlineEmitLiteralMemmove
inlineEmitLiteralOneByte:
SHLB $2, BX
MOVB BX, 0(DI)
ADDQ $1, DI
inlineEmitLiteralMemmove:
// Spill local variables (registers) onto the stack; call; unspill.
//
// copy(dst[i:], lit)
//
// This means calling runtime·memmove(&dst[i], &lit[0], len(lit)), so we push
// DI, R10 and AX as arguments.
MOVQ DI, 0(SP)
MOVQ R10, 8(SP)
MOVQ AX, 16(SP)
ADDQ AX, DI // Finish the "d +=" part of "d += emitLiteral(etc)".
MOVQ SI, 72(SP)
MOVQ DI, 80(SP)
MOVQ R15, 112(SP)
CALL runtime·memmove(SB)
MOVQ 56(SP), CX
MOVQ 64(SP), DX
MOVQ 72(SP), SI
MOVQ 80(SP), DI
MOVQ 88(SP), R9
MOVQ 112(SP), R15
JMP inner1
inlineEmitLiteralEnd:
// End inline of the emitLiteral call.
// ----------------------------------------
emitLiteralFastPath:
// !!! Emit the 1-byte encoding "uint8(len(lit)-1)<<2".
MOVB AX, BX
SUBB $1, BX
SHLB $2, BX
MOVB BX, (DI)
ADDQ $1, DI
// !!! Implement the copy from lit to dst as a 16-byte load and store.
// (Encode's documentation says that dst and src must not overlap.)
//
// This always copies 16 bytes, instead of only len(lit) bytes, but that's
// OK. Subsequent iterations will fix up the overrun.
//
// Note that on amd64, it is legal and cheap to issue unaligned 8-byte or
// 16-byte loads and stores. This technique probably wouldn't be as
// effective on architectures that are fussier about alignment.
MOVOU 0(R10), X0
MOVOU X0, 0(DI)
ADDQ AX, DI
inner1:
// for { etc }
// base := s
MOVQ SI, R12
// !!! offset := base - candidate
MOVQ R12, R11
SUBQ R15, R11
SUBQ DX, R11
// ----------------------------------------
// Begin inline of the extendMatch call.
//
// s = extendMatch(src, candidate+4, s+4)
// !!! R14 = &src[len(src)]
MOVQ src_len+32(FP), R14
ADDQ DX, R14
// !!! R13 = &src[len(src) - 8]
MOVQ R14, R13
SUBQ $8, R13
// !!! R15 = &src[candidate + 4]
ADDQ $4, R15
ADDQ DX, R15
// !!! s += 4
ADDQ $4, SI
inlineExtendMatchCmp8:
// As long as we are 8 or more bytes before the end of src, we can load and
// compare 8 bytes at a time. If those 8 bytes are equal, repeat.
CMPQ SI, R13
JA inlineExtendMatchCmp1
MOVQ (R15), AX
MOVQ (SI), BX
CMPQ AX, BX
JNE inlineExtendMatchBSF
ADDQ $8, R15
ADDQ $8, SI
JMP inlineExtendMatchCmp8
inlineExtendMatchBSF:
// If those 8 bytes were not equal, XOR the two 8 byte values, and return
// the index of the first byte that differs. The BSF instruction finds the
// least significant 1 bit, the amd64 architecture is little-endian, and
// the shift by 3 converts a bit index to a byte index.
XORQ AX, BX
BSFQ BX, BX
SHRQ $3, BX
ADDQ BX, SI
JMP inlineExtendMatchEnd
inlineExtendMatchCmp1:
// In src's tail, compare 1 byte at a time.
CMPQ SI, R14
JAE inlineExtendMatchEnd
MOVB (R15), AX
MOVB (SI), BX
CMPB AX, BX
JNE inlineExtendMatchEnd
ADDQ $1, R15
ADDQ $1, SI
JMP inlineExtendMatchCmp1
inlineExtendMatchEnd:
// End inline of the extendMatch call.
// ----------------------------------------
// ----------------------------------------
// Begin inline of the emitCopy call.
//
// d += emitCopy(dst[d:], base-candidate, s-base)
// !!! length := s - base
MOVQ SI, AX
SUBQ R12, AX
inlineEmitCopyLoop0:
// for length >= 68 { etc }
CMPL AX, $68
JLT inlineEmitCopyStep1
// Emit a length 64 copy, encoded as 3 bytes.
MOVB $0xfe, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $64, AX
JMP inlineEmitCopyLoop0
inlineEmitCopyStep1:
// if length > 64 { etc }
CMPL AX, $64
JLE inlineEmitCopyStep2
// Emit a length 60 copy, encoded as 3 bytes.
MOVB $0xee, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
SUBL $60, AX
inlineEmitCopyStep2:
// if length >= 12 || offset >= 2048 { goto inlineEmitCopyStep3 }
CMPL AX, $12
JGE inlineEmitCopyStep3
CMPL R11, $2048
JGE inlineEmitCopyStep3
// Emit the remaining copy, encoded as 2 bytes.
MOVB R11, 1(DI)
SHRL $8, R11
SHLB $5, R11
SUBB $4, AX
SHLB $2, AX
ORB AX, R11
ORB $1, R11
MOVB R11, 0(DI)
ADDQ $2, DI
JMP inlineEmitCopyEnd
inlineEmitCopyStep3:
// Emit the remaining copy, encoded as 3 bytes.
SUBL $1, AX
SHLB $2, AX
ORB $2, AX
MOVB AX, 0(DI)
MOVW R11, 1(DI)
ADDQ $3, DI
inlineEmitCopyEnd:
// End inline of the emitCopy call.
// ----------------------------------------
// nextEmit = s
MOVQ SI, R10
// if s >= sLimit { goto emitRemainder }
MOVQ SI, AX
SUBQ DX, AX
CMPQ AX, R9
JAE emitRemainder
// As per the encode_other.go code:
//
// We could immediately etc.
// x := load64(src, s-1)
MOVQ -1(SI), R14
// prevHash := hash(uint32(x>>0), shift)
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// table[prevHash] = uint16(s-1)
MOVQ SI, AX
SUBQ DX, AX
SUBQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// currHash := hash(uint32(x>>8), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// candidate = int(table[currHash])
// XXX: MOVWQZX table-32768(SP)(R11*2), R15
// XXX: 4e 0f b7 7c 5c 78 movzwq 0x78(%rsp,%r11,2),%r15
BYTE $0x4e
BYTE $0x0f
BYTE $0xb7
BYTE $0x7c
BYTE $0x5c
BYTE $0x78
// table[currHash] = uint16(s)
ADDQ $1, AX
// XXX: MOVW AX, table-32768(SP)(R11*2)
// XXX: 66 42 89 44 5c 78 mov %ax,0x78(%rsp,%r11,2)
BYTE $0x66
BYTE $0x42
BYTE $0x89
BYTE $0x44
BYTE $0x5c
BYTE $0x78
// if uint32(x>>8) == load32(src, candidate) { continue }
MOVL (DX)(R15*1), BX
CMPL R14, BX
JEQ inner1
// nextHash = hash(uint32(x>>16), shift)
SHRQ $8, R14
MOVL R14, R11
IMULL $0x1e35a7bd, R11
SHRL CX, R11
// s++
ADDQ $1, SI
// break out of the inner1 for loop, i.e. continue the outer loop.
JMP outer
emitRemainder:
// if nextEmit < len(src) { etc }
MOVQ src_len+32(FP), AX
ADDQ DX, AX
CMPQ R10, AX
JEQ encodeBlockEnd
// d += emitLiteral(dst[d:], src[nextEmit:])
//
// Push args.
MOVQ DI, 0(SP)
MOVQ $0, 8(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ $0, 16(SP) // Unnecessary, as the callee ignores it, but conservative.
MOVQ R10, 24(SP)
SUBQ R10, AX
MOVQ AX, 32(SP)
MOVQ AX, 40(SP) // Unnecessary, as the callee ignores it, but conservative.
// Spill local variables (registers) onto the stack; call; unspill.
MOVQ DI, 80(SP)
CALL ·emitLiteral(SB)
MOVQ 80(SP), DI
// Finish the "d +=" part of "d += emitLiteral(etc)".
ADDQ 48(SP), DI
encodeBlockEnd:
MOVQ dst_base+0(FP), AX
SUBQ AX, DI
MOVQ DI, d+48(FP)
RET

View File

@ -1,238 +0,0 @@
// Copyright 2016 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// +build !amd64 appengine !gc noasm
package snappy
func load32(b []byte, i int) uint32 {
b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}
func load64(b []byte, i int) uint64 {
b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
// emitLiteral writes a literal chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= len(lit) && len(lit) <= 65536
func emitLiteral(dst, lit []byte) int {
i, n := 0, uint(len(lit)-1)
switch {
case n < 60:
dst[0] = uint8(n)<<2 | tagLiteral
i = 1
case n < 1<<8:
dst[0] = 60<<2 | tagLiteral
dst[1] = uint8(n)
i = 2
default:
dst[0] = 61<<2 | tagLiteral
dst[1] = uint8(n)
dst[2] = uint8(n >> 8)
i = 3
}
return i + copy(dst[i:], lit)
}
// emitCopy writes a copy chunk and returns the number of bytes written.
//
// It assumes that:
// dst is long enough to hold the encoded bytes
// 1 <= offset && offset <= 65535
// 4 <= length && length <= 65535
func emitCopy(dst []byte, offset, length int) int {
i := 0
// The maximum length for a single tagCopy1 or tagCopy2 op is 64 bytes. The
// threshold for this loop is a little higher (at 68 = 64 + 4), and the
// length emitted down below is is a little lower (at 60 = 64 - 4), because
// it's shorter to encode a length 67 copy as a length 60 tagCopy2 followed
// by a length 7 tagCopy1 (which encodes as 3+2 bytes) than to encode it as
// a length 64 tagCopy2 followed by a length 3 tagCopy2 (which encodes as
// 3+3 bytes). The magic 4 in the 64±4 is because the minimum length for a
// tagCopy1 op is 4 bytes, which is why a length 3 copy has to be an
// encodes-as-3-bytes tagCopy2 instead of an encodes-as-2-bytes tagCopy1.
for length >= 68 {
// Emit a length 64 copy, encoded as 3 bytes.
dst[i+0] = 63<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 64
}
if length > 64 {
// Emit a length 60 copy, encoded as 3 bytes.
dst[i+0] = 59<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
i += 3
length -= 60
}
if length >= 12 || offset >= 2048 {
// Emit the remaining copy, encoded as 3 bytes.
dst[i+0] = uint8(length-1)<<2 | tagCopy2
dst[i+1] = uint8(offset)
dst[i+2] = uint8(offset >> 8)
return i + 3
}
// Emit the remaining copy, encoded as 2 bytes.
dst[i+0] = uint8(offset>>8)<<5 | uint8(length-4)<<2 | tagCopy1
dst[i+1] = uint8(offset)
return i + 2
}
// extendMatch returns the largest k such that k <= len(src) and that
// src[i:i+k-j] and src[j:k] have the same contents.
//
// It assumes that:
// 0 <= i && i < j && j <= len(src)
func extendMatch(src []byte, i, j int) int {
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
}
return j
}
func hash(u, shift uint32) uint32 {
return (u * 0x1e35a7bd) >> shift
}
// encodeBlock encodes a non-empty src to a guaranteed-large-enough dst. It
// assumes that the varint-encoded length of the decompressed bytes has already
// been written.
//
// It also assumes that:
// len(dst) >= MaxEncodedLen(len(src)) &&
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
func encodeBlock(dst, src []byte) (d int) {
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
// The table element type is uint16, as s < sLimit and sLimit < len(src)
// and len(src) <= maxBlockSize and maxBlockSize == 65536.
const (
maxTableSize = 1 << 14
// tableMask is redundant, but helps the compiler eliminate bounds
// checks.
tableMask = maxTableSize - 1
)
shift := uint32(32 - 8)
for tableSize := 1 << 8; tableSize < maxTableSize && tableSize < len(src); tableSize *= 2 {
shift--
}
// In Go, all array elements are zero-initialized, so there is no advantage
// to a smaller tableSize per se. However, it matches the C++ algorithm,
// and in the asm versions of this code, we can get away with zeroing only
// the first tableSize elements.
var table [maxTableSize]uint16
// sLimit is when to stop looking for offset/length copies. The inputMargin
// lets us use a fast path for emitLiteral in the main loop, while we are
// looking for copies.
sLimit := len(src) - inputMargin
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := 0
// The encoded form must start with a literal, as there are no previous
// bytes to copy, so we start looking for hash matches at s == 1.
s := 1
nextHash := hash(load32(src, s), shift)
for {
// Copied from the C++ snappy implementation:
//
// Heuristic match skipping: If 32 bytes are scanned with no matches
// found, start looking only at every other byte. If 32 more bytes are
// scanned (or skipped), look at every third byte, etc.. When a match
// is found, immediately go back to looking at every byte. This is a
// small loss (~5% performance, ~0.1% density) for compressible data
// due to more bookkeeping, but for non-compressible data (such as
// JPEG) it's a huge win since the compressor quickly "realizes" the
// data is incompressible and doesn't bother looking for matches
// everywhere.
//
// The "skip" variable keeps track of how many bytes there are since
// the last match; dividing it by 32 (ie. right-shifting by five) gives
// the number of bytes to move ahead for each iteration.
skip := 32
nextS := s
candidate := 0
for {
s = nextS
bytesBetweenHashLookups := skip >> 5
nextS = s + bytesBetweenHashLookups
skip += bytesBetweenHashLookups
if nextS > sLimit {
goto emitRemainder
}
candidate = int(table[nextHash&tableMask])
table[nextHash&tableMask] = uint16(s)
nextHash = hash(load32(src, nextS), shift)
if load32(src, s) == load32(src, candidate) {
break
}
}
// A 4-byte match has been found. We'll later see if more than 4 bytes
// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
// them as literal bytes.
d += emitLiteral(dst[d:], src[nextEmit:s])
// Call emitCopy, and then see if another emitCopy could be our next
// move. Repeat until we find no match for the input immediately after
// what was consumed by the last emitCopy call.
//
// If we exit this loop normally then we need to call emitLiteral next,
// though we don't yet know how big the literal will be. We handle that
// by proceeding to the next iteration of the main loop. We also can
// exit this loop via goto if we get close to exhausting the input.
for {
// Invariant: we have a 4-byte match at s, and no need to emit any
// literal bytes prior to s.
base := s
// Extend the 4-byte match as long as possible.
//
// This is an inlined version of:
// s = extendMatch(src, candidate+4, s+4)
s += 4
for i := candidate + 4; s < len(src) && src[i] == src[s]; i, s = i+1, s+1 {
}
d += emitCopy(dst[d:], base-candidate, s-base)
nextEmit = s
if s >= sLimit {
goto emitRemainder
}
// We could immediately start working at s now, but to improve
// compression we first update the hash table at s-1 and at s. If
// another emitCopy is not our next move, also calculate nextHash
// at s+1. At least on GOARCH=amd64, these three hash calculations
// are faster as one load64 call (with some shifts) instead of
// three load32 calls.
x := load64(src, s-1)
prevHash := hash(uint32(x>>0), shift)
table[prevHash&tableMask] = uint16(s - 1)
currHash := hash(uint32(x>>8), shift)
candidate = int(table[currHash&tableMask])
table[currHash&tableMask] = uint16(s)
if uint32(x>>8) != load32(src, candidate) {
nextHash = hash(uint32(x>>16), shift)
s++
break
}
}
}
emitRemainder:
if nextEmit < len(src) {
d += emitLiteral(dst[d:], src[nextEmit:])
}
return d
}

View File

@ -1,2 +0,0 @@
del old.txt
go test -bench=. >>old.txt && go test -bench=. >>old.txt && go test -bench=. >>old.txt && benchstat -delta-test=ttest old.txt new.txt

View File

@ -1,98 +0,0 @@
// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
// Package snappy implements the Snappy compression format. It aims for very
// high speeds and reasonable compression.
//
// There are actually two Snappy formats: block and stream. They are related,
// but different: trying to decompress block-compressed data as a Snappy stream
// will fail, and vice versa. The block format is the Decode and Encode
// functions and the stream format is the Reader and Writer types.
//
// The block format, the more common case, is used when the complete size (the
// number of bytes) of the original data is known upfront, at the time
// compression starts. The stream format, also known as the framing format, is
// for when that isn't always true.
//
// The canonical, C++ implementation is at https://github.com/google/snappy and
// it only implements the block format.
package snappy
import (
"hash/crc32"
)
/*
Each encoded block begins with the varint-encoded length of the decoded data,
followed by a sequence of chunks. Chunks begin and end on byte boundaries. The
first byte of each chunk is broken into its 2 least and 6 most significant bits
called l and m: l ranges in [0, 4) and m ranges in [0, 64). l is the chunk tag.
Zero means a literal tag. All other values mean a copy tag.
For literal tags:
- If m < 60, the next 1 + m bytes are literal bytes.
- Otherwise, let n be the little-endian unsigned integer denoted by the next
m - 59 bytes. The next 1 + n bytes after that are literal bytes.
For copy tags, length bytes are copied from offset bytes ago, in the style of
Lempel-Ziv compression algorithms. In particular:
- For l == 1, the offset ranges in [0, 1<<11) and the length in [4, 12).
The length is 4 + the low 3 bits of m. The high 3 bits of m form bits 8-10
of the offset. The next byte is bits 0-7 of the offset.
- For l == 2, the offset ranges in [0, 1<<16) and the length in [1, 65).
The length is 1 + m. The offset is the little-endian unsigned integer
denoted by the next 2 bytes.
- For l == 3, this tag is a legacy format that is no longer issued by most
encoders. Nonetheless, the offset ranges in [0, 1<<32) and the length in
[1, 65). The length is 1 + m. The offset is the little-endian unsigned
integer denoted by the next 4 bytes.
*/
const (
tagLiteral = 0x00
tagCopy1 = 0x01
tagCopy2 = 0x02
tagCopy4 = 0x03
)
const (
checksumSize = 4
chunkHeaderSize = 4
magicChunk = "\xff\x06\x00\x00" + magicBody
magicBody = "sNaPpY"
// maxBlockSize is the maximum size of the input to encodeBlock. It is not
// part of the wire format per se, but some parts of the encoder assume
// that an offset fits into a uint16.
//
// Also, for the framing format (Writer type instead of Encode function),
// https://github.com/google/snappy/blob/master/framing_format.txt says
// that "the uncompressed data in a chunk must be no longer than 65536
// bytes".
maxBlockSize = 65536
// maxEncodedLenOfMaxBlockSize equals MaxEncodedLen(maxBlockSize), but is
// hard coded to be a const instead of a variable, so that obufLen can also
// be a const. Their equivalence is confirmed by
// TestMaxEncodedLenOfMaxBlockSize.
maxEncodedLenOfMaxBlockSize = 76490
obufHeaderLen = len(magicChunk) + checksumSize + chunkHeaderSize
obufLen = obufHeaderLen + maxEncodedLenOfMaxBlockSize
)
const (
chunkTypeCompressedData = 0x00
chunkTypeUncompressedData = 0x01
chunkTypePadding = 0xfe
chunkTypeStreamIdentifier = 0xff
)
var crcTable = crc32.MakeTable(crc32.Castagnoli)
// crc implements the checksum specified in section 3 of
// https://github.com/google/snappy/blob/master/framing_format.txt
func crc(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
return uint32(c>>15|c<<17) + 0xa282ead8
}

View File

@ -16,30 +16,28 @@ Currently the package is heavily optimized for 64 bit processors and will be sig
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
Godoc Documentation: https://godoc.org/github.com/klauspost/compress/zstd
[![Go Reference](https://pkg.go.dev/badge/github.com/klauspost/compress/zstd.svg)](https://pkg.go.dev/github.com/klauspost/compress/zstd)
## Compressor
### Status:
STABLE - there may always be subtle bugs, a wide variety of content has been tested and the library is actively
used by several projects. This library is being continuously [fuzz-tested](https://github.com/klauspost/compress-fuzz),
kindly supplied by [fuzzit.dev](https://fuzzit.dev/).
used by several projects. This library is being [fuzz-tested](https://github.com/klauspost/compress-fuzz) for all updates.
There may still be specific combinations of data types/size/settings that could lead to edge cases,
so as always, testing is recommended.
For now, a high speed (fastest) and medium-fast (default) compressor has been implemented.
The "Fastest" compression ratio is roughly equivalent to zstd level 1.
The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
* The "Fastest" compression ratio is roughly equivalent to zstd level 1.
* The "Default" compression ratio is roughly equivalent to zstd level 3 (default).
* The "Better" compression ratio is roughly equivalent to zstd level 7.
* The "Best" compression ratio is roughly equivalent to zstd level 11.
In terms of speed, it is typically 2x as fast as the stdlib deflate/gzip in its fastest mode.
The compression ratio compared to stdlib is around level 3, but usually 3x as fast.
Compared to cgo zstd, the speed is around level 3 (default), but compression slightly worse, between level 1&2.
### Usage
@ -140,7 +138,7 @@ I have collected some speed examples to compare speed and compression against ot
* `file` is the input file.
* `out` is the compressor used. `zskp` is this package. `zstd` is the Datadog cgo library. `gzstd/gzkp` is gzip standard and this library.
* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default".
* `level` is the compression level used. For `zskp` level 1 is "fastest", level 2 is "default"; 3 is "better", 4 is "best".
* `insize`/`outsize` is the input/output size.
* `millis` is the number of milliseconds used for compression.
* `mb/s` is megabytes (2^20 bytes) per second.
@ -153,12 +151,14 @@ This package:
file out level insize outsize millis mb/s
silesia.tar zskp 1 211947520 73101992 643 313.87
silesia.tar zskp 2 211947520 67504318 969 208.38
silesia.tar zskp 3 211947520 65177448 1899 106.44
silesia.tar zskp 3 211947520 64595893 2007 100.68
silesia.tar zskp 4 211947520 60995370 7691 26.28
cgo zstd:
silesia.tar zstd 1 211947520 73605392 543 371.56
silesia.tar zstd 3 211947520 66793289 864 233.68
silesia.tar zstd 6 211947520 62916450 1913 105.66
silesia.tar zstd 9 211947520 60212393 5063 39.92
gzip, stdlib/this package:
silesia.tar gzstd 1 211947520 80007735 1654 122.21
@ -170,10 +170,12 @@ https://files.klauspost.com/compress/gob-stream.7z
file out level insize outsize millis mb/s
gob-stream zskp 1 1911399616 235022249 3088 590.30
gob-stream zskp 2 1911399616 205669791 3786 481.34
gob-stream zskp 3 1911399616 185792019 9324 195.48
gob-stream zskp 3 1911399616 175034659 9636 189.17
gob-stream zskp 4 1911399616 167273881 29337 62.13
gob-stream zstd 1 1911399616 249810424 2637 691.26
gob-stream zstd 3 1911399616 208192146 3490 522.31
gob-stream zstd 6 1911399616 193632038 6687 272.56
gob-stream zstd 9 1911399616 177620386 16175 112.70
gob-stream gzstd 1 1911399616 357382641 10251 177.82
gob-stream gzkp 1 1911399616 362156523 5695 320.08
@ -184,10 +186,12 @@ http://mattmahoney.net/dc/textdata.html
file out level insize outsize millis mb/s
enwik9 zskp 1 1000000000 343848582 3609 264.18
enwik9 zskp 2 1000000000 317276632 5746 165.97
enwik9 zskp 3 1000000000 294540704 11725 81.34
enwik9 zskp 3 1000000000 292243069 12162 78.41
enwik9 zskp 4 1000000000 275241169 36430 26.18
enwik9 zstd 1 1000000000 358072021 3110 306.65
enwik9 zstd 3 1000000000 313734672 4784 199.35
enwik9 zstd 6 1000000000 295138875 10290 92.68
enwik9 zstd 9 1000000000 278348700 28549 33.40
enwik9 gzstd 1 1000000000 382578136 9604 99.30
enwik9 gzkp 1 1000000000 383825945 6544 145.73
@ -197,10 +201,12 @@ https://files.klauspost.com/compress/github-june-2days-2019.json.zst
file out level insize outsize millis mb/s
github-june-2days-2019.json zskp 1 6273951764 699045015 10620 563.40
github-june-2days-2019.json zskp 2 6273951764 617881763 11687 511.96
github-june-2days-2019.json zskp 3 6273951764 537511906 29252 204.54
github-june-2days-2019.json zskp 3 6273951764 524340691 34043 175.75
github-june-2days-2019.json zskp 4 6273951764 503314661 93811 63.78
github-june-2days-2019.json zstd 1 6273951764 766284037 8450 708.00
github-june-2days-2019.json zstd 3 6273951764 661889476 10927 547.57
github-june-2days-2019.json zstd 6 6273951764 642756859 22996 260.18
github-june-2days-2019.json zstd 9 6273951764 601974523 52413 114.16
github-june-2days-2019.json gzstd 1 6273951764 1164400847 29948 199.79
github-june-2days-2019.json gzkp 1 6273951764 1128755542 19236 311.03
@ -210,10 +216,12 @@ https://files.klauspost.com/compress/rawstudio-mint14.7z
file out level insize outsize millis mb/s
rawstudio-mint14.tar zskp 1 8558382592 3667489370 20210 403.84
rawstudio-mint14.tar zskp 2 8558382592 3364592300 31873 256.07
rawstudio-mint14.tar zskp 3 8558382592 3224594213 71751 113.75
rawstudio-mint14.tar zskp 3 8558382592 3158085214 77675 105.08
rawstudio-mint14.tar zskp 4 8558382592 3020370044 404956 20.16
rawstudio-mint14.tar zstd 1 8558382592 3609250104 17136 476.27
rawstudio-mint14.tar zstd 3 8558382592 3341679997 29262 278.92
rawstudio-mint14.tar zstd 6 8558382592 3235846406 77904 104.77
rawstudio-mint14.tar zstd 9 8558382592 3160778861 140946 57.91
rawstudio-mint14.tar gzstd 1 8558382592 3926257486 57722 141.40
rawstudio-mint14.tar gzkp 1 8558382592 3970463184 41749 195.49
@ -223,10 +231,12 @@ https://files.klauspost.com/compress/nyc-taxi-data-10M.csv.zst
file out level insize outsize millis mb/s
nyc-taxi-data-10M.csv zskp 1 3325605752 641339945 8925 355.35
nyc-taxi-data-10M.csv zskp 2 3325605752 591748091 11268 281.44
nyc-taxi-data-10M.csv zskp 3 3325605752 538490114 19880 159.53
nyc-taxi-data-10M.csv zskp 3 3325605752 530289687 25239 125.66
nyc-taxi-data-10M.csv zskp 4 3325605752 490907191 65939 48.10
nyc-taxi-data-10M.csv zstd 1 3325605752 687399637 8233 385.18
nyc-taxi-data-10M.csv zstd 3 3325605752 598514411 10065 315.07
nyc-taxi-data-10M.csv zstd 6 3325605752 570522953 20038 158.27
nyc-taxi-data-10M.csv zstd 9 3325605752 517554797 64565 49.12
nyc-taxi-data-10M.csv gzstd 1 3325605752 928656485 23876 132.83
nyc-taxi-data-10M.csv gzkp 1 3325605752 924718719 16388 193.53
```
@ -394,13 +404,28 @@ BenchmarkDecoder_DecodeAllParallelCgo/comp-data.bin.zst-16 749938
This reflects the performance around May 2020, but this may be out of date.
## Zstd inside ZIP files
It is possible to use zstandard to compress individual files inside zip archives.
While this isn't widely supported it can be useful for internal files.
To support the compression and decompression of these files you must register a compressor and decompressor.
It is highly recommended registering the (de)compressors on individual zip Reader/Writer and NOT
use the global registration functions. The main reason for this is that 2 registrations from
different packages will result in a panic.
It is a good idea to only have a single compressor and decompressor, since they can be used for multiple zip
files concurrently, and using a single instance will allow reusing some resources.
See [this example](https://pkg.go.dev/github.com/klauspost/compress/zstd#example-ZipCompressor) for
how to compress and decompress files inside zip archives.
# Contributions
Contributions are always welcome.
For new features/fixes, remember to add tests and for performance enhancements include benchmarks.
For sending files for reproducing errors use a service like [goobox](https://goobox.io/#/upload) or similar to share your files.
For general feedback and experience reports, feel free to open an issue or write me on [Twitter](https://twitter.com/sh0dan).
This package includes the excellent [`github.com/cespare/xxhash`](https://github.com/cespare/xxhash) package Copyright (c) 2016 Caleb Spare.

View File

@ -123,12 +123,10 @@ func newBlockDec(lowMem bool) *blockDec {
// Input must be a start of a block and will be at the end of the block when returned.
func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
b.WindowSize = windowSize
tmp := br.readSmall(3)
if tmp == nil {
if debug {
println("Reading block header:", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
tmp, err := br.readSmall(3)
if err != nil {
println("Reading block header:", err)
return err
}
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
b.Last = bh&1 != 0
@ -179,7 +177,6 @@ func (b *blockDec) reset(br byteBuffer, windowSize uint64) error {
if cap(b.dst) <= maxSize {
b.dst = make([]byte, 0, maxSize+1)
}
var err error
b.data, err = br.readBig(cSize, b.dataStorage)
if err != nil {
if debug {
@ -613,7 +610,7 @@ func (b *blockDec) decodeCompressed(hist *history) error {
// Decode treeless literal block.
if litType == literalsBlockTreeless {
// TODO: We could send the history early WITHOUT the stream history.
// This would allow decoding treeless literials before the byte history is available.
// This would allow decoding treeless literals before the byte history is available.
// Silencia stats: Treeless 4393, with: 32775, total: 37168, 11% treeless.
// So not much obvious gain here.

View File

@ -22,28 +22,44 @@ type blockEnc struct {
dictLitEnc *huff0.Scratch
wr bitWriter
extraLits int
last bool
extraLits int
output []byte
recentOffsets [3]uint32
prevRecentOffsets [3]uint32
last bool
lowMem bool
}
// init should be used once the block has been created.
// If called more than once, the effect is the same as calling reset.
func (b *blockEnc) init() {
if cap(b.literals) < maxCompressedLiteralSize {
b.literals = make([]byte, 0, maxCompressedLiteralSize)
}
const defSeqs = 200
b.literals = b.literals[:0]
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
if cap(b.output) < maxCompressedBlockSize {
b.output = make([]byte, 0, maxCompressedBlockSize)
if b.lowMem {
// 1K literals
if cap(b.literals) < 1<<10 {
b.literals = make([]byte, 0, 1<<10)
}
const defSeqs = 20
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
// 1K
if cap(b.output) < 1<<10 {
b.output = make([]byte, 0, 1<<10)
}
} else {
if cap(b.literals) < maxCompressedBlockSize {
b.literals = make([]byte, 0, maxCompressedBlockSize)
}
const defSeqs = 200
if cap(b.sequences) < defSeqs {
b.sequences = make([]seq, 0, defSeqs)
}
if cap(b.output) < maxCompressedBlockSize {
b.output = make([]byte, 0, maxCompressedBlockSize)
}
}
if b.coders.mlEnc == nil {
b.coders.mlEnc = &fseEncoder{}
b.coders.mlPrev = &fseEncoder{}
@ -76,6 +92,7 @@ func (b *blockEnc) reset(prev *blockEnc) {
if prev != nil {
b.recentOffsets = prev.prevRecentOffsets
}
b.dictLitEnc = nil
}
// reset will reset the block for a new encode, but in the same stream,
@ -369,9 +386,9 @@ func (b *blockEnc) encodeLits(lits []byte, raw bool) error {
b.output = bh.appendTo(b.output)
b.output = append(b.output, lits[0])
return nil
case nil:
default:
return err
case nil:
}
// Compressed...
// Now, allow reuse
@ -511,11 +528,6 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if debug {
println("Adding literals RLE")
}
default:
if debug {
println("Adding literals ERROR:", err)
}
return err
case nil:
// Compressed litLen...
if reUsed {
@ -546,6 +558,11 @@ func (b *blockEnc) encode(org []byte, raw, rawAllLits bool) error {
if debug {
println("Adding literals compressed")
}
default:
if debug {
println("Adding literals ERROR:", err)
}
return err
}
// Sequence compression

View File

@ -12,8 +12,8 @@ import (
type byteBuffer interface {
// Read up to 8 bytes.
// Returns nil if no more input is available.
readSmall(n int) []byte
// Returns io.ErrUnexpectedEOF if this cannot be satisfied.
readSmall(n int) ([]byte, error)
// Read >8 bytes.
// MAY use the destination slice.
@ -29,17 +29,17 @@ type byteBuffer interface {
// in-memory buffer
type byteBuf []byte
func (b *byteBuf) readSmall(n int) []byte {
func (b *byteBuf) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
bb := *b
if len(bb) < n {
return nil
return nil, io.ErrUnexpectedEOF
}
r := bb[:n]
*b = bb[n:]
return r
return r, nil
}
func (b *byteBuf) readBig(n int, dst []byte) ([]byte, error) {
@ -81,19 +81,22 @@ type readerWrapper struct {
tmp [8]byte
}
func (r *readerWrapper) readSmall(n int) []byte {
func (r *readerWrapper) readSmall(n int) ([]byte, error) {
if debugAsserts && n > 8 {
panic(fmt.Errorf("small read > 8 (%d). use readBig", n))
}
n2, err := io.ReadFull(r.r, r.tmp[:n])
// We only really care about the actual bytes read.
if n2 != n {
if err != nil {
if err == io.EOF {
return nil, io.ErrUnexpectedEOF
}
if debug {
println("readSmall: got", n2, "want", n, "err", err)
}
return nil
return nil, err
}
return r.tmp[:n]
return r.tmp[:n], nil
}
func (r *readerWrapper) readBig(n int, dst []byte) ([]byte, error) {

View File

@ -0,0 +1,202 @@
// Copyright 2020+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"bytes"
"errors"
"io"
)
// HeaderMaxSize is the maximum size of a Frame and Block Header.
// If less is sent to Header.Decode it *may* still contain enough information.
const HeaderMaxSize = 14 + 3
// Header contains information about the first frame and block within that.
type Header struct {
// Window Size the window of data to keep while decoding.
// Will only be set if HasFCS is false.
WindowSize uint64
// Frame content size.
// Expected size of the entire frame.
FrameContentSize uint64
// Dictionary ID.
// If 0, no dictionary.
DictionaryID uint32
// First block information.
FirstBlock struct {
// OK will be set if first block could be decoded.
OK bool
// Is this the last block of a frame?
Last bool
// Is the data compressed?
// If true CompressedSize will be populated.
// Unfortunately DecompressedSize cannot be determined
// without decoding the blocks.
Compressed bool
// DecompressedSize is the expected decompressed size of the block.
// Will be 0 if it cannot be determined.
DecompressedSize int
// CompressedSize of the data in the block.
// Does not include the block header.
// Will be equal to DecompressedSize if not Compressed.
CompressedSize int
}
// Skippable will be true if the frame is meant to be skipped.
// No other information will be populated.
Skippable bool
// If set there is a checksum present for the block content.
HasCheckSum bool
// If this is true FrameContentSize will have a valid value
HasFCS bool
SingleSegment bool
}
// Decode the header from the beginning of the stream.
// This will decode the frame header and the first block header if enough bytes are provided.
// It is recommended to provide at least HeaderMaxSize bytes.
// If the frame header cannot be read an error will be returned.
// If there isn't enough input, io.ErrUnexpectedEOF is returned.
// The FirstBlock.OK will indicate if enough information was available to decode the first block header.
func (h *Header) Decode(in []byte) error {
if len(in) < 4 {
return io.ErrUnexpectedEOF
}
b, in := in[:4], in[4:]
if !bytes.Equal(b, frameMagic) {
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
return ErrMagicMismatch
}
*h = Header{Skippable: true}
return nil
}
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
// Clear output
*h = Header{}
fhd, in := in[0], in[1:]
h.SingleSegment = fhd&(1<<5) != 0
h.HasCheckSum = fhd&(1<<2) != 0
if fhd&(1<<3) != 0 {
return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#window_descriptor
if !h.SingleSegment {
if len(in) < 1 {
return io.ErrUnexpectedEOF
}
var wd byte
wd, in = in[0], in[1:]
windowLog := 10 + (wd >> 3)
windowBase := uint64(1) << windowLog
windowAdd := (windowBase / 8) * uint64(wd&0x7)
h.WindowSize = windowBase + windowAdd
}
// Read Dictionary_ID
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#dictionary_id
if size := fhd & 3; size != 0 {
if size == 3 {
size = 4
}
if len(in) < int(size) {
return io.ErrUnexpectedEOF
}
b, in = in[:size], in[size:]
if b == nil {
return io.ErrUnexpectedEOF
}
switch size {
case 1:
h.DictionaryID = uint32(b[0])
case 2:
h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8)
case 4:
h.DictionaryID = uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
}
}
// Read Frame_Content_Size
// https://github.com/facebook/zstd/blob/dev/doc/zstd_compression_format.md#frame_content_size
var fcsSize int
v := fhd >> 6
switch v {
case 0:
if h.SingleSegment {
fcsSize = 1
}
default:
fcsSize = 1 << v
}
if fcsSize > 0 {
h.HasFCS = true
if len(in) < fcsSize {
return io.ErrUnexpectedEOF
}
b, in = in[:fcsSize], in[fcsSize:]
if b == nil {
return io.ErrUnexpectedEOF
}
switch fcsSize {
case 1:
h.FrameContentSize = uint64(b[0])
case 2:
// When FCS_Field_Size is 2, the offset of 256 is added.
h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) + 256
case 4:
h.FrameContentSize = uint64(b[0]) | (uint64(b[1]) << 8) | (uint64(b[2]) << 16) | (uint64(b[3]) << 24)
case 8:
d1 := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
d2 := uint32(b[4]) | (uint32(b[5]) << 8) | (uint32(b[6]) << 16) | (uint32(b[7]) << 24)
h.FrameContentSize = uint64(d1) | (uint64(d2) << 32)
}
}
// Frame Header done, we will not fail from now on.
if len(in) < 3 {
return nil
}
tmp := in[:3]
bh := uint32(tmp[0]) | (uint32(tmp[1]) << 8) | (uint32(tmp[2]) << 16)
h.FirstBlock.Last = bh&1 != 0
blockType := blockType((bh >> 1) & 3)
// find size.
cSize := int(bh >> 3)
switch blockType {
case blockTypeReserved:
return nil
case blockTypeRLE:
h.FirstBlock.Compressed = true
h.FirstBlock.DecompressedSize = cSize
h.FirstBlock.CompressedSize = 1
case blockTypeCompressed:
h.FirstBlock.Compressed = true
h.FirstBlock.CompressedSize = cSize
case blockTypeRaw:
h.FirstBlock.DecompressedSize = cSize
h.FirstBlock.CompressedSize = cSize
default:
panic("Invalid block type")
}
h.FirstBlock.OK = true
return nil
}

View File

@ -5,7 +5,6 @@
package zstd
import (
"bytes"
"errors"
"io"
"sync"
@ -85,6 +84,10 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
d.current.output = make(chan decodeOutput, d.o.concurrent)
d.current.flushed = true
if r == nil {
d.current.err = ErrDecoderNilInput
}
// Transfer option dicts.
d.dicts = make(map[uint32]dict, len(d.o.dicts))
for _, dc := range d.o.dicts {
@ -111,7 +114,7 @@ func NewReader(r io.Reader, opts ...DOption) (*Decoder, error) {
// When the stream is done, io.EOF will be returned.
func (d *Decoder) Read(p []byte) (int, error) {
if d.stream == nil {
return 0, errors.New("no input has been initialized")
return 0, ErrDecoderNilInput
}
var n int
for {
@ -152,12 +155,20 @@ func (d *Decoder) Read(p []byte) (int, error) {
// Reset will reset the decoder the supplied stream after the current has finished processing.
// Note that this functionality cannot be used after Close has been called.
// Reset can be called with a nil reader to release references to the previous reader.
// After being called with a nil reader, no other operations than Reset or DecodeAll or Close
// should be used.
func (d *Decoder) Reset(r io.Reader) error {
if d.current.err == ErrDecoderClosed {
return d.current.err
}
d.drainOutput()
if r == nil {
return errors.New("nil Reader sent as input")
d.current.err = ErrDecoderNilInput
d.current.flushed = true
return nil
}
if d.stream == nil {
@ -166,14 +177,13 @@ func (d *Decoder) Reset(r io.Reader) error {
go d.startStreamDecoder(d.stream)
}
d.drainOutput()
// If bytes buffer and < 1MB, do sync decoding anyway.
if bb, ok := r.(*bytes.Buffer); ok && bb.Len() < 1<<20 {
if bb, ok := r.(byter); ok && bb.Len() < 1<<20 {
bb2 := bb
if debug {
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
}
b := bb.Bytes()
b := bb2.Bytes()
var dst []byte
if cap(d.current.b) > 0 {
dst = d.current.b
@ -226,20 +236,17 @@ func (d *Decoder) drainOutput() {
println("current already flushed")
return
}
for {
select {
case v := <-d.current.output:
if v.d != nil {
if debug {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
}
if v.err == errEndOfStream {
println("current flushed")
d.current.flushed = true
return
for v := range d.current.output {
if v.d != nil {
if debug {
printf("re-adding decoder %p", v.d)
}
d.decoders <- v.d
}
if v.err == errEndOfStream {
println("current flushed")
d.current.flushed = true
return
}
}
}
@ -249,7 +256,7 @@ func (d *Decoder) drainOutput() {
// Any error encountered during the write is also returned.
func (d *Decoder) WriteTo(w io.Writer) (int64, error) {
if d.stream == nil {
return 0, errors.New("no input has been initialized")
return 0, ErrDecoderNilInput
}
var n int64
for {

View File

@ -6,7 +6,6 @@ package zstd
import (
"errors"
"fmt"
"runtime"
)
@ -43,7 +42,7 @@ func WithDecoderLowmem(b bool) DOption {
func WithDecoderConcurrency(n int) DOption {
return func(o *decoderOptions) error {
if n <= 0 {
return fmt.Errorf("Concurrency must be at least 1")
return errors.New("concurrency must be at least 1")
}
o.concurrent = n
return nil
@ -61,7 +60,7 @@ func WithDecoderMaxMemory(n uint64) DOption {
return errors.New("WithDecoderMaxMemory must be at least 1")
}
if n > 1<<63 {
return fmt.Errorf("WithDecoderMaxmemory must be less than 1 << 63")
return errors.New("WithDecoderMaxmemory must be less than 1 << 63")
}
o.maxDecodedSize = n
return nil

View File

@ -7,6 +7,10 @@ import (
"github.com/klauspost/compress/zstd/internal/xxhash"
)
const (
dictShardBits = 6
)
type fastBase struct {
// cur is the offset at the start of hist
cur int32
@ -17,6 +21,7 @@ type fastBase struct {
tmp [8]byte
blk *blockEnc
lastDictID uint32
lowMem bool
}
// CRC returns the underlying CRC writer.
@ -57,15 +62,10 @@ func (e *fastBase) addBlock(src []byte) int32 {
// check if we have space already
if len(e.hist)+len(src) > cap(e.hist) {
if cap(e.hist) == 0 {
l := e.maxMatchOff * 2
// Make it at least 1MB.
if l < 1<<20 {
l = 1 << 20
}
e.hist = make([]byte, 0, l)
e.ensureHist(len(src))
} else {
if cap(e.hist) < int(e.maxMatchOff*2) {
panic("unexpected buffer size")
if cap(e.hist) < int(e.maxMatchOff+maxCompressedBlockSize) {
panic(fmt.Errorf("unexpected buffer cap %d, want at least %d with window %d", cap(e.hist), e.maxMatchOff+maxCompressedBlockSize, e.maxMatchOff))
}
// Move down
offset := int32(len(e.hist)) - e.maxMatchOff
@ -79,6 +79,28 @@ func (e *fastBase) addBlock(src []byte) int32 {
return s
}
// ensureHist will ensure that history can keep at least this many bytes.
func (e *fastBase) ensureHist(n int) {
if cap(e.hist) >= n {
return
}
l := e.maxMatchOff
if (e.lowMem && e.maxMatchOff > maxCompressedBlockSize) || e.maxMatchOff <= maxCompressedBlockSize {
l += maxCompressedBlockSize
} else {
l += e.maxMatchOff
}
// Make it at least 1MB.
if l < 1<<20 && !e.lowMem {
l = 1 << 20
}
// Make it at least the requested size.
if l < int32(n) {
l = int32(n)
}
e.hist = make([]byte, 0, l)
}
// useBlock will replace the block with the provided one,
// but transfer recent offsets from the previous.
func (e *fastBase) UseBlock(enc *blockEnc) {
@ -117,7 +139,7 @@ func (e *fastBase) matchlen(s, t int32, src []byte) int32 {
// Reset the encoding table.
func (e *fastBase) resetBase(d *dict, singleBlock bool) {
if e.blk == nil {
e.blk = &blockEnc{}
e.blk = &blockEnc{lowMem: e.lowMem}
e.blk.init()
} else {
e.blk.reset(nil)
@ -128,14 +150,15 @@ func (e *fastBase) resetBase(d *dict, singleBlock bool) {
} else {
e.crc.Reset()
}
if (!singleBlock || d.DictContentSize() > 0) && cap(e.hist) < int(e.maxMatchOff*2)+d.DictContentSize() {
l := e.maxMatchOff*2 + int32(d.DictContentSize())
// Make it at least 1MB.
if l < 1<<20 {
l = 1 << 20
if d != nil {
low := e.lowMem
if singleBlock {
e.lowMem = true
}
e.hist = make([]byte, 0, l)
e.ensureHist(d.DictContentSize() + maxCompressedBlockSize)
e.lowMem = low
}
// We offset current position so everything will be out of reach.
// If above reset line, history will be purged.
if e.cur < bufferReset {

501
vendor/github.com/klauspost/compress/zstd/enc_best.go generated vendored Normal file
View File

@ -0,0 +1,501 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
// Based on work by Yann Collet, released under BSD License.
package zstd
import (
"fmt"
"math/bits"
)
const (
bestLongTableBits = 20 // Bits used in the long match table
bestLongTableSize = 1 << bestLongTableBits // Size of the table
// Note: Increasing the short table bits or making the hash shorter
// can actually lead to compression degradation since it will 'steal' more from the
// long match table and match offsets are quite big.
// This greatly depends on the type of input.
bestShortTableBits = 16 // Bits used in the short match table
bestShortTableSize = 1 << bestShortTableBits // Size of the table
)
// bestFastEncoder uses 2 tables, one for short matches (5 bytes) and one for long matches.
// The long match table contains the previous entry with the same hash,
// effectively making it a "chain" of length 2.
// When we find a long match we choose between the two values and select the longest.
// When we find a short match, after checking the long, we check if we can find a long at n+1
// and that it is longer (lazy matching).
type bestFastEncoder struct {
fastBase
table [bestShortTableSize]prevEntry
longTable [bestLongTableSize]prevEntry
dictTable []prevEntry
dictLongTable []prevEntry
}
// Encode improves compression...
func (e *bestFastEncoder) Encode(blk *blockEnc, src []byte) {
const (
// Input margin is the number of bytes we read (8)
// and the maximum we will read ahead (2)
inputMargin = 8 + 4
minNonLiteralBlockSize = 16
)
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = prevEntry{}
}
for i := range e.longTable[:] {
e.longTable[i] = prevEntry{}
}
e.cur = e.maxMatchOff
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
for i := range e.table[:] {
v := e.table[i].offset
v2 := e.table[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.table[i] = prevEntry{
offset: v,
prev: v2,
}
}
for i := range e.longTable[:] {
v := e.longTable[i].offset
v2 := e.longTable[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.longTable[i] = prevEntry{
offset: v,
prev: v2,
}
}
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
const kSearchStrength = 10
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
offset3 := int32(blk.recentOffsets[2])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
_ = addLiterals
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
type match struct {
offset int32
s int32
length int32
rep int32
}
matchAt := func(offset int32, s int32, first uint32, rep int32) match {
if s-offset >= e.maxMatchOff || load3232(src, offset) != first {
return match{offset: offset, s: s}
}
return match{offset: offset, s: s, length: 4 + e.matchlen(s+4, offset+4, src), rep: rep}
}
bestOf := func(a, b match) match {
aScore := b.s - a.s + a.length
bScore := a.s - b.s + b.length
if a.rep < 0 {
aScore = aScore - int32(bits.Len32(uint32(a.offset)))/8
}
if b.rep < 0 {
bScore = bScore - int32(bits.Len32(uint32(b.offset)))/8
}
if aScore >= bScore {
return a
}
return b
}
const goodEnough = 100
nextHashL := hash8(cv, bestLongTableBits)
nextHashS := hash4x64(cv, bestShortTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
best := bestOf(matchAt(candidateL.offset-e.cur, s, uint32(cv), -1), matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateS.prev-e.cur, s, uint32(cv), -1))
if canRepeat && best.length < goodEnough {
best = bestOf(best, matchAt(s-offset1+1, s+1, uint32(cv>>8), 1))
best = bestOf(best, matchAt(s-offset2+1, s+1, uint32(cv>>8), 2))
best = bestOf(best, matchAt(s-offset3+1, s+1, uint32(cv>>8), 3))
if best.length > 0 {
best = bestOf(best, matchAt(s-offset1+3, s+3, uint32(cv>>24), 1))
best = bestOf(best, matchAt(s-offset2+3, s+3, uint32(cv>>24), 2))
best = bestOf(best, matchAt(s-offset3+3, s+3, uint32(cv>>24), 3))
}
}
// Load next and check...
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: candidateL.offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: candidateS.offset}
// Look far ahead, unless we have a really long match already...
if best.length < goodEnough {
// No match found, move forward on input, no need to check forward...
if best.length < 4 {
s += 1 + (s-nextEmit)>>(kSearchStrength-1)
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
continue
}
s++
candidateS = e.table[hash4x64(cv>>8, bestShortTableBits)]
cv = load6432(src, s)
cv2 := load6432(src, s+1)
candidateL = e.longTable[hash8(cv, bestLongTableBits)]
candidateL2 := e.longTable[hash8(cv2, bestLongTableBits)]
best = bestOf(best, matchAt(candidateS.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.offset-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL.prev-e.cur, s, uint32(cv), -1))
best = bestOf(best, matchAt(candidateL2.offset-e.cur, s+1, uint32(cv2), -1))
best = bestOf(best, matchAt(candidateL2.prev-e.cur, s+1, uint32(cv2), -1))
// See if we can find a better match by checking where the current best ends.
// Use that offset to see if we can find a better full match.
if sAt := best.s + best.length; sAt < sLimit {
nextHashL := hash8(load6432(src, sAt), bestLongTableBits)
candidateEnd := e.longTable[nextHashL]
if pos := candidateEnd.offset - e.cur - best.length; pos >= 0 {
bestEnd := bestOf(best, matchAt(pos, best.s, load3232(src, best.s), -1))
if pos := candidateEnd.prev - e.cur - best.length; pos >= 0 {
bestEnd = bestOf(bestEnd, matchAt(pos, best.s, load3232(src, best.s), -1))
}
best = bestEnd
}
}
}
// We have a match, we can store the forward value
if best.rep > 0 {
s = best.s
var seq seq
seq.matchLen = uint32(best.length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := best.s
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
repIndex := best.offset
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = uint32(best.rep)
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s
s = best.s + best.length
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, best.length)
}
break encodeLoop
}
// Index skipped...
off := index0 + e.cur
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
off++
index0++
}
switch best.rep {
case 2:
offset1, offset2 = offset2, offset1
case 3:
offset1, offset2, offset3 = offset3, offset1, offset2
}
cv = load6432(src, s)
continue
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
s = best.s
t := best.offset
offset1, offset2, offset3 = s-t, offset1, offset2
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the n-byte match as long as possible.
l := best.length
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
// every entry
for index0 < s-1 {
cv0 := load6432(src, index0)
h0 := hash8(cv0, bestLongTableBits)
h1 := hash4x64(cv0, bestShortTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.table[h1] = prevEntry{offset: off, prev: e.table[h1].offset}
index0++
}
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hash4x64(cv, bestShortTableBits)
nextHashL := hash8(cv, bestLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
e.table[nextHashS] = prevEntry{offset: s + e.cur, prev: e.table[nextHashS].offset}
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
blk.recentOffsets[2] = uint32(offset3)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
// EncodeNoHist will encode a block with no history and no following blocks.
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *bestFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.ensureHist(len(src))
e.Encode(blk, src)
}
// ResetDict will reset and set a dictionary if not nil
func (e *bestFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
return
}
// Init or copy dict table
if len(e.dictTable) != len(e.table) || d.id != e.lastDictID {
if len(e.dictTable) != len(e.table) {
e.dictTable = make([]prevEntry, len(e.table))
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
for i := e.maxMatchOff; i < end; i += 4 {
const hashLog = bestShortTableBits
cv := load6432(d.content, i-e.maxMatchOff)
nextHash := hash4x64(cv, hashLog) // 0 -> 4
nextHash1 := hash4x64(cv>>8, hashLog) // 1 -> 5
nextHash2 := hash4x64(cv>>16, hashLog) // 2 -> 6
nextHash3 := hash4x64(cv>>24, hashLog) // 3 -> 7
e.dictTable[nextHash] = prevEntry{
prev: e.dictTable[nextHash].offset,
offset: i,
}
e.dictTable[nextHash1] = prevEntry{
prev: e.dictTable[nextHash1].offset,
offset: i + 1,
}
e.dictTable[nextHash2] = prevEntry{
prev: e.dictTable[nextHash2].offset,
offset: i + 2,
}
e.dictTable[nextHash3] = prevEntry{
prev: e.dictTable[nextHash3].offset,
offset: i + 3,
}
}
e.lastDictID = d.id
}
// Init or copy dict table
if len(e.dictLongTable) != len(e.longTable) || d.id != e.lastDictID {
if len(e.dictLongTable) != len(e.longTable) {
e.dictLongTable = make([]prevEntry, len(e.longTable))
}
if len(d.content) >= 8 {
cv := load6432(d.content, 0)
h := hash8(cv, bestLongTableBits)
e.dictLongTable[h] = prevEntry{
offset: e.maxMatchOff,
prev: e.dictLongTable[h].offset,
}
end := int32(len(d.content)) - 8 + e.maxMatchOff
off := 8 // First to read
for i := e.maxMatchOff + 1; i < end; i++ {
cv = cv>>8 | (uint64(d.content[off]) << 56)
h := hash8(cv, bestLongTableBits)
e.dictLongTable[h] = prevEntry{
offset: i,
prev: e.dictLongTable[h].offset,
}
off++
}
}
e.lastDictID = d.id
}
// Reset table to initial state
copy(e.longTable[:], e.dictLongTable)
e.cur = e.maxMatchOff
// Reset table to initial state
copy(e.table[:], e.dictTable)
}

View File

@ -16,6 +16,12 @@ const (
// This greatly depends on the type of input.
betterShortTableBits = 13 // Bits used in the short match table
betterShortTableSize = 1 << betterShortTableBits // Size of the table
betterLongTableShardCnt = 1 << (betterLongTableBits - dictShardBits) // Number of shards in the table
betterLongTableShardSize = betterLongTableSize / betterLongTableShardCnt // Size of an individual shard
betterShortTableShardCnt = 1 << (betterShortTableBits - dictShardBits) // Number of shards in the table
betterShortTableShardSize = betterShortTableSize / betterShortTableShardCnt // Size of an individual shard
)
type prevEntry struct {
@ -31,10 +37,17 @@ type prevEntry struct {
// and that it is longer (lazy matching).
type betterFastEncoder struct {
fastBase
table [betterShortTableSize]tableEntry
longTable [betterLongTableSize]prevEntry
dictTable []tableEntry
dictLongTable []prevEntry
table [betterShortTableSize]tableEntry
longTable [betterLongTableSize]prevEntry
}
type betterFastEncoderDict struct {
betterFastEncoder
dictTable []tableEntry
dictLongTable []prevEntry
shortTableShardDirty [betterShortTableShardCnt]bool
longTableShardDirty [betterLongTableShardCnt]bool
allDirty bool
}
// Encode improves compression...
@ -399,8 +412,41 @@ encodeLoop:
cv = load6432(src, s)
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
// Try to find a better match by searching for a long match at the end of the current best match
if true && s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
@ -516,11 +562,543 @@ encodeLoop:
// Most notable difference is that src will not be copied for history and
// we do not need to check for max match length.
func (e *betterFastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
e.ensureHist(len(src))
e.Encode(blk, src)
}
// Encode improves compression...
func (e *betterFastEncoderDict) Encode(blk *blockEnc, src []byte) {
const (
// Input margin is the number of bytes we read (8)
// and the maximum we will read ahead (2)
inputMargin = 8 + 2
minNonLiteralBlockSize = 16
)
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.longTable[:] {
e.longTable[i] = prevEntry{}
}
e.cur = e.maxMatchOff
e.allDirty = true
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
for i := range e.table[:] {
v := e.table[i].offset
if v < minOff {
v = 0
} else {
v = v - e.cur + e.maxMatchOff
}
e.table[i].offset = v
}
for i := range e.longTable[:] {
v := e.longTable[i].offset
v2 := e.longTable[i].prev
if v < minOff {
v = 0
v2 = 0
} else {
v = v - e.cur + e.maxMatchOff
if v2 < minOff {
v2 = 0
} else {
v2 = v2 - e.cur + e.maxMatchOff
}
}
e.longTable[i] = prevEntry{
offset: v,
prev: v2,
}
}
e.allDirty = true
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 1.
const stepSize = 1
const kSearchStrength = 9
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
var matched int32
for {
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
const repOff = 1
repIndex := s - offset1 + repOff
off := s + e.cur
e.longTable[nextHashL] = prevEntry{offset: off, prev: candidateL.offset}
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: off, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = 1
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Index match start+1 (long) -> s - 1
index0 := s + repOff
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
// Index skipped...
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
}
cv = load6432(src, s)
continue
}
const repOff2 = 1
// We deviate from the reference encoder and also check offset 2.
// Still slower and not much better, so disabled.
// repIndex = s - offset2 + repOff2
if false && repIndex >= 0 && load6432(src, repIndex) == load6432(src, s+repOff) {
// Consider history as well.
var seq seq
lenght := 8 + e.matchlen(s+8+repOff2, repIndex+8, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff2
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 2
seq.offset = 2
if debugSequences {
println("repeat sequence 2", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
index0 := s + repOff2
s += lenght + repOff2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
// Index skipped...
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
}
cv = load6432(src, s)
// Swap offsets
offset1, offset2 = offset2, offset1
continue
}
}
// Find the offsets of our two matches.
coffsetL := candidateL.offset - e.cur
coffsetLP := candidateL.prev - e.cur
// Check if we have a long match.
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matched = e.matchlen(s+8, coffsetL+8, src) + 8
t = coffsetL
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
// Found a long match, at least 8 bytes.
prevMatch := e.matchlen(s+8, coffsetLP+8, src) + 8
if prevMatch > matched {
matched = prevMatch
t = coffsetLP
}
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
}
break
}
// Check if we have a long match on prev.
if s-coffsetLP < e.maxMatchOff && cv == load6432(src, coffsetLP) {
// Found a long match, at least 8 bytes.
matched = e.matchlen(s+8, coffsetLP+8, src) + 8
t = coffsetLP
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
break
}
coffsetS := candidateS.offset - e.cur
// Check if we have a short match.
if s-coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
// found a regular match
matched = e.matchlen(s+4, coffsetS+4, src) + 4
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, betterLongTableBits)
candidateL = e.longTable[nextHashL]
coffsetL = candidateL.offset - e.cur
// We can store it, since we have at least a 4 byte match.
e.longTable[nextHashL] = prevEntry{offset: s + checkAt + e.cur, prev: candidateL.offset}
e.markLongShardDirty(nextHashL)
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
if matchedNext > matched {
t = coffsetL
s += checkAt
matched = matchedNext
if debugMatches {
println("long match (after short)")
}
break
}
}
// Check prev long...
coffsetL = candidateL.prev - e.cur
if s-coffsetL < e.maxMatchOff && cv == load6432(src, coffsetL) {
// Found a long match, at least 8 bytes.
matchedNext := e.matchlen(s+8+checkAt, coffsetL+8, src) + 8
if matchedNext > matched {
t = coffsetL
s += checkAt
matched = matchedNext
if debugMatches {
println("prev long match (after short)")
}
break
}
}
t = coffsetS
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic("t<0")
}
if debugMatches {
println("short match")
}
break
}
// No match found, move forward in input.
s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
}
// Try to find a better match by searching for a long match at the end of the current best match
if s+matched < sLimit {
nextHashL := hash8(load6432(src, s+matched), betterLongTableBits)
cv := load3232(src, s)
candidateL := e.longTable[nextHashL]
coffsetL := candidateL.offset - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("long match at end-of-match")
}
}
}
// Check prev long...
if true {
coffsetL = candidateL.prev - e.cur - matched
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
// Found a long match, at least 4 bytes.
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
if matchedNext > matched {
t = coffsetL
matched = matchedNext
if debugMatches {
println("prev long match at end-of-match")
}
}
}
}
}
// A match has been found. Update recent offsets.
offset2 = offset1
offset1 = s - t
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the n-byte match as long as possible.
l := matched
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
// Index match start+1 (long) -> s - 1
index0 := s - l + 1
for index0 < s-1 {
cv0 := load6432(src, index0)
cv1 := cv0 >> 8
h0 := hash8(cv0, betterLongTableBits)
off := index0 + e.cur
e.longTable[h0] = prevEntry{offset: off, prev: e.longTable[h0].offset}
e.markLongShardDirty(h0)
h1 := hash5(cv1, betterShortTableBits)
e.table[h1] = tableEntry{offset: off + 1, val: uint32(cv1)}
e.markShortShardDirty(h1)
index0 += 2
}
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hash5(cv, betterShortTableBits)
nextHashL := hash8(cv, betterLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
e.longTable[nextHashL] = prevEntry{offset: s + e.cur, prev: e.longTable[nextHashL].offset}
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.markShortShardDirty(nextHashS)
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d != nil {
panic("betterFastEncoder: Reset with dict")
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *betterFastEncoderDict) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
return
@ -557,6 +1135,7 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
e.allDirty = true
}
// Init or copy dict table
@ -585,11 +1164,72 @@ func (e *betterFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
e.allDirty = true
}
// Reset table to initial state
copy(e.longTable[:], e.dictLongTable)
e.cur = e.maxMatchOff
// Reset table to initial state
copy(e.table[:], e.dictTable)
{
dirtyShardCnt := 0
if !e.allDirty {
for i := range e.shortTableShardDirty {
if e.shortTableShardDirty[i] {
dirtyShardCnt++
}
}
}
const shardCnt = betterShortTableShardCnt
const shardSize = betterShortTableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
copy(e.table[:], e.dictTable)
for i := range e.shortTableShardDirty {
e.shortTableShardDirty[i] = false
}
} else {
for i := range e.shortTableShardDirty {
if !e.shortTableShardDirty[i] {
continue
}
copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
e.shortTableShardDirty[i] = false
}
}
}
{
dirtyShardCnt := 0
if !e.allDirty {
for i := range e.shortTableShardDirty {
if e.shortTableShardDirty[i] {
dirtyShardCnt++
}
}
}
const shardCnt = betterLongTableShardCnt
const shardSize = betterLongTableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
copy(e.longTable[:], e.dictLongTable)
for i := range e.longTableShardDirty {
e.longTableShardDirty[i] = false
}
} else {
for i := range e.longTableShardDirty {
if !e.longTableShardDirty[i] {
continue
}
copy(e.longTable[i*shardSize:(i+1)*shardSize], e.dictLongTable[i*shardSize:(i+1)*shardSize])
e.longTableShardDirty[i] = false
}
}
}
e.cur = e.maxMatchOff
e.allDirty = false
}
func (e *betterFastEncoderDict) markLongShardDirty(entryNum uint32) {
e.longTableShardDirty[entryNum/betterLongTableShardSize] = true
}
func (e *betterFastEncoderDict) markShortShardDirty(entryNum uint32) {
e.shortTableShardDirty[entryNum/betterShortTableShardSize] = true
}

View File

@ -11,6 +11,9 @@ const (
dFastLongTableSize = 1 << dFastLongTableBits // Size of the table
dFastLongTableMask = dFastLongTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
dLongTableShardCnt = 1 << (dFastLongTableBits - dictShardBits) // Number of shards in the table
dLongTableShardSize = dFastLongTableSize / tableShardCnt // Size of an individual shard
dFastShortTableBits = tableBits // Bits used in the short match table
dFastShortTableSize = 1 << dFastShortTableBits // Size of the table
dFastShortTableMask = dFastShortTableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
@ -18,8 +21,14 @@ const (
type doubleFastEncoder struct {
fastEncoder
longTable [dFastLongTableSize]tableEntry
dictLongTable []tableEntry
longTable [dFastLongTableSize]tableEntry
}
type doubleFastEncoderDict struct {
fastEncoderDict
longTable [dFastLongTableSize]tableEntry
dictLongTable []tableEntry
longTableShardDirty [dLongTableShardCnt]bool
}
// Encode mimmics functionality in zstd_dfast.c
@ -678,9 +687,379 @@ encodeLoop:
}
}
// Encode will encode the content, with a dictionary if initialized for it.
func (e *doubleFastEncoderDict) Encode(blk *blockEnc, src []byte) {
const (
// Input margin is the number of bytes we read (8)
// and the maximum we will read ahead (2)
inputMargin = 8 + 2
minNonLiteralBlockSize = 16
)
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
for i := range e.longTable[:] {
e.longTable[i] = tableEntry{}
}
e.markAllShardsDirty()
e.cur = e.maxMatchOff
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
for i := range e.table[:] {
v := e.table[i].offset
if v < minOff {
v = 0
} else {
v = v - e.cur + e.maxMatchOff
}
e.table[i].offset = v
}
for i := range e.longTable[:] {
v := e.longTable[i].offset
if v < minOff {
v = 0
} else {
v = v - e.cur + e.maxMatchOff
}
e.longTable[i].offset = v
}
e.markAllShardsDirty()
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 1.
const stepSize = 1
const kSearchStrength = 8
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
var t int32
// We allow the encoder to optionally turn off repeat offsets across blocks
canRepeat := len(blk.sequences) > 2
for {
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
candidateL := e.longTable[nextHashL]
candidateS := e.table[nextHashS]
const repOff = 1
repIndex := s - offset1 + repOff
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL] = entry
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = entry
e.markShardDirty(nextHashS)
if canRepeat {
if repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>(repOff*8)) {
// Consider history as well.
var seq seq
lenght := 4 + e.matchlen(s+4+repOff, repIndex+4, src)
seq.matchLen = uint32(lenght - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + repOff
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for repIndex > tMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch-1 {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = 1
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += lenght + repOff
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, lenght)
}
break encodeLoop
}
cv = load6432(src, s)
continue
}
}
// Find the offsets of our two matches.
coffsetL := s - (candidateL.offset - e.cur)
coffsetS := s - (candidateS.offset - e.cur)
// Check if we have a long match.
if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
// Found a long match, likely at least 8 bytes.
// Reference encoder checks all 8 bytes, we only check 4,
// but the likelihood of both the first 4 bytes and the hash matching should be enough.
t = candidateL.offset - e.cur
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugMatches {
println("long match")
}
break
}
// Check if we have a short match.
if coffsetS < e.maxMatchOff && uint32(cv) == candidateS.val {
// found a regular match
// See if we can find a long match at s+1
const checkAt = 1
cv := load6432(src, s+checkAt)
nextHashL = hash8(cv, dFastLongTableBits)
candidateL = e.longTable[nextHashL]
coffsetL = s - (candidateL.offset - e.cur) + checkAt
// We can store it, since we have at least a 4 byte match.
e.longTable[nextHashL] = tableEntry{offset: s + checkAt + e.cur, val: uint32(cv)}
e.markLongShardDirty(nextHashL)
if coffsetL < e.maxMatchOff && uint32(cv) == candidateL.val {
// Found a long match, likely at least 8 bytes.
// Reference encoder checks all 8 bytes, we only check 4,
// but the likelihood of both the first 4 bytes and the hash matching should be enough.
t = candidateL.offset - e.cur
s += checkAt
if debugMatches {
println("long match (after short)")
}
break
}
t = candidateS.offset - e.cur
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic("t<0")
}
if debugMatches {
println("short match")
}
break
}
// No match found, move forward in input.
s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
}
// A 4-byte match has been found. Update recent offsets.
// We'll later see if more than 4 bytes.
offset2 = offset1
offset1 = s - t
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the 4-byte match as long as possible.
l := e.matchlen(s+4, t+4, src) + 4
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
// Index match start+1 (long) and start+2 (short)
index0 := s - l + 1
// Index match end-2 (long) and end-1 (short)
index1 := s - 2
cv0 := load6432(src, index0)
cv1 := load6432(src, index1)
te0 := tableEntry{offset: index0 + e.cur, val: uint32(cv0)}
te1 := tableEntry{offset: index1 + e.cur, val: uint32(cv1)}
longHash1 := hash8(cv0, dFastLongTableBits)
longHash2 := hash8(cv0, dFastLongTableBits)
e.longTable[longHash1] = te0
e.longTable[longHash2] = te1
e.markLongShardDirty(longHash1)
e.markLongShardDirty(longHash2)
cv0 >>= 8
cv1 >>= 8
te0.offset++
te1.offset++
te0.val = uint32(cv0)
te1.val = uint32(cv1)
hashVal1 := hash5(cv0, dFastShortTableBits)
hashVal2 := hash5(cv1, dFastShortTableBits)
e.table[hashVal1] = te0
e.markShardDirty(hashVal1)
e.table[hashVal2] = te1
e.markShardDirty(hashVal2)
cv = load6432(src, s)
if !canRepeat {
continue
}
// Check offset 2
for {
o2 := s - offset2
if load3232(src, o2) != uint32(cv) {
// Do regular search
break
}
// Store this, since we have it.
nextHashS := hash5(cv, dFastShortTableBits)
nextHashL := hash8(cv, dFastLongTableBits)
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
l := 4 + e.matchlen(s+4, o2+4, src)
entry := tableEntry{offset: s + e.cur, val: uint32(cv)}
e.longTable[nextHashL] = entry
e.markLongShardDirty(nextHashL)
e.table[nextHashS] = entry
e.markShardDirty(nextHashS)
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
// Finished
break encodeLoop
}
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
// If we encoded more than 64K mark all dirty.
if len(src) > 64<<10 {
e.markAllShardsDirty()
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
e.fastEncoder.Reset(d, singleBlock)
if d != nil {
panic("doubleFastEncoder: Reset with dict not supported")
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
allDirty := e.allDirty
e.fastEncoderDict.Reset(d, singleBlock)
if d == nil {
return
}
@ -706,8 +1085,37 @@ func (e *doubleFastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
e.allDirty = true
}
// Reset table to initial state
e.cur = e.maxMatchOff
copy(e.longTable[:], e.dictLongTable)
dirtyShardCnt := 0
if !allDirty {
for i := range e.longTableShardDirty {
if e.longTableShardDirty[i] {
dirtyShardCnt++
}
}
}
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
copy(e.longTable[:], e.dictLongTable)
for i := range e.longTableShardDirty {
e.longTableShardDirty[i] = false
}
return
}
for i := range e.longTableShardDirty {
if !e.longTableShardDirty[i] {
continue
}
copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
e.longTableShardDirty[i] = false
}
}
func (e *doubleFastEncoderDict) markLongShardDirty(entryNum uint32) {
e.longTableShardDirty[entryNum/dLongTableShardSize] = true
}

View File

@ -11,9 +11,11 @@ import (
)
const (
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
tableBits = 15 // Bits used in the table
tableSize = 1 << tableBits // Size of the table
tableShardCnt = 1 << (tableBits - dictShardBits) // Number of shards in the table
tableShardSize = tableSize / tableShardCnt // Size of an individual shard
tableMask = tableSize - 1 // Mask for table indices. Redundant, but can eliminate bounds checks.
maxMatchLength = 131074
)
@ -24,8 +26,14 @@ type tableEntry struct {
type fastEncoder struct {
fastBase
table [tableSize]tableEntry
dictTable []tableEntry
table [tableSize]tableEntry
}
type fastEncoderDict struct {
fastEncoder
dictTable []tableEntry
tableShardDirty [tableShardCnt]bool
allDirty bool
}
// Encode mimmics functionality in zstd_fast.c
@ -78,7 +86,7 @@ func (e *fastEncoder) Encode(blk *blockEnc, src []byte) {
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 8
const kSearchStrength = 7
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
@ -617,8 +625,322 @@ encodeLoop:
}
}
// Encode will encode the content, with a dictionary if initialized for it.
func (e *fastEncoderDict) Encode(blk *blockEnc, src []byte) {
const (
inputMargin = 8
minNonLiteralBlockSize = 1 + 1 + inputMargin
)
if e.allDirty || len(src) > 32<<10 {
e.fastEncoder.Encode(blk, src)
e.allDirty = true
return
}
// Protect against e.cur wraparound.
for e.cur >= bufferReset {
if len(e.hist) == 0 {
for i := range e.table[:] {
e.table[i] = tableEntry{}
}
e.cur = e.maxMatchOff
break
}
// Shift down everything in the table that isn't already too far away.
minOff := e.cur + int32(len(e.hist)) - e.maxMatchOff
for i := range e.table[:] {
v := e.table[i].offset
if v < minOff {
v = 0
} else {
v = v - e.cur + e.maxMatchOff
}
e.table[i].offset = v
}
e.cur = e.maxMatchOff
break
}
s := e.addBlock(src)
blk.size = len(src)
if len(src) < minNonLiteralBlockSize {
blk.extraLits = len(src)
blk.literals = blk.literals[:len(src)]
copy(blk.literals, src)
return
}
// Override src
src = e.hist
sLimit := int32(len(src)) - inputMargin
// stepSize is the number of bytes to skip on every main loop iteration.
// It should be >= 2.
const stepSize = 2
// TEMPLATE
const hashLog = tableBits
// seems global, but would be nice to tweak.
const kSearchStrength = 7
// nextEmit is where in src the next emitLiteral should start from.
nextEmit := s
cv := load6432(src, s)
// Relative offsets
offset1 := int32(blk.recentOffsets[0])
offset2 := int32(blk.recentOffsets[1])
addLiterals := func(s *seq, until int32) {
if until == nextEmit {
return
}
blk.literals = append(blk.literals, src[nextEmit:until]...)
s.litLen = uint32(until - nextEmit)
}
if debug {
println("recent offsets:", blk.recentOffsets)
}
encodeLoop:
for {
// t will contain the match offset when we find one.
// When existing the search loop, we have already checked 4 bytes.
var t int32
// We will not use repeat offsets across blocks.
// By not using them for the first 3 matches
canRepeat := len(blk.sequences) > 2
for {
if debugAsserts && canRepeat && offset1 == 0 {
panic("offset0 was 0")
}
nextHash := hash6(cv, hashLog)
nextHash2 := hash6(cv>>8, hashLog)
candidate := e.table[nextHash]
candidate2 := e.table[nextHash2]
repIndex := s - offset1 + 2
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.markShardDirty(nextHash)
e.table[nextHash2] = tableEntry{offset: s + e.cur + 1, val: uint32(cv >> 8)}
e.markShardDirty(nextHash2)
if canRepeat && repIndex >= 0 && load3232(src, repIndex) == uint32(cv>>16) {
// Consider history as well.
var seq seq
var length int32
// length = 4 + e.matchlen(s+6, repIndex+4, src)
{
a := src[s+6:]
b := src[repIndex+4:]
endI := len(a) & (math.MaxInt32 - 7)
length = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
length = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
seq.matchLen = uint32(length - zstdMinMatch)
// We might be able to match backwards.
// Extend as long as we can.
start := s + 2
// We end the search early, so we don't risk 0 literals
// and have to do special offset treatment.
startLimit := nextEmit + 1
sMin := s - e.maxMatchOff
if sMin < 0 {
sMin = 0
}
for repIndex > sMin && start > startLimit && src[repIndex-1] == src[start-1] && seq.matchLen < maxMatchLength-zstdMinMatch {
repIndex--
start--
seq.matchLen++
}
addLiterals(&seq, start)
// rep 0
seq.offset = 1
if debugSequences {
println("repeat sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
s += length + 2
nextEmit = s
if s >= sLimit {
if debug {
println("repeat ended", s, length)
}
break encodeLoop
}
cv = load6432(src, s)
continue
}
coffset0 := s - (candidate.offset - e.cur)
coffset1 := s - (candidate2.offset - e.cur) + 1
if coffset0 < e.maxMatchOff && uint32(cv) == candidate.val {
// found a regular match
t = candidate.offset - e.cur
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
break
}
if coffset1 < e.maxMatchOff && uint32(cv>>8) == candidate2.val {
// found a regular match
t = candidate2.offset - e.cur
s++
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && s-t > e.maxMatchOff {
panic("s - t >e.maxMatchOff")
}
if debugAsserts && t < 0 {
panic("t<0")
}
break
}
s += stepSize + ((s - nextEmit) >> (kSearchStrength - 1))
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
}
// A 4-byte match has been found. We'll later see if more than 4 bytes.
offset2 = offset1
offset1 = s - t
if debugAsserts && s <= t {
panic(fmt.Sprintf("s (%d) <= t (%d)", s, t))
}
if debugAsserts && canRepeat && int(offset1) > len(src) {
panic("invalid offset")
}
// Extend the 4-byte match as long as possible.
//l := e.matchlen(s+4, t+4, src) + 4
var l int32
{
a := src[s+4:]
b := src[t+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Extend backwards
tMin := s - e.maxMatchOff
if tMin < 0 {
tMin = 0
}
for t > tMin && s > nextEmit && src[t-1] == src[s-1] && l < maxMatchLength {
s--
t--
l++
}
// Write our sequence.
var seq seq
seq.litLen = uint32(s - nextEmit)
seq.matchLen = uint32(l - zstdMinMatch)
if seq.litLen > 0 {
blk.literals = append(blk.literals, src[nextEmit:s]...)
}
// Don't use repeat offsets
seq.offset = uint32(s-t) + 3
s += l
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
nextEmit = s
if s >= sLimit {
break encodeLoop
}
cv = load6432(src, s)
// Check offset 2
if o2 := s - offset2; canRepeat && load3232(src, o2) == uint32(cv) {
// We have at least 4 byte match.
// No need to check backwards. We come straight from a match
//l := 4 + e.matchlen(s+4, o2+4, src)
var l int32
{
a := src[s+4:]
b := src[o2+4:]
endI := len(a) & (math.MaxInt32 - 7)
l = int32(endI) + 4
for i := 0; i < endI; i += 8 {
if diff := load64(a, i) ^ load64(b, i); diff != 0 {
l = int32(i+bits.TrailingZeros64(diff)>>3) + 4
break
}
}
}
// Store this, since we have it.
nextHash := hash6(cv, hashLog)
e.table[nextHash] = tableEntry{offset: s + e.cur, val: uint32(cv)}
e.markShardDirty(nextHash)
seq.matchLen = uint32(l) - zstdMinMatch
seq.litLen = 0
// Since litlen is always 0, this is offset 1.
seq.offset = 1
s += l
nextEmit = s
if debugSequences {
println("sequence", seq, "next s:", s)
}
blk.sequences = append(blk.sequences, seq)
// Swap offset 1 and 2.
offset1, offset2 = offset2, offset1
if s >= sLimit {
break encodeLoop
}
// Prepare next loop.
cv = load6432(src, s)
}
}
if int(nextEmit) < len(src) {
blk.literals = append(blk.literals, src[nextEmit:]...)
blk.extraLits = len(src) - int(nextEmit)
}
blk.recentOffsets[0] = uint32(offset1)
blk.recentOffsets[1] = uint32(offset2)
if debug {
println("returning, recent offsets:", blk.recentOffsets, "extra literals:", blk.extraLits)
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d != nil {
panic("fastEncoder: Reset with dict")
}
}
// ResetDict will reset and set a dictionary if not nil
func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
e.resetBase(d, singleBlock)
if d == nil {
return
@ -653,9 +975,44 @@ func (e *fastEncoder) Reset(d *dict, singleBlock bool) {
}
}
e.lastDictID = d.id
e.allDirty = true
}
e.cur = e.maxMatchOff
// Reset table to initial state
copy(e.table[:], e.dictTable)
dirtyShardCnt := 0
if !e.allDirty {
for i := range e.tableShardDirty {
if e.tableShardDirty[i] {
dirtyShardCnt++
}
}
}
const shardCnt = tableShardCnt
const shardSize = tableShardSize
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
copy(e.table[:], e.dictTable)
for i := range e.tableShardDirty {
e.tableShardDirty[i] = false
}
e.allDirty = false
return
}
for i := range e.tableShardDirty {
if !e.tableShardDirty[i] {
continue
}
copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
e.tableShardDirty[i] = false
}
e.allDirty = false
}
func (e *fastEncoderDict) markAllShardsDirty() {
e.allDirty = true
}
func (e *fastEncoderDict) markShardDirty(entryNum uint32) {
e.tableShardDirty[entryNum/tableShardSize] = true
}

View File

@ -106,7 +106,7 @@ func (e *Encoder) Reset(w io.Writer) {
s.encoder = e.o.encoder()
}
if s.writing == nil {
s.writing = &blockEnc{}
s.writing = &blockEnc{lowMem: e.o.lowMem}
s.writing.init()
}
s.writing.initNewEncode()
@ -176,6 +176,12 @@ func (e *Encoder) nextBlock(final bool) error {
}
if !s.headerWritten {
// If we have a single block encode, do a sync compression.
if final && len(s.filling) == 0 && !e.o.fullZero {
s.headerWritten = true
s.fullFrameWritten = true
s.eofWritten = true
return nil
}
if final && len(s.filling) > 0 {
s.current = e.EncodeAll(s.filling, s.current[:0])
var n2 int
@ -334,13 +340,13 @@ func (e *Encoder) ReadFrom(r io.Reader) (n int64, err error) {
println("ReadFrom: got EOF final block:", len(e.state.filling))
}
return n, nil
case nil:
default:
if debug {
println("ReadFrom: got error:", err)
}
e.state.err = err
return n, err
case nil:
}
if len(src) > 0 {
if debug {
@ -471,7 +477,7 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
}
// If less than 1MB, allocate a buffer up front.
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 {
if len(dst) == 0 && cap(dst) == 0 && len(src) < 1<<20 && !e.o.lowMem {
dst = make([]byte, 0, len(src))
}
dst, err := fh.appendTo(dst)

View File

@ -24,12 +24,12 @@ type encoderOptions struct {
allLitEntropy bool
customWindow bool
customALEntropy bool
lowMem bool
dict *dict
}
func (o *encoderOptions) setDefault() {
*o = encoderOptions{
// use less ram: true for now, but may change.
concurrent: runtime.GOMAXPROCS(0),
crc: true,
single: nil,
@ -37,18 +37,31 @@ func (o *encoderOptions) setDefault() {
windowSize: 8 << 20,
level: SpeedDefault,
allLitEntropy: true,
lowMem: false,
}
}
// encoder returns an encoder with the selected options.
func (o encoderOptions) encoder() encoder {
switch o.level {
case SpeedDefault:
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}}
case SpeedBetterCompression:
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
case SpeedFastest:
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize)}}
if o.dict != nil {
return &fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
}
return &fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
case SpeedDefault:
if o.dict != nil {
return &doubleFastEncoderDict{fastEncoderDict: fastEncoderDict{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}}
}
return &doubleFastEncoder{fastEncoder: fastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
case SpeedBetterCompression:
if o.dict != nil {
return &betterFastEncoderDict{betterFastEncoder: betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}}
}
return &betterFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
case SpeedBestCompression:
return &bestFastEncoder{fastBase: fastBase{maxMatchOff: int32(o.windowSize), lowMem: o.lowMem}}
}
panic("unknown compression level")
}
@ -60,7 +73,7 @@ func WithEncoderCRC(b bool) EOption {
}
// WithEncoderConcurrency will set the concurrency,
// meaning the maximum number of decoders to run concurrently.
// meaning the maximum number of encoders to run concurrently.
// The value supplied must be at least 1.
// By default this will be set to GOMAXPROCS.
func WithEncoderConcurrency(n int) EOption {
@ -143,20 +156,20 @@ const (
// By using this, notice that CPU usage may go up in the future.
SpeedBetterCompression
// SpeedBestCompression will choose the best available compression option.
// This will offer the best compression no matter the CPU cost.
SpeedBestCompression
// speedLast should be kept as the last actual compression option.
// The is not for external usage, but is used to keep track of the valid options.
speedLast
// SpeedBestCompression will choose the best available compression option.
// For now this is not implemented.
SpeedBestCompression = SpeedBetterCompression
)
// EncoderLevelFromString will convert a string representation of an encoding level back
// to a compression level. The compare is not case sensitive.
// If the string wasn't recognized, (false, SpeedDefault) will be returned.
func EncoderLevelFromString(s string) (bool, EncoderLevel) {
for l := EncoderLevel(speedNotSet + 1); l < speedLast; l++ {
for l := speedNotSet + 1; l < speedLast; l++ {
if strings.EqualFold(s, l.String()) {
return true, l
}
@ -173,7 +186,9 @@ func EncoderLevelFromZstd(level int) EncoderLevel {
return SpeedFastest
case level >= 3 && level < 6:
return SpeedDefault
case level > 5:
case level >= 6 && level < 10:
return SpeedBetterCompression
case level >= 10:
return SpeedBetterCompression
}
return SpeedDefault
@ -188,6 +203,8 @@ func (e EncoderLevel) String() string {
return "default"
case SpeedBetterCompression:
return "better"
case SpeedBestCompression:
return "best"
default:
return "invalid"
}
@ -209,6 +226,8 @@ func WithEncoderLevel(l EncoderLevel) EOption {
o.windowSize = 8 << 20
case SpeedBetterCompression:
o.windowSize = 16 << 20
case SpeedBestCompression:
o.windowSize = 32 << 20
}
}
if !o.customALEntropy {
@ -268,6 +287,17 @@ func WithSingleSegment(b bool) EOption {
}
}
// WithLowerEncoderMem will trade in some memory cases trade less memory usage for
// slower encoding speed.
// This will not change the window size which is the primary function for reducing
// memory usage. See WithWindowSize.
func WithLowerEncoderMem(b bool) EOption {
return func(o *encoderOptions) error {
o.lowMem = b
return nil
}
}
// WithEncoderDict allows to register a dictionary that will be used for the encode.
// The encoder *may* choose to use no dictionary instead for certain payloads.
func WithEncoderDict(dict []byte) EOption {

View File

@ -80,9 +80,14 @@ func (d *frameDec) reset(br byteBuffer) error {
d.WindowSize = 0
var b []byte
for {
b = br.readSmall(4)
if b == nil {
var err error
b, err = br.readSmall(4)
switch err {
case io.EOF, io.ErrUnexpectedEOF:
return io.EOF
default:
return err
case nil:
}
if !bytes.Equal(b[1:4], skippableFrameMagic) || b[0]&0xf0 != 0x50 {
if debug {
@ -92,14 +97,14 @@ func (d *frameDec) reset(br byteBuffer) error {
break
}
// Read size to skip
b = br.readSmall(4)
if b == nil {
println("Reading Frame Size EOF")
return io.ErrUnexpectedEOF
b, err = br.readSmall(4)
if err != nil {
println("Reading Frame Size", err)
return err
}
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
println("Skipping frame with", n, "bytes.")
err := br.skipN(int(n))
err = br.skipN(int(n))
if err != nil {
if debug {
println("Reading discarded frame", err)
@ -121,7 +126,7 @@ func (d *frameDec) reset(br byteBuffer) error {
d.SingleSegment = fhd&(1<<5) != 0
if fhd&(1<<3) != 0 {
return errors.New("Reserved bit set on frame header")
return errors.New("reserved bit set on frame header")
}
// Read Window_Descriptor
@ -147,12 +152,11 @@ func (d *frameDec) reset(br byteBuffer) error {
if size == 3 {
size = 4
}
b = br.readSmall(int(size))
if b == nil {
if debug {
println("Reading Dictionary_ID", io.ErrUnexpectedEOF)
}
return io.ErrUnexpectedEOF
b, err = br.readSmall(int(size))
if err != nil {
println("Reading Dictionary_ID", err)
return err
}
var id uint32
switch size {
@ -187,10 +191,10 @@ func (d *frameDec) reset(br byteBuffer) error {
}
d.FrameContentSize = 0
if fcsSize > 0 {
b := br.readSmall(fcsSize)
if b == nil {
println("Reading Frame content", io.ErrUnexpectedEOF)
return io.ErrUnexpectedEOF
b, err = br.readSmall(fcsSize)
if err != nil {
println("Reading Frame content", err)
return err
}
switch fcsSize {
case 1:
@ -307,10 +311,10 @@ func (d *frameDec) checkCRC() error {
tmp[3] = byte(got >> 24)
// We can overwrite upper tmp now
want := d.rawInput.readSmall(4)
if want == nil {
println("CRC missing?")
return io.ErrUnexpectedEOF
want, err := d.rawInput.readSmall(4)
if err != nil {
println("CRC missing?", err)
return err
}
if !bytes.Equal(tmp[:], want) {

View File

@ -97,7 +97,7 @@ func (s *fseEncoder) prepare() (*fseEncoder, error) {
func (s *fseEncoder) allocCtable() {
tableSize := 1 << s.actualTableLog
// get tableSymbol that is big enough.
if cap(s.ct.tableSymbol) < int(tableSize) {
if cap(s.ct.tableSymbol) < tableSize {
s.ct.tableSymbol = make([]byte, tableSize)
}
s.ct.tableSymbol = s.ct.tableSymbol[:tableSize]
@ -202,13 +202,13 @@ func (s *fseEncoder) buildCTable() error {
case 0:
case -1, 1:
symbolTT[i].deltaNbBits = tl
symbolTT[i].deltaFindState = int16(total - 1)
symbolTT[i].deltaFindState = total - 1
total++
default:
maxBitsOut := uint32(tableLog) - highBit(uint32(v-1))
minStatePlus := uint32(v) << maxBitsOut
symbolTT[i].deltaNbBits = (maxBitsOut << 16) - minStatePlus
symbolTT[i].deltaFindState = int16(total - v)
symbolTT[i].deltaFindState = total - v
total += v
}
}
@ -353,8 +353,8 @@ func (s *fseEncoder) normalizeCount2(length int) error {
distributed uint32
total = uint32(length)
tableLog = s.actualTableLog
lowThreshold = uint32(total >> tableLog)
lowOne = uint32((total * 3) >> (tableLog + 1))
lowThreshold = total >> tableLog
lowOne = (total * 3) >> (tableLog + 1)
)
for i, cnt := range s.count[:s.symbolLen] {
if cnt == 0 {
@ -379,7 +379,7 @@ func (s *fseEncoder) normalizeCount2(length int) error {
if (total / toDistribute) > lowOne {
// risk of rounding to zero
lowOne = uint32((total * 3) / (toDistribute * 2))
lowOne = (total * 3) / (toDistribute * 2)
for i, cnt := range s.count[:s.symbolLen] {
if (s.norm[i] == notYetAssigned) && (cnt <= lowOne) {
s.norm[i] = 1
@ -708,7 +708,6 @@ func (c *cState) init(bw *bitWriter, ct *cTable, first symbolTransform) {
im := int32((nbBitsOut << 16) - first.deltaNbBits)
lu := (im >> nbBitsOut) + int32(first.deltaFindState)
c.state = c.stateTable[lu]
return
}
// encode the output symbol provided and write it to the bitstream.

View File

@ -59,7 +59,7 @@ func fillBase(dst []baseOffset, base uint32, bits ...uint8) {
}
for i, bit := range bits {
if base > math.MaxInt32 {
panic(fmt.Sprintf("invalid decoding table, base overflows int32"))
panic("invalid decoding table, base overflows int32")
}
dst[i] = baseOffset{

View File

@ -181,11 +181,18 @@ func (s *sequenceDecs) decode(seqs int, br *bitReader, hist []byte) error {
return fmt.Errorf("output (%d) bigger than max block size", size)
}
if size > cap(s.out) {
// Not enough size, will be extremely rarely triggered,
// Not enough size, which can happen under high volume block streaming conditions
// but could be if destination slice is too small for sync operations.
// We add maxBlockSize to the capacity.
s.out = append(s.out, make([]byte, maxBlockSize)...)
s.out = s.out[:len(s.out)-maxBlockSize]
// over-allocating here can create a large amount of GC pressure so we try to keep
// it as contained as possible
used := len(s.out) - startSize
addBytes := 256 + ll + ml + used>>2
// Clamp to max block size.
if used+addBytes > maxBlockSize {
addBytes = maxBlockSize - used
}
s.out = append(s.out, make([]byte, addBytes)...)
s.out = s.out[:len(s.out)-addBytes]
}
if ml > maxMatchLen {
return fmt.Errorf("match len (%d) bigger than max allowed length", ml)

View File

@ -35,7 +35,6 @@ func (s *seqCoders) setPrev(ll, ml, of *fseEncoder) {
// Ensure we cannot reuse by accident
prevEnc := *prev
prevEnc.symbolLen = 0
return
}
compareSwap(ll, &s.llEnc, &s.llPrev)
compareSwap(ml, &s.mlEnc, &s.mlPrev)

View File

@ -10,8 +10,8 @@ import (
"hash/crc32"
"io"
"github.com/golang/snappy"
"github.com/klauspost/compress/huff0"
"github.com/klauspost/compress/snappy"
)
const (
@ -185,7 +185,6 @@ func (r *SnappyConverter) Convert(in io.Reader, w io.Writer) (int64, error) {
r.block.reset(nil)
r.block.literals, err = snappy.Decode(r.block.literals[:n], r.buf[snappyChecksumSize:chunkLen])
if err != nil {
println("snappy.Decode:", err)
return written, err
}
err = r.block.encodeLits(r.block.literals, false)
@ -417,7 +416,7 @@ var crcTable = crc32.MakeTable(crc32.Castagnoli)
// https://github.com/google/snappy/blob/master/framing_format.txt
func snappyCRC(b []byte) uint32 {
c := crc32.Update(0, crcTable, b)
return uint32(c>>15|c<<17) + 0xa282ead8
return c>>15 | c<<17 + 0xa282ead8
}
// snappyDecodedLen returns the length of the decoded block and the number of bytes

121
vendor/github.com/klauspost/compress/zstd/zip.go generated vendored Normal file
View File

@ -0,0 +1,121 @@
// Copyright 2019+ Klaus Post. All rights reserved.
// License information can be found in the LICENSE file.
package zstd
import (
"errors"
"io"
"sync"
)
// ZipMethodWinZip is the method for Zstandard compressed data inside Zip files for WinZip.
// See https://www.winzip.com/win/en/comp_info.html
const ZipMethodWinZip = 93
// ZipMethodPKWare is the original method number used by PKWARE to indicate Zstandard compression.
// Deprecated: This has been deprecated by PKWARE, use ZipMethodWinZip instead for compression.
// See https://pkware.cachefly.net/webdocs/APPNOTE/APPNOTE-6.3.9.TXT
const ZipMethodPKWare = 20
var zipReaderPool sync.Pool
// newZipReader cannot be used since we would leak goroutines...
func newZipReader(r io.Reader) io.ReadCloser {
dec, ok := zipReaderPool.Get().(*Decoder)
if ok {
dec.Reset(r)
} else {
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
panic(err)
}
dec = d
}
return &pooledZipReader{dec: dec}
}
type pooledZipReader struct {
mu sync.Mutex // guards Close and Read
dec *Decoder
}
func (r *pooledZipReader) Read(p []byte) (n int, err error) {
r.mu.Lock()
defer r.mu.Unlock()
if r.dec == nil {
return 0, errors.New("Read after Close")
}
dec, err := r.dec.Read(p)
return dec, err
}
func (r *pooledZipReader) Close() error {
r.mu.Lock()
defer r.mu.Unlock()
var err error
if r.dec != nil {
err = r.dec.Reset(nil)
zipReaderPool.Put(r.dec)
r.dec = nil
}
return err
}
type pooledZipWriter struct {
mu sync.Mutex // guards Close and Read
enc *Encoder
}
func (w *pooledZipWriter) Write(p []byte) (n int, err error) {
w.mu.Lock()
defer w.mu.Unlock()
if w.enc == nil {
return 0, errors.New("Write after Close")
}
return w.enc.Write(p)
}
func (w *pooledZipWriter) Close() error {
w.mu.Lock()
defer w.mu.Unlock()
var err error
if w.enc != nil {
err = w.enc.Close()
zipReaderPool.Put(w.enc)
w.enc = nil
}
return err
}
// ZipCompressor returns a compressor that can be registered with zip libraries.
// The provided encoder options will be used on all encodes.
func ZipCompressor(opts ...EOption) func(w io.Writer) (io.WriteCloser, error) {
var pool sync.Pool
return func(w io.Writer) (io.WriteCloser, error) {
enc, ok := pool.Get().(*Encoder)
if ok {
enc.Reset(w)
} else {
var err error
enc, err = NewWriter(w, opts...)
if err != nil {
return nil, err
}
}
return &pooledZipWriter{enc: enc}, nil
}
}
// ZipDecompressor returns a decompressor that can be registered with zip libraries.
// See ZipCompressor for example.
func ZipDecompressor() func(r io.Reader) io.ReadCloser {
return func(r io.Reader) io.ReadCloser {
d, err := NewReader(r, WithDecoderConcurrency(1), WithDecoderLowmem(true))
if err != nil {
panic(err)
}
return d.IOReadCloser()
}
}

View File

@ -4,6 +4,8 @@
package zstd
import (
"bytes"
"encoding/binary"
"errors"
"log"
"math"
@ -73,6 +75,10 @@ var (
// ErrDecoderClosed will be returned if the Decoder was used after
// Close has been called.
ErrDecoderClosed = errors.New("decoder used after Close")
// ErrDecoderNilInput is returned when a nil Reader was provided
// and an operation other than Reset/DecodeAll/Close was attempted.
ErrDecoderNilInput = errors.New("nil input provided as reader")
)
func println(a ...interface{}) {
@ -121,24 +127,20 @@ func matchLen(a, b []byte) int {
}
func load3232(b []byte, i int32) uint32 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:4]
return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
return binary.LittleEndian.Uint32(b[i:])
}
func load6432(b []byte, i int32) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}
func load64(b []byte, i int) uint64 {
// Help the compiler eliminate bounds checks on the read so it can be done in a single read.
b = b[i:]
b = b[:8]
return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
return binary.LittleEndian.Uint64(b[i:])
}
type byter interface {
Bytes() []byte
Len() int
}
var _ byter = &bytes.Buffer{}