mirror of
https://gitea.com/Lydanne/buildx.git
synced 2025-07-09 21:17:09 +08:00
vendor: update buildkit to master@ae9d0f5
Signed-off-by: Justin Chadwell <me@jedevc.com>
This commit is contained in:
50
vendor/github.com/klauspost/compress/README.md
generated
vendored
50
vendor/github.com/klauspost/compress/README.md
generated
vendored
@ -17,6 +17,49 @@ This package provides various compression algorithms.
|
||||
|
||||
# changelog
|
||||
|
||||
* Sept 26, 2022 (v1.15.11)
|
||||
|
||||
* flate: Improve level 1-3 compression https://github.com/klauspost/compress/pull/678
|
||||
* zstd: Improve "best" compression by @nightwolfz in https://github.com/klauspost/compress/pull/677
|
||||
* zstd: Fix+reduce decompression allocations https://github.com/klauspost/compress/pull/668
|
||||
* zstd: Fix non-effective noescape tag https://github.com/klauspost/compress/pull/667
|
||||
|
||||
* Sept 16, 2022 (v1.15.10)
|
||||
|
||||
* zstd: Add [WithDecodeAllCapLimit](https://pkg.go.dev/github.com/klauspost/compress@v1.15.10/zstd#WithDecodeAllCapLimit) https://github.com/klauspost/compress/pull/649
|
||||
* Add Go 1.19 - deprecate Go 1.16 https://github.com/klauspost/compress/pull/651
|
||||
* flate: Improve level 5+6 compression https://github.com/klauspost/compress/pull/656
|
||||
* zstd: Improve "better" compresssion https://github.com/klauspost/compress/pull/657
|
||||
* s2: Improve "best" compression https://github.com/klauspost/compress/pull/658
|
||||
* s2: Improve "better" compression. https://github.com/klauspost/compress/pull/635
|
||||
* s2: Slightly faster non-assembly decompression https://github.com/klauspost/compress/pull/646
|
||||
* Use arrays for constant size copies https://github.com/klauspost/compress/pull/659
|
||||
|
||||
* July 21, 2022 (v1.15.9)
|
||||
|
||||
* zstd: Fix decoder crash on amd64 (no BMI) on invalid input https://github.com/klauspost/compress/pull/645
|
||||
* zstd: Disable decoder extended memory copies (amd64) due to possible crashes https://github.com/klauspost/compress/pull/644
|
||||
* zstd: Allow single segments up to "max decoded size" by @klauspost in https://github.com/klauspost/compress/pull/643
|
||||
|
||||
* July 13, 2022 (v1.15.8)
|
||||
|
||||
* gzip: fix stack exhaustion bug in Reader.Read https://github.com/klauspost/compress/pull/641
|
||||
* s2: Add Index header trim/restore https://github.com/klauspost/compress/pull/638
|
||||
* zstd: Optimize seqdeq amd64 asm by @greatroar in https://github.com/klauspost/compress/pull/636
|
||||
* zstd: Improve decoder memcopy https://github.com/klauspost/compress/pull/637
|
||||
* huff0: Pass a single bitReader pointer to asm by @greatroar in https://github.com/klauspost/compress/pull/634
|
||||
* zstd: Branchless getBits for amd64 w/o BMI2 by @greatroar in https://github.com/klauspost/compress/pull/640
|
||||
* gzhttp: Remove header before writing https://github.com/klauspost/compress/pull/639
|
||||
|
||||
* June 29, 2022 (v1.15.7)
|
||||
|
||||
* s2: Fix absolute forward seeks https://github.com/klauspost/compress/pull/633
|
||||
* zip: Merge upstream https://github.com/klauspost/compress/pull/631
|
||||
* zip: Re-add zip64 fix https://github.com/klauspost/compress/pull/624
|
||||
* zstd: translate fseDecoder.buildDtable into asm by @WojciechMula in https://github.com/klauspost/compress/pull/598
|
||||
* flate: Faster histograms https://github.com/klauspost/compress/pull/620
|
||||
* deflate: Use compound hcode https://github.com/klauspost/compress/pull/622
|
||||
|
||||
* June 3, 2022 (v1.15.6)
|
||||
* s2: Improve coding for long, close matches https://github.com/klauspost/compress/pull/613
|
||||
* s2c: Add Snappy/S2 stream recompression https://github.com/klauspost/compress/pull/611
|
||||
@ -72,15 +115,15 @@ This package provides various compression algorithms.
|
||||
* gzhttp: Add zstd to transport by @klauspost in [#400](https://github.com/klauspost/compress/pull/400)
|
||||
* gzhttp: Make content-type optional by @klauspost in [#510](https://github.com/klauspost/compress/pull/510)
|
||||
|
||||
<details>
|
||||
<summary>See Details</summary>
|
||||
Both compression and decompression now supports "synchronous" stream operations. This means that whenever "concurrency" is set to 1, they will operate without spawning goroutines.
|
||||
|
||||
Stream decompression is now faster on asynchronous, since the goroutine allocation much more effectively splits the workload. On typical streams this will typically use 2 cores fully for decompression. When a stream has finished decoding no goroutines will be left over, so decoders can now safely be pooled and still be garbage collected.
|
||||
|
||||
While the release has been extensively tested, it is recommended to testing when upgrading.
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>See changes to v1.14.x</summary>
|
||||
|
||||
* Feb 22, 2022 (v1.14.4)
|
||||
* flate: Fix rare huffman only (-2) corruption. [#503](https://github.com/klauspost/compress/pull/503)
|
||||
* zip: Update deprecated CreateHeaderRaw to correctly call CreateRaw by @saracen in [#502](https://github.com/klauspost/compress/pull/502)
|
||||
@ -106,6 +149,7 @@ While the release has been extensively tested, it is recommended to testing when
|
||||
* zstd: Performance improvement in [#420]( https://github.com/klauspost/compress/pull/420) [#456](https://github.com/klauspost/compress/pull/456) [#437](https://github.com/klauspost/compress/pull/437) [#467](https://github.com/klauspost/compress/pull/467) [#468](https://github.com/klauspost/compress/pull/468)
|
||||
* zstd: add arm64 xxhash assembly in [#464](https://github.com/klauspost/compress/pull/464)
|
||||
* Add garbled for binaries for s2 in [#445](https://github.com/klauspost/compress/pull/445)
|
||||
</details>
|
||||
|
||||
<details>
|
||||
<summary>See changes to v1.13.x</summary>
|
||||
|
36
vendor/github.com/klauspost/compress/huff0/decompress.go
generated
vendored
36
vendor/github.com/klauspost/compress/huff0/decompress.go
generated
vendored
@ -763,17 +763,20 @@ func (d *Decoder) decompress4X8bit(dst, src []byte) ([]byte, error) {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
if len(out)-bufoff < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
//copy(out, buf[0][:])
|
||||
//copy(out[dstEvery:], buf[1][:])
|
||||
//copy(out[dstEvery*2:], buf[2][:])
|
||||
*(*[bufoff]byte)(out) = buf[0]
|
||||
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
|
||||
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
|
||||
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
@ -997,17 +1000,22 @@ func (d *Decoder) decompress4X8bitExactly(dst, src []byte) ([]byte, error) {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
if len(out)-bufoff < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
|
||||
//copy(out, buf[0][:])
|
||||
//copy(out[dstEvery:], buf[1][:])
|
||||
//copy(out[dstEvery*2:], buf[2][:])
|
||||
// copy(out[dstEvery*3:], buf[3][:])
|
||||
*(*[bufoff]byte)(out) = buf[0]
|
||||
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
|
||||
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
|
||||
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
|
14
vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
generated
vendored
14
vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
generated
vendored
@ -14,12 +14,14 @@ import (
|
||||
|
||||
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog > 8.
|
||||
//
|
||||
//go:noescape
|
||||
func decompress4x_main_loop_amd64(ctx *decompress4xContext)
|
||||
|
||||
// decompress4x_8b_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
||||
// per loop.
|
||||
//
|
||||
//go:noescape
|
||||
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
|
||||
|
||||
@ -27,10 +29,7 @@ func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
|
||||
const fallback8BitSize = 800
|
||||
|
||||
type decompress4xContext struct {
|
||||
pbr0 *bitReaderShifted
|
||||
pbr1 *bitReaderShifted
|
||||
pbr2 *bitReaderShifted
|
||||
pbr3 *bitReaderShifted
|
||||
pbr *[4]bitReaderShifted
|
||||
peekBits uint8
|
||||
out *byte
|
||||
dstEvery int
|
||||
@ -89,10 +88,7 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
|
||||
if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
|
||||
ctx := decompress4xContext{
|
||||
pbr0: &br[0],
|
||||
pbr1: &br[1],
|
||||
pbr2: &br[2],
|
||||
pbr3: &br[3],
|
||||
pbr: &br,
|
||||
peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
|
||||
out: &out[0],
|
||||
dstEvery: dstEvery,
|
||||
@ -151,11 +147,13 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
|
||||
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress1X when tablelog > 8.
|
||||
//
|
||||
//go:noescape
|
||||
func decompress1x_main_loop_amd64(ctx *decompress1xContext)
|
||||
|
||||
// decompress4x_main_loop_x86 is an x86 with BMI2 assembler implementation
|
||||
// of Decompress1X when tablelog > 8.
|
||||
//
|
||||
//go:noescape
|
||||
func decompress1x_main_loop_bmi2(ctx *decompress1xContext)
|
||||
|
||||
|
667
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
generated
vendored
667
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
generated
vendored
@ -1,48 +1,42 @@
|
||||
// Code generated by command: go run gen.go -out ../decompress_amd64.s -pkg=huff0. DO NOT EDIT.
|
||||
|
||||
//go:build amd64 && !appengine && !noasm && gc
|
||||
// +build amd64,!appengine,!noasm,gc
|
||||
|
||||
// func decompress4x_main_loop_amd64(ctx *decompress4xContext)
|
||||
TEXT ·decompress4x_main_loop_amd64(SB), $8-8
|
||||
TEXT ·decompress4x_main_loop_amd64(SB), $0-8
|
||||
XORQ DX, DX
|
||||
|
||||
// Preload values
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVBQZX 32(AX), SI
|
||||
MOVQ 40(AX), DI
|
||||
MOVQ DI, BX
|
||||
MOVQ 72(AX), CX
|
||||
MOVQ CX, (SP)
|
||||
MOVQ 48(AX), R8
|
||||
MOVQ 56(AX), R9
|
||||
MOVQ (AX), R10
|
||||
MOVQ 8(AX), R11
|
||||
MOVQ 16(AX), R12
|
||||
MOVQ 24(AX), R13
|
||||
MOVBQZX 8(AX), DI
|
||||
MOVQ 16(AX), SI
|
||||
MOVQ 48(AX), BX
|
||||
MOVQ 24(AX), R9
|
||||
MOVQ 32(AX), R10
|
||||
MOVQ (AX), R11
|
||||
|
||||
// Main loop
|
||||
main_loop:
|
||||
MOVQ BX, DI
|
||||
CMPQ DI, (SP)
|
||||
MOVQ SI, R8
|
||||
CMPQ R8, BX
|
||||
SETGE DL
|
||||
|
||||
// br0.fillFast32()
|
||||
MOVQ 32(R10), R14
|
||||
MOVBQZX 40(R10), R15
|
||||
CMPQ R15, $0x20
|
||||
MOVQ 32(R11), R12
|
||||
MOVBQZX 40(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill0
|
||||
MOVQ 24(R10), AX
|
||||
SUBQ $0x20, R15
|
||||
MOVQ 24(R11), AX
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, AX
|
||||
MOVQ (R10), BP
|
||||
MOVQ (R11), R14
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (AX)(BP*1), BP
|
||||
MOVQ R15, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ AX, 24(R10)
|
||||
ORQ BP, R14
|
||||
MOVL (AX)(R14*1), R14
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R14
|
||||
MOVQ AX, 24(R11)
|
||||
ORQ R14, R12
|
||||
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ AX, $0x04
|
||||
@ -51,57 +45,57 @@ main_loop:
|
||||
|
||||
skip_fill0:
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
MOVQ R14, BP
|
||||
MOVQ SI, CX
|
||||
SHRQ CL, BP
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ SI, CX
|
||||
MOVQ R14, BP
|
||||
SHRQ CL, BP
|
||||
MOVQ DI, CX
|
||||
MOVQ R12, R14
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// these two writes get coalesced
|
||||
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
MOVW AX, (DI)
|
||||
MOVW AX, (R8)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVQ R14, 32(R10)
|
||||
MOVB R15, 40(R10)
|
||||
ADDQ R8, DI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 32(R11)
|
||||
MOVB R13, 40(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br1.fillFast32()
|
||||
MOVQ 32(R11), R14
|
||||
MOVBQZX 40(R11), R15
|
||||
CMPQ R15, $0x20
|
||||
MOVQ 80(R11), R12
|
||||
MOVBQZX 88(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill1
|
||||
MOVQ 24(R11), AX
|
||||
SUBQ $0x20, R15
|
||||
MOVQ 72(R11), AX
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, AX
|
||||
MOVQ (R11), BP
|
||||
MOVQ 48(R11), R14
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (AX)(BP*1), BP
|
||||
MOVQ R15, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ AX, 24(R11)
|
||||
ORQ BP, R14
|
||||
MOVL (AX)(R14*1), R14
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R14
|
||||
MOVQ AX, 72(R11)
|
||||
ORQ R14, R12
|
||||
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ AX, $0x04
|
||||
@ -110,57 +104,57 @@ skip_fill0:
|
||||
|
||||
skip_fill1:
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
MOVQ R14, BP
|
||||
MOVQ SI, CX
|
||||
SHRQ CL, BP
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ SI, CX
|
||||
MOVQ R14, BP
|
||||
SHRQ CL, BP
|
||||
MOVQ DI, CX
|
||||
MOVQ R12, R14
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// these two writes get coalesced
|
||||
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
MOVW AX, (DI)
|
||||
MOVW AX, (R8)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVQ R14, 32(R11)
|
||||
MOVB R15, 40(R11)
|
||||
ADDQ R8, DI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 80(R11)
|
||||
MOVB R13, 88(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br2.fillFast32()
|
||||
MOVQ 32(R12), R14
|
||||
MOVBQZX 40(R12), R15
|
||||
CMPQ R15, $0x20
|
||||
MOVQ 128(R11), R12
|
||||
MOVBQZX 136(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill2
|
||||
MOVQ 24(R12), AX
|
||||
SUBQ $0x20, R15
|
||||
MOVQ 120(R11), AX
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, AX
|
||||
MOVQ (R12), BP
|
||||
MOVQ 96(R11), R14
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (AX)(BP*1), BP
|
||||
MOVQ R15, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ AX, 24(R12)
|
||||
ORQ BP, R14
|
||||
MOVL (AX)(R14*1), R14
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R14
|
||||
MOVQ AX, 120(R11)
|
||||
ORQ R14, R12
|
||||
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ AX, $0x04
|
||||
@ -169,57 +163,57 @@ skip_fill1:
|
||||
|
||||
skip_fill2:
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
MOVQ R14, BP
|
||||
MOVQ SI, CX
|
||||
SHRQ CL, BP
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ SI, CX
|
||||
MOVQ R14, BP
|
||||
SHRQ CL, BP
|
||||
MOVQ DI, CX
|
||||
MOVQ R12, R14
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// these two writes get coalesced
|
||||
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
MOVW AX, (DI)
|
||||
MOVW AX, (R8)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVQ R14, 32(R12)
|
||||
MOVB R15, 40(R12)
|
||||
ADDQ R8, DI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 128(R11)
|
||||
MOVB R13, 136(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br3.fillFast32()
|
||||
MOVQ 32(R13), R14
|
||||
MOVBQZX 40(R13), R15
|
||||
CMPQ R15, $0x20
|
||||
MOVQ 176(R11), R12
|
||||
MOVBQZX 184(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill3
|
||||
MOVQ 24(R13), AX
|
||||
SUBQ $0x20, R15
|
||||
MOVQ 168(R11), AX
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, AX
|
||||
MOVQ (R13), BP
|
||||
MOVQ 144(R11), R14
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (AX)(BP*1), BP
|
||||
MOVQ R15, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ AX, 24(R13)
|
||||
ORQ BP, R14
|
||||
MOVL (AX)(R14*1), R14
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R14
|
||||
MOVQ AX, 168(R11)
|
||||
ORQ R14, R12
|
||||
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ AX, $0x04
|
||||
@ -228,149 +222,142 @@ skip_fill2:
|
||||
|
||||
skip_fill3:
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
MOVQ R14, BP
|
||||
MOVQ SI, CX
|
||||
SHRQ CL, BP
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ SI, CX
|
||||
MOVQ R14, BP
|
||||
SHRQ CL, BP
|
||||
MOVQ DI, CX
|
||||
MOVQ R12, R14
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW (R9)(BP*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R14
|
||||
ADDB CL, R15
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// these two writes get coalesced
|
||||
// out[id * dstEvery + 0] = uint8(v0.entry >> 8)
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
MOVW AX, (DI)
|
||||
MOVW AX, (R8)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVQ R14, 32(R13)
|
||||
MOVB R15, 40(R13)
|
||||
ADDQ $0x02, BX
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 176(R11)
|
||||
MOVB R13, 184(R11)
|
||||
ADDQ $0x02, SI
|
||||
TESTB DL, DL
|
||||
JZ main_loop
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ 40(AX), CX
|
||||
MOVQ BX, DX
|
||||
SUBQ CX, DX
|
||||
SHLQ $0x02, DX
|
||||
MOVQ DX, 64(AX)
|
||||
SUBQ 16(AX), SI
|
||||
SHLQ $0x02, SI
|
||||
MOVQ SI, 40(AX)
|
||||
RET
|
||||
|
||||
// func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext)
|
||||
TEXT ·decompress4x_8b_main_loop_amd64(SB), $16-8
|
||||
TEXT ·decompress4x_8b_main_loop_amd64(SB), $0-8
|
||||
XORQ DX, DX
|
||||
|
||||
// Preload values
|
||||
MOVQ ctx+0(FP), CX
|
||||
MOVBQZX 32(CX), BX
|
||||
MOVQ 40(CX), SI
|
||||
MOVQ SI, (SP)
|
||||
MOVQ 72(CX), DX
|
||||
MOVQ DX, 8(SP)
|
||||
MOVQ 48(CX), DI
|
||||
MOVQ 56(CX), R8
|
||||
MOVQ (CX), R9
|
||||
MOVQ 8(CX), R10
|
||||
MOVQ 16(CX), R11
|
||||
MOVQ 24(CX), R12
|
||||
MOVBQZX 8(CX), DI
|
||||
MOVQ 16(CX), BX
|
||||
MOVQ 48(CX), SI
|
||||
MOVQ 24(CX), R9
|
||||
MOVQ 32(CX), R10
|
||||
MOVQ (CX), R11
|
||||
|
||||
// Main loop
|
||||
main_loop:
|
||||
MOVQ (SP), SI
|
||||
CMPQ SI, 8(SP)
|
||||
MOVQ BX, R8
|
||||
CMPQ R8, SI
|
||||
SETGE DL
|
||||
|
||||
// br1000.fillFast32()
|
||||
MOVQ 32(R9), R13
|
||||
MOVBQZX 40(R9), R14
|
||||
CMPQ R14, $0x20
|
||||
JBE skip_fill1000
|
||||
MOVQ 24(R9), R15
|
||||
SUBQ $0x20, R14
|
||||
SUBQ $0x04, R15
|
||||
MOVQ (R9), BP
|
||||
// br0.fillFast32()
|
||||
MOVQ 32(R11), R12
|
||||
MOVBQZX 40(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill0
|
||||
MOVQ 24(R11), R14
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, R14
|
||||
MOVQ (R11), R15
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (R15)(BP*1), BP
|
||||
MOVQ R14, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ R15, 24(R9)
|
||||
ORQ BP, R13
|
||||
MOVL (R14)(R15*1), R15
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R15
|
||||
MOVQ R14, 24(R11)
|
||||
ORQ R15, R12
|
||||
|
||||
// exhausted = exhausted || (br1000.off < 4)
|
||||
CMPQ R15, $0x04
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ R14, $0x04
|
||||
SETLT AL
|
||||
ORB AL, DL
|
||||
|
||||
skip_fill1000:
|
||||
skip_fill0:
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v1.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// val2 := br0.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v2.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val3 := br0.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v3 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br0.advance(uint8(v3.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// these four writes get coalesced
|
||||
@ -378,88 +365,88 @@ skip_fill1000:
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
|
||||
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
|
||||
MOVL AX, (SI)
|
||||
MOVL AX, (R8)
|
||||
|
||||
// update the bitreader reader structure
|
||||
MOVQ R13, 32(R9)
|
||||
MOVB R14, 40(R9)
|
||||
ADDQ DI, SI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 32(R11)
|
||||
MOVB R13, 40(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br1001.fillFast32()
|
||||
MOVQ 32(R10), R13
|
||||
MOVBQZX 40(R10), R14
|
||||
CMPQ R14, $0x20
|
||||
JBE skip_fill1001
|
||||
MOVQ 24(R10), R15
|
||||
SUBQ $0x20, R14
|
||||
SUBQ $0x04, R15
|
||||
MOVQ (R10), BP
|
||||
// br1.fillFast32()
|
||||
MOVQ 80(R11), R12
|
||||
MOVBQZX 88(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill1
|
||||
MOVQ 72(R11), R14
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, R14
|
||||
MOVQ 48(R11), R15
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (R15)(BP*1), BP
|
||||
MOVQ R14, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ R15, 24(R10)
|
||||
ORQ BP, R13
|
||||
MOVL (R14)(R15*1), R15
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R15
|
||||
MOVQ R14, 72(R11)
|
||||
ORQ R15, R12
|
||||
|
||||
// exhausted = exhausted || (br1001.off < 4)
|
||||
CMPQ R15, $0x04
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ R14, $0x04
|
||||
SETLT AL
|
||||
ORB AL, DL
|
||||
|
||||
skip_fill1001:
|
||||
skip_fill1:
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v1.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// val2 := br1.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v2.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val3 := br1.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v3 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br1.advance(uint8(v3.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// these four writes get coalesced
|
||||
@ -467,88 +454,88 @@ skip_fill1001:
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
|
||||
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
|
||||
MOVL AX, (SI)
|
||||
MOVL AX, (R8)
|
||||
|
||||
// update the bitreader reader structure
|
||||
MOVQ R13, 32(R10)
|
||||
MOVB R14, 40(R10)
|
||||
ADDQ DI, SI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 80(R11)
|
||||
MOVB R13, 88(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br1002.fillFast32()
|
||||
MOVQ 32(R11), R13
|
||||
MOVBQZX 40(R11), R14
|
||||
CMPQ R14, $0x20
|
||||
JBE skip_fill1002
|
||||
MOVQ 24(R11), R15
|
||||
SUBQ $0x20, R14
|
||||
SUBQ $0x04, R15
|
||||
MOVQ (R11), BP
|
||||
// br2.fillFast32()
|
||||
MOVQ 128(R11), R12
|
||||
MOVBQZX 136(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill2
|
||||
MOVQ 120(R11), R14
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, R14
|
||||
MOVQ 96(R11), R15
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (R15)(BP*1), BP
|
||||
MOVQ R14, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ R15, 24(R11)
|
||||
ORQ BP, R13
|
||||
MOVL (R14)(R15*1), R15
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R15
|
||||
MOVQ R14, 120(R11)
|
||||
ORQ R15, R12
|
||||
|
||||
// exhausted = exhausted || (br1002.off < 4)
|
||||
CMPQ R15, $0x04
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ R14, $0x04
|
||||
SETLT AL
|
||||
ORB AL, DL
|
||||
|
||||
skip_fill1002:
|
||||
skip_fill2:
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v1.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// val2 := br2.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v2.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val3 := br2.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v3 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br2.advance(uint8(v3.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// these four writes get coalesced
|
||||
@ -556,88 +543,88 @@ skip_fill1002:
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
|
||||
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
|
||||
MOVL AX, (SI)
|
||||
MOVL AX, (R8)
|
||||
|
||||
// update the bitreader reader structure
|
||||
MOVQ R13, 32(R11)
|
||||
MOVB R14, 40(R11)
|
||||
ADDQ DI, SI
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 128(R11)
|
||||
MOVB R13, 136(R11)
|
||||
ADDQ R9, R8
|
||||
|
||||
// br1003.fillFast32()
|
||||
MOVQ 32(R12), R13
|
||||
MOVBQZX 40(R12), R14
|
||||
CMPQ R14, $0x20
|
||||
JBE skip_fill1003
|
||||
MOVQ 24(R12), R15
|
||||
SUBQ $0x20, R14
|
||||
SUBQ $0x04, R15
|
||||
MOVQ (R12), BP
|
||||
// br3.fillFast32()
|
||||
MOVQ 176(R11), R12
|
||||
MOVBQZX 184(R11), R13
|
||||
CMPQ R13, $0x20
|
||||
JBE skip_fill3
|
||||
MOVQ 168(R11), R14
|
||||
SUBQ $0x20, R13
|
||||
SUBQ $0x04, R14
|
||||
MOVQ 144(R11), R15
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVL (R15)(BP*1), BP
|
||||
MOVQ R14, CX
|
||||
SHLQ CL, BP
|
||||
MOVQ R15, 24(R12)
|
||||
ORQ BP, R13
|
||||
MOVL (R14)(R15*1), R15
|
||||
MOVQ R13, CX
|
||||
SHLQ CL, R15
|
||||
MOVQ R14, 168(R11)
|
||||
ORQ R15, R12
|
||||
|
||||
// exhausted = exhausted || (br1003.off < 4)
|
||||
CMPQ R15, $0x04
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ R14, $0x04
|
||||
SETLT AL
|
||||
ORB AL, DL
|
||||
|
||||
skip_fill1003:
|
||||
skip_fill3:
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v0.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v1 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v1.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// val2 := br3.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v2.entry)
|
||||
MOVB CH, AH
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
|
||||
// val3 := br3.peekTopBits(peekBits)
|
||||
MOVQ R13, R15
|
||||
MOVQ BX, CX
|
||||
SHRQ CL, R15
|
||||
MOVQ R12, R14
|
||||
MOVQ DI, CX
|
||||
SHRQ CL, R14
|
||||
|
||||
// v3 := table[val0&mask]
|
||||
MOVW (R8)(R15*2), CX
|
||||
MOVW (R10)(R14*2), CX
|
||||
|
||||
// br3.advance(uint8(v3.entry)
|
||||
MOVB CH, AL
|
||||
SHLQ CL, R13
|
||||
ADDB CL, R14
|
||||
SHLQ CL, R12
|
||||
ADDB CL, R13
|
||||
BSWAPL AX
|
||||
|
||||
// these four writes get coalesced
|
||||
@ -645,20 +632,18 @@ skip_fill1003:
|
||||
// out[id * dstEvery + 1] = uint8(v1.entry >> 8)
|
||||
// out[id * dstEvery + 3] = uint8(v2.entry >> 8)
|
||||
// out[id * dstEvery + 4] = uint8(v3.entry >> 8)
|
||||
MOVL AX, (SI)
|
||||
MOVL AX, (R8)
|
||||
|
||||
// update the bitreader reader structure
|
||||
MOVQ R13, 32(R12)
|
||||
MOVB R14, 40(R12)
|
||||
ADDQ $0x04, (SP)
|
||||
// update the bitreader structure
|
||||
MOVQ R12, 176(R11)
|
||||
MOVB R13, 184(R11)
|
||||
ADDQ $0x04, BX
|
||||
TESTB DL, DL
|
||||
JZ main_loop
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ 40(AX), CX
|
||||
MOVQ (SP), DX
|
||||
SUBQ CX, DX
|
||||
SHLQ $0x02, DX
|
||||
MOVQ DX, 64(AX)
|
||||
SUBQ 16(AX), BX
|
||||
SHLQ $0x02, BX
|
||||
MOVQ BX, 40(AX)
|
||||
RET
|
||||
|
||||
// func decompress1x_main_loop_amd64(ctx *decompress1xContext)
|
||||
@ -750,10 +735,8 @@ loop_condition:
|
||||
|
||||
// Update ctx structure
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, CX
|
||||
MOVQ 16(AX), DX
|
||||
SUBQ DX, CX
|
||||
MOVQ CX, 40(AX)
|
||||
SUBQ 16(AX), DX
|
||||
MOVQ DX, 40(AX)
|
||||
MOVQ (AX), AX
|
||||
MOVQ R9, 24(AX)
|
||||
MOVQ R10, 32(AX)
|
||||
@ -847,10 +830,8 @@ loop_condition:
|
||||
|
||||
// Update ctx structure
|
||||
MOVQ ctx+0(FP), AX
|
||||
MOVQ DX, CX
|
||||
MOVQ 16(AX), DX
|
||||
SUBQ DX, CX
|
||||
MOVQ CX, 40(AX)
|
||||
SUBQ 16(AX), DX
|
||||
MOVQ DX, 40(AX)
|
||||
MOVQ (AX), AX
|
||||
MOVQ R9, 24(AX)
|
||||
MOVQ R10, 32(AX)
|
||||
|
18
vendor/github.com/klauspost/compress/huff0/decompress_generic.go
generated
vendored
18
vendor/github.com/klauspost/compress/huff0/decompress_generic.go
generated
vendored
@ -122,17 +122,21 @@ func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
if len(out)-bufoff < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
//copy(out, buf[0][:])
|
||||
//copy(out[dstEvery:], buf[1][:])
|
||||
//copy(out[dstEvery*2:], buf[2][:])
|
||||
//copy(out[dstEvery*3:], buf[3][:])
|
||||
*(*[bufoff]byte)(out) = buf[0]
|
||||
*(*[bufoff]byte)(out[dstEvery:]) = buf[1]
|
||||
*(*[bufoff]byte)(out[dstEvery*2:]) = buf[2]
|
||||
*(*[bufoff]byte)(out[dstEvery*3:]) = buf[3]
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
|
6
vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
generated
vendored
6
vendor/github.com/klauspost/compress/internal/snapref/encode_other.go
generated
vendored
@ -18,6 +18,7 @@ func load64(b []byte, i int) uint64 {
|
||||
// emitLiteral writes a literal chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= len(lit) && len(lit) <= 65536
|
||||
func emitLiteral(dst, lit []byte) int {
|
||||
@ -42,6 +43,7 @@ func emitLiteral(dst, lit []byte) int {
|
||||
// emitCopy writes a copy chunk and returns the number of bytes written.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// dst is long enough to hold the encoded bytes
|
||||
// 1 <= offset && offset <= 65535
|
||||
// 4 <= length && length <= 65535
|
||||
@ -89,6 +91,7 @@ func emitCopy(dst []byte, offset, length int) int {
|
||||
// src[i:i+k-j] and src[j:k] have the same contents.
|
||||
//
|
||||
// It assumes that:
|
||||
//
|
||||
// 0 <= i && i < j && j <= len(src)
|
||||
func extendMatch(src []byte, i, j int) int {
|
||||
for ; j < len(src) && src[i] == src[j]; i, j = i+1, j+1 {
|
||||
@ -105,8 +108,9 @@ func hash(u, shift uint32) uint32 {
|
||||
// been written.
|
||||
//
|
||||
// It also assumes that:
|
||||
//
|
||||
// len(dst) >= MaxEncodedLen(len(src)) &&
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
// minNonLiteralBlockSize <= len(src) && len(src) <= maxBlockSize
|
||||
func encodeBlock(dst, src []byte) (d int) {
|
||||
// Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive.
|
||||
// The table element type is uint16, as s < sLimit and sLimit < len(src)
|
||||
|
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/README.md
generated
vendored
@ -12,6 +12,8 @@ The `zstd` package is provided as open source software using a Go standard licen
|
||||
|
||||
Currently the package is heavily optimized for 64 bit processors and will be significantly slower on 32 bit processors.
|
||||
|
||||
For seekable zstd streams, see [this excellent package](https://github.com/SaveTheRbtz/zstd-seekable-format-go).
|
||||
|
||||
## Installation
|
||||
|
||||
Install using `go get -u github.com/klauspost/compress`. The package is located in `github.com/klauspost/compress/zstd`.
|
||||
|
5
vendor/github.com/klauspost/compress/zstd/blockdec.go
generated
vendored
5
vendor/github.com/klauspost/compress/zstd/blockdec.go
generated
vendored
@ -10,7 +10,6 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
@ -233,7 +232,7 @@ func (b *blockDec) decodeBuf(hist *history) error {
|
||||
if b.lowMem {
|
||||
b.dst = make([]byte, b.RLESize)
|
||||
} else {
|
||||
b.dst = make([]byte, maxBlockSize)
|
||||
b.dst = make([]byte, maxCompressedBlockSize)
|
||||
}
|
||||
}
|
||||
b.dst = b.dst[:b.RLESize]
|
||||
@ -651,7 +650,7 @@ func (b *blockDec) prepareSequences(in []byte, hist *history) (err error) {
|
||||
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.matchLengths.fse))
|
||||
fatalErr(binary.Write(&buf, binary.LittleEndian, hist.decoders.offsets.fse))
|
||||
buf.Write(in)
|
||||
ioutil.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
||||
os.WriteFile(filepath.Join("testdata", "seqs", fn), buf.Bytes(), os.ModePerm)
|
||||
}
|
||||
|
||||
return nil
|
||||
|
16
vendor/github.com/klauspost/compress/zstd/bytebuf.go
generated
vendored
16
vendor/github.com/klauspost/compress/zstd/bytebuf.go
generated
vendored
@ -7,7 +7,6 @@ package zstd
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
)
|
||||
|
||||
type byteBuffer interface {
|
||||
@ -23,7 +22,7 @@ type byteBuffer interface {
|
||||
readByte() (byte, error)
|
||||
|
||||
// Skip n bytes.
|
||||
skipN(n int) error
|
||||
skipN(n int64) error
|
||||
}
|
||||
|
||||
// in-memory buffer
|
||||
@ -62,9 +61,12 @@ func (b *byteBuf) readByte() (byte, error) {
|
||||
return r, nil
|
||||
}
|
||||
|
||||
func (b *byteBuf) skipN(n int) error {
|
||||
func (b *byteBuf) skipN(n int64) error {
|
||||
bb := *b
|
||||
if len(bb) < n {
|
||||
if n < 0 {
|
||||
return fmt.Errorf("negative skip (%d) requested", n)
|
||||
}
|
||||
if int64(len(bb)) < n {
|
||||
return io.ErrUnexpectedEOF
|
||||
}
|
||||
*b = bb[n:]
|
||||
@ -120,9 +122,9 @@ func (r *readerWrapper) readByte() (byte, error) {
|
||||
return r.tmp[0], nil
|
||||
}
|
||||
|
||||
func (r *readerWrapper) skipN(n int) error {
|
||||
n2, err := io.CopyN(ioutil.Discard, r.r, int64(n))
|
||||
if n2 != int64(n) {
|
||||
func (r *readerWrapper) skipN(n int64) error {
|
||||
n2, err := io.CopyN(io.Discard, r.r, n)
|
||||
if n2 != n {
|
||||
err = io.ErrUnexpectedEOF
|
||||
}
|
||||
return err
|
||||
|
47
vendor/github.com/klauspost/compress/zstd/decoder.go
generated
vendored
47
vendor/github.com/klauspost/compress/zstd/decoder.go
generated
vendored
@ -35,6 +35,7 @@ type Decoder struct {
|
||||
br readerWrapper
|
||||
enabled bool
|
||||
inFrame bool
|
||||
dstBuf []byte
|
||||
}
|
||||
|
||||
frame *frameDec
|
||||
@ -187,21 +188,23 @@ func (d *Decoder) Reset(r io.Reader) error {
|
||||
}
|
||||
|
||||
// If bytes buffer and < 5MB, do sync decoding anyway.
|
||||
if bb, ok := r.(byter); ok && bb.Len() < 5<<20 {
|
||||
if bb, ok := r.(byter); ok && bb.Len() < d.o.decodeBufsBelow && !d.o.limitToCap {
|
||||
bb2 := bb
|
||||
if debugDecoder {
|
||||
println("*bytes.Buffer detected, doing sync decode, len:", bb.Len())
|
||||
}
|
||||
b := bb2.Bytes()
|
||||
var dst []byte
|
||||
if cap(d.current.b) > 0 {
|
||||
dst = d.current.b
|
||||
if cap(d.syncStream.dstBuf) > 0 {
|
||||
dst = d.syncStream.dstBuf[:0]
|
||||
}
|
||||
|
||||
dst, err := d.DecodeAll(b, dst[:0])
|
||||
dst, err := d.DecodeAll(b, dst)
|
||||
if err == nil {
|
||||
err = io.EOF
|
||||
}
|
||||
// Save output buffer
|
||||
d.syncStream.dstBuf = dst
|
||||
d.current.b = dst
|
||||
d.current.err = err
|
||||
d.current.flushed = true
|
||||
@ -216,6 +219,7 @@ func (d *Decoder) Reset(r io.Reader) error {
|
||||
d.current.err = nil
|
||||
d.current.flushed = false
|
||||
d.current.d = nil
|
||||
d.syncStream.dstBuf = nil
|
||||
|
||||
// Ensure no-one else is still running...
|
||||
d.streamWg.Wait()
|
||||
@ -312,6 +316,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
// Grab a block decoder and frame decoder.
|
||||
block := <-d.decoders
|
||||
frame := block.localFrame
|
||||
initialSize := len(dst)
|
||||
defer func() {
|
||||
if debugDecoder {
|
||||
printf("re-adding decoder: %p", block)
|
||||
@ -348,10 +353,22 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
frame.history.setDict(&dict)
|
||||
}
|
||||
if frame.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
println("window size exceeded:", frame.WindowSize, ">", d.o.maxWindowSize)
|
||||
}
|
||||
return dst, ErrWindowSizeExceeded
|
||||
}
|
||||
if frame.FrameContentSize != fcsUnknown {
|
||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)) {
|
||||
if frame.FrameContentSize > d.o.maxDecodedSize-uint64(len(dst)-initialSize) {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> mcs:", d.o.maxDecodedSize-uint64(len(dst)-initialSize), "len:", len(dst))
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if d.o.limitToCap && frame.FrameContentSize > uint64(cap(dst)-len(dst)) {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded; fcs:", frame.FrameContentSize, "> (cap-len)", cap(dst)-len(dst))
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if cap(dst)-len(dst) < int(frame.FrameContentSize) {
|
||||
@ -361,7 +378,7 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
}
|
||||
}
|
||||
|
||||
if cap(dst) == 0 {
|
||||
if cap(dst) == 0 && !d.o.limitToCap {
|
||||
// Allocate len(input) * 2 by default if nothing is provided
|
||||
// and we didn't get frame content size.
|
||||
size := len(input) * 2
|
||||
@ -379,6 +396,9 @@ func (d *Decoder) DecodeAll(input, dst []byte) ([]byte, error) {
|
||||
if err != nil {
|
||||
return dst, err
|
||||
}
|
||||
if uint64(len(dst)-initialSize) > d.o.maxDecodedSize {
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if len(frame.bBuf) == 0 {
|
||||
if debugDecoder {
|
||||
println("frame dbuf empty")
|
||||
@ -664,6 +684,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("Async 1: new history, recent:", block.async.newHist.recentOffsets)
|
||||
}
|
||||
hist.reset()
|
||||
hist.decoders = block.async.newHist.decoders
|
||||
hist.recentOffsets = block.async.newHist.recentOffsets
|
||||
hist.windowSize = block.async.newHist.windowSize
|
||||
@ -695,6 +716,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
seqExecute <- block
|
||||
}
|
||||
close(seqExecute)
|
||||
hist.reset()
|
||||
}()
|
||||
|
||||
var wg sync.WaitGroup
|
||||
@ -718,6 +740,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("Async 2: new history")
|
||||
}
|
||||
hist.reset()
|
||||
hist.windowSize = block.async.newHist.windowSize
|
||||
hist.allocFrameBuffer = block.async.newHist.allocFrameBuffer
|
||||
if block.async.newHist.dict != nil {
|
||||
@ -747,7 +770,7 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if block.lowMem {
|
||||
block.dst = make([]byte, block.RLESize)
|
||||
} else {
|
||||
block.dst = make([]byte, maxBlockSize)
|
||||
block.dst = make([]byte, maxCompressedBlockSize)
|
||||
}
|
||||
}
|
||||
block.dst = block.dst[:block.RLESize]
|
||||
@ -799,13 +822,14 @@ func (d *Decoder) startStreamDecoder(ctx context.Context, r io.Reader, output ch
|
||||
if debugDecoder {
|
||||
println("decoder goroutines finished")
|
||||
}
|
||||
hist.reset()
|
||||
}()
|
||||
|
||||
var hist history
|
||||
decodeStream:
|
||||
for {
|
||||
var hist history
|
||||
var hasErr bool
|
||||
|
||||
hist.reset()
|
||||
decodeBlock := func(block *blockDec) {
|
||||
if hasErr {
|
||||
if block != nil {
|
||||
@ -849,6 +873,10 @@ decodeStream:
|
||||
}
|
||||
}
|
||||
if err == nil && d.frame.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
println("decoder size exceeded, fws:", d.frame.WindowSize, "> mws:", d.o.maxWindowSize)
|
||||
}
|
||||
|
||||
err = ErrDecoderSizeExceeded
|
||||
}
|
||||
if err != nil {
|
||||
@ -917,5 +945,6 @@ decodeStream:
|
||||
}
|
||||
close(seqDecode)
|
||||
wg.Wait()
|
||||
hist.reset()
|
||||
d.frame.history.b = frameHistCache
|
||||
}
|
||||
|
44
vendor/github.com/klauspost/compress/zstd/decoder_options.go
generated
vendored
44
vendor/github.com/klauspost/compress/zstd/decoder_options.go
generated
vendored
@ -14,20 +14,23 @@ type DOption func(*decoderOptions) error
|
||||
|
||||
// options retains accumulated state of multiple options.
|
||||
type decoderOptions struct {
|
||||
lowMem bool
|
||||
concurrent int
|
||||
maxDecodedSize uint64
|
||||
maxWindowSize uint64
|
||||
dicts []dict
|
||||
ignoreChecksum bool
|
||||
lowMem bool
|
||||
concurrent int
|
||||
maxDecodedSize uint64
|
||||
maxWindowSize uint64
|
||||
dicts []dict
|
||||
ignoreChecksum bool
|
||||
limitToCap bool
|
||||
decodeBufsBelow int
|
||||
}
|
||||
|
||||
func (o *decoderOptions) setDefault() {
|
||||
*o = decoderOptions{
|
||||
// use less ram: true for now, but may change.
|
||||
lowMem: true,
|
||||
concurrent: runtime.GOMAXPROCS(0),
|
||||
maxWindowSize: MaxWindowSize,
|
||||
lowMem: true,
|
||||
concurrent: runtime.GOMAXPROCS(0),
|
||||
maxWindowSize: MaxWindowSize,
|
||||
decodeBufsBelow: 128 << 10,
|
||||
}
|
||||
if o.concurrent > 4 {
|
||||
o.concurrent = 4
|
||||
@ -114,6 +117,29 @@ func WithDecoderMaxWindow(size uint64) DOption {
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecodeAllCapLimit will limit DecodeAll to decoding cap(dst)-len(dst) bytes,
|
||||
// or any size set in WithDecoderMaxMemory.
|
||||
// This can be used to limit decoding to a specific maximum output size.
|
||||
// Disabled by default.
|
||||
func WithDecodeAllCapLimit(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.limitToCap = b
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// WithDecodeBuffersBelow will fully decode readers that have a
|
||||
// `Bytes() []byte` and `Len() int` interface similar to bytes.Buffer.
|
||||
// This typically uses less allocations but will have the full decompressed object in memory.
|
||||
// Note that DecodeAllCapLimit will disable this, as well as giving a size of 0 or less.
|
||||
// Default is 128KiB.
|
||||
func WithDecodeBuffersBelow(size int) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
o.decodeBufsBelow = size
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// IgnoreChecksum allows to forcibly ignore checksum checking.
|
||||
func IgnoreChecksum(b bool) DOption {
|
||||
return func(o *decoderOptions) error {
|
||||
|
1
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
1
vendor/github.com/klauspost/compress/zstd/enc_best.go
generated
vendored
@ -32,6 +32,7 @@ type match struct {
|
||||
length int32
|
||||
rep int32
|
||||
est int32
|
||||
_ [12]byte // Aligned size to cache line: 4+4+4+4+4 bytes + 12 bytes padding = 32 bytes
|
||||
}
|
||||
|
||||
const highScore = 25000
|
||||
|
23
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
23
vendor/github.com/klauspost/compress/zstd/enc_better.go
generated
vendored
@ -416,15 +416,23 @@ encodeLoop:
|
||||
|
||||
// Try to find a better match by searching for a long match at the end of the current best match
|
||||
if s+matched < sLimit {
|
||||
// Allow some bytes at the beginning to mismatch.
|
||||
// Sweet spot is around 3 bytes, but depends on input.
|
||||
// The skipped bytes are tested in Extend backwards,
|
||||
// and still picked up as part of the match if they do.
|
||||
const skipBeginning = 3
|
||||
|
||||
nextHashL := hashLen(load6432(src, s+matched), betterLongTableBits, betterLongLen)
|
||||
cv := load3232(src, s)
|
||||
s2 := s + skipBeginning
|
||||
cv := load3232(src, s2)
|
||||
candidateL := e.longTable[nextHashL]
|
||||
coffsetL := candidateL.offset - e.cur - matched
|
||||
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
coffsetL := candidateL.offset - e.cur - matched + skipBeginning
|
||||
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
// Found a long match, at least 4 bytes.
|
||||
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
|
||||
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
|
||||
if matchedNext > matched {
|
||||
t = coffsetL
|
||||
s = s2
|
||||
matched = matchedNext
|
||||
if debugMatches {
|
||||
println("long match at end-of-match")
|
||||
@ -434,12 +442,13 @@ encodeLoop:
|
||||
|
||||
// Check prev long...
|
||||
if true {
|
||||
coffsetL = candidateL.prev - e.cur - matched
|
||||
if coffsetL >= 0 && coffsetL < s && s-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
coffsetL = candidateL.prev - e.cur - matched + skipBeginning
|
||||
if coffsetL >= 0 && coffsetL < s2 && s2-coffsetL < e.maxMatchOff && cv == load3232(src, coffsetL) {
|
||||
// Found a long match, at least 4 bytes.
|
||||
matchedNext := e.matchlen(s+4, coffsetL+4, src) + 4
|
||||
matchedNext := e.matchlen(s2+4, coffsetL+4, src) + 4
|
||||
if matchedNext > matched {
|
||||
t = coffsetL
|
||||
s = s2
|
||||
matched = matchedNext
|
||||
if debugMatches {
|
||||
println("prev long match at end-of-match")
|
||||
|
7
vendor/github.com/klauspost/compress/zstd/enc_dfast.go
generated
vendored
7
vendor/github.com/klauspost/compress/zstd/enc_dfast.go
generated
vendored
@ -1103,7 +1103,8 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
}
|
||||
|
||||
if allDirty || dirtyShardCnt > dLongTableShardCnt/2 {
|
||||
copy(e.longTable[:], e.dictLongTable)
|
||||
//copy(e.longTable[:], e.dictLongTable)
|
||||
e.longTable = *(*[dFastLongTableSize]tableEntry)(e.dictLongTable)
|
||||
for i := range e.longTableShardDirty {
|
||||
e.longTableShardDirty[i] = false
|
||||
}
|
||||
@ -1114,7 +1115,9 @@ func (e *doubleFastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
continue
|
||||
}
|
||||
|
||||
copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
|
||||
// copy(e.longTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize], e.dictLongTable[i*dLongTableShardSize:(i+1)*dLongTableShardSize])
|
||||
*(*[dLongTableShardSize]tableEntry)(e.longTable[i*dLongTableShardSize:]) = *(*[dLongTableShardSize]tableEntry)(e.dictLongTable[i*dLongTableShardSize:])
|
||||
|
||||
e.longTableShardDirty[i] = false
|
||||
}
|
||||
}
|
||||
|
8
vendor/github.com/klauspost/compress/zstd/enc_fast.go
generated
vendored
8
vendor/github.com/klauspost/compress/zstd/enc_fast.go
generated
vendored
@ -304,7 +304,7 @@ func (e *fastEncoder) EncodeNoHist(blk *blockEnc, src []byte) {
|
||||
minNonLiteralBlockSize = 1 + 1 + inputMargin
|
||||
)
|
||||
if debugEncoder {
|
||||
if len(src) > maxBlockSize {
|
||||
if len(src) > maxCompressedBlockSize {
|
||||
panic("src too big")
|
||||
}
|
||||
}
|
||||
@ -871,7 +871,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
const shardCnt = tableShardCnt
|
||||
const shardSize = tableShardSize
|
||||
if e.allDirty || dirtyShardCnt > shardCnt*4/6 {
|
||||
copy(e.table[:], e.dictTable)
|
||||
//copy(e.table[:], e.dictTable)
|
||||
e.table = *(*[tableSize]tableEntry)(e.dictTable)
|
||||
for i := range e.tableShardDirty {
|
||||
e.tableShardDirty[i] = false
|
||||
}
|
||||
@ -883,7 +884,8 @@ func (e *fastEncoderDict) Reset(d *dict, singleBlock bool) {
|
||||
continue
|
||||
}
|
||||
|
||||
copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
|
||||
//copy(e.table[i*shardSize:(i+1)*shardSize], e.dictTable[i*shardSize:(i+1)*shardSize])
|
||||
*(*[shardSize]tableEntry)(e.table[i*shardSize:]) = *(*[shardSize]tableEntry)(e.dictTable[i*shardSize:])
|
||||
e.tableShardDirty[i] = false
|
||||
}
|
||||
e.allDirty = false
|
||||
|
4
vendor/github.com/klauspost/compress/zstd/encoder.go
generated
vendored
4
vendor/github.com/klauspost/compress/zstd/encoder.go
generated
vendored
@ -528,8 +528,8 @@ func (e *Encoder) EncodeAll(src, dst []byte) []byte {
|
||||
// If a non-single block is needed the encoder will reset again.
|
||||
e.encoders <- enc
|
||||
}()
|
||||
// Use single segments when above minimum window and below 1MB.
|
||||
single := len(src) < 1<<20 && len(src) > MinWindowSize
|
||||
// Use single segments when above minimum window and below window size.
|
||||
single := len(src) <= e.o.windowSize && len(src) > MinWindowSize
|
||||
if e.o.single != nil {
|
||||
single = *e.o.single
|
||||
}
|
||||
|
2
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
2
vendor/github.com/klauspost/compress/zstd/encoder_options.go
generated
vendored
@ -283,7 +283,7 @@ func WithNoEntropyCompression(b bool) EOption {
|
||||
// a decoder is allowed to reject a compressed frame which requests a memory size beyond decoder's authorized range.
|
||||
// For broader compatibility, decoders are recommended to support memory sizes of at least 8 MB.
|
||||
// This is only a recommendation, each decoder is free to support higher or lower limits, depending on local limitations.
|
||||
// If this is not specified, block encodes will automatically choose this based on the input size.
|
||||
// If this is not specified, block encodes will automatically choose this based on the input size and the window size.
|
||||
// This setting has no effect on streamed encodes.
|
||||
func WithSingleSegment(b bool) EOption {
|
||||
return func(o *encoderOptions) error {
|
||||
|
57
vendor/github.com/klauspost/compress/zstd/framedec.go
generated
vendored
57
vendor/github.com/klauspost/compress/zstd/framedec.go
generated
vendored
@ -106,7 +106,7 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
}
|
||||
n := uint32(b[0]) | (uint32(b[1]) << 8) | (uint32(b[2]) << 16) | (uint32(b[3]) << 24)
|
||||
println("Skipping frame with", n, "bytes.")
|
||||
err = br.skipN(int(n))
|
||||
err = br.skipN(int64(n))
|
||||
if err != nil {
|
||||
if debugDecoder {
|
||||
println("Reading discarded frame", err)
|
||||
@ -231,20 +231,27 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
d.crc.Reset()
|
||||
}
|
||||
|
||||
if d.WindowSize > d.o.maxWindowSize {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrWindowSizeExceeded
|
||||
}
|
||||
|
||||
if d.WindowSize == 0 && d.SingleSegment {
|
||||
// We may not need window in this case.
|
||||
d.WindowSize = d.FrameContentSize
|
||||
if d.WindowSize < MinWindowSize {
|
||||
d.WindowSize = MinWindowSize
|
||||
}
|
||||
if d.WindowSize > d.o.maxDecodedSize {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrDecoderSizeExceeded
|
||||
}
|
||||
}
|
||||
|
||||
if d.WindowSize > uint64(d.o.maxWindowSize) {
|
||||
if debugDecoder {
|
||||
printf("window size %d > max %d\n", d.WindowSize, d.o.maxWindowSize)
|
||||
}
|
||||
return ErrWindowSizeExceeded
|
||||
}
|
||||
// The minimum Window_Size is 1 KB.
|
||||
if d.WindowSize < MinWindowSize {
|
||||
if debugDecoder {
|
||||
@ -254,11 +261,16 @@ func (d *frameDec) reset(br byteBuffer) error {
|
||||
}
|
||||
d.history.windowSize = int(d.WindowSize)
|
||||
if !d.o.lowMem || d.history.windowSize < maxBlockSize {
|
||||
// Alloc 2x window size if not low-mem, or very small window size.
|
||||
// Alloc 2x window size if not low-mem, or window size below 2MB.
|
||||
d.history.allocFrameBuffer = d.history.windowSize * 2
|
||||
} else {
|
||||
// Alloc with one additional block
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
||||
if d.o.lowMem {
|
||||
// Alloc with 1MB extra.
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize/2
|
||||
} else {
|
||||
// Alloc with 2MB extra.
|
||||
d.history.allocFrameBuffer = d.history.windowSize + maxBlockSize
|
||||
}
|
||||
}
|
||||
|
||||
if debugDecoder {
|
||||
@ -336,7 +348,7 @@ func (d *frameDec) consumeCRC() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// runDecoder will create a sync decoder that will decode a block of data.
|
||||
// runDecoder will run the decoder for the remainder of the frame.
|
||||
func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
saved := d.history.b
|
||||
|
||||
@ -346,12 +358,23 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
// Store input length, so we only check new data.
|
||||
crcStart := len(dst)
|
||||
d.history.decoders.maxSyncLen = 0
|
||||
if d.o.limitToCap {
|
||||
d.history.decoders.maxSyncLen = uint64(cap(dst) - len(dst))
|
||||
}
|
||||
if d.FrameContentSize != fcsUnknown {
|
||||
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
|
||||
if !d.o.limitToCap || d.FrameContentSize+uint64(len(dst)) < d.history.decoders.maxSyncLen {
|
||||
d.history.decoders.maxSyncLen = d.FrameContentSize + uint64(len(dst))
|
||||
}
|
||||
if d.history.decoders.maxSyncLen > d.o.maxDecodedSize {
|
||||
if debugDecoder {
|
||||
println("maxSyncLen:", d.history.decoders.maxSyncLen, "> maxDecodedSize:", d.o.maxDecodedSize)
|
||||
}
|
||||
return dst, ErrDecoderSizeExceeded
|
||||
}
|
||||
if uint64(cap(dst)) < d.history.decoders.maxSyncLen {
|
||||
if debugDecoder {
|
||||
println("maxSyncLen:", d.history.decoders.maxSyncLen)
|
||||
}
|
||||
if !d.o.limitToCap && uint64(cap(dst)) < d.history.decoders.maxSyncLen {
|
||||
// Alloc for output
|
||||
dst2 := make([]byte, len(dst), d.history.decoders.maxSyncLen+compressedBlockOverAlloc)
|
||||
copy(dst2, dst)
|
||||
@ -371,7 +394,13 @@ func (d *frameDec) runDecoder(dst []byte, dec *blockDec) ([]byte, error) {
|
||||
if err != nil {
|
||||
break
|
||||
}
|
||||
if uint64(len(d.history.b)) > d.o.maxDecodedSize {
|
||||
if uint64(len(d.history.b)-crcStart) > d.o.maxDecodedSize {
|
||||
println("runDecoder: maxDecodedSize exceeded", uint64(len(d.history.b)-crcStart), ">", d.o.maxDecodedSize)
|
||||
err = ErrDecoderSizeExceeded
|
||||
break
|
||||
}
|
||||
if d.o.limitToCap && len(d.history.b) > cap(dst) {
|
||||
println("runDecoder: cap exceeded", uint64(len(d.history.b)), ">", cap(dst))
|
||||
err = ErrDecoderSizeExceeded
|
||||
break
|
||||
}
|
||||
|
7
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
generated
vendored
7
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.go
generated
vendored
@ -21,7 +21,8 @@ type buildDtableAsmContext struct {
|
||||
|
||||
// buildDtable_asm is an x86 assembly implementation of fseDecoder.buildDtable.
|
||||
// Function returns non-zero exit code on error.
|
||||
// go:noescape
|
||||
//
|
||||
//go:noescape
|
||||
func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||
|
||||
// please keep in sync with _generate/gen_fse.go
|
||||
@ -34,8 +35,8 @@ const (
|
||||
// buildDtable will build the decoding table.
|
||||
func (s *fseDecoder) buildDtable() error {
|
||||
ctx := buildDtableAsmContext{
|
||||
stateTable: (*uint16)(&s.stateTable[0]),
|
||||
norm: (*int16)(&s.norm[0]),
|
||||
stateTable: &s.stateTable[0],
|
||||
norm: &s.norm[0],
|
||||
dt: (*uint64)(&s.dt[0]),
|
||||
}
|
||||
code := buildDtable_asm(s, &ctx)
|
||||
|
1
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
generated
vendored
1
vendor/github.com/klauspost/compress/zstd/fse_decoder_amd64.s
generated
vendored
@ -1,7 +1,6 @@
|
||||
// Code generated by command: go run gen_fse.go -out ../fse_decoder_amd64.s -pkg=zstd. DO NOT EDIT.
|
||||
|
||||
//go:build !appengine && !noasm && gc && !noasm
|
||||
// +build !appengine,!noasm,gc,!noasm
|
||||
|
||||
// func buildDtable_asm(s *fseDecoder, ctx *buildDtableAsmContext) int
|
||||
TEXT ·buildDtable_asm(SB), $0-24
|
||||
|
25
vendor/github.com/klauspost/compress/zstd/history.go
generated
vendored
25
vendor/github.com/klauspost/compress/zstd/history.go
generated
vendored
@ -37,26 +37,23 @@ func (h *history) reset() {
|
||||
h.ignoreBuffer = 0
|
||||
h.error = false
|
||||
h.recentOffsets = [3]int{1, 4, 8}
|
||||
if f := h.decoders.litLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
if f := h.decoders.offsets.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
if f := h.decoders.matchLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
}
|
||||
h.decoders.freeDecoders()
|
||||
h.decoders = sequenceDecs{br: h.decoders.br}
|
||||
if h.huffTree != nil {
|
||||
if h.dict == nil || h.dict.litEnc != h.huffTree {
|
||||
huffDecoderPool.Put(h.huffTree)
|
||||
}
|
||||
}
|
||||
h.freeHuffDecoder()
|
||||
h.huffTree = nil
|
||||
h.dict = nil
|
||||
//printf("history created: %+v (l: %d, c: %d)", *h, len(h.b), cap(h.b))
|
||||
}
|
||||
|
||||
func (h *history) freeHuffDecoder() {
|
||||
if h.huffTree != nil {
|
||||
if h.dict == nil || h.dict.litEnc != h.huffTree {
|
||||
huffDecoderPool.Put(h.huffTree)
|
||||
h.huffTree = nil
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (h *history) setDict(dict *dict) {
|
||||
if dict == nil {
|
||||
return
|
||||
|
22
vendor/github.com/klauspost/compress/zstd/seqdec.go
generated
vendored
22
vendor/github.com/klauspost/compress/zstd/seqdec.go
generated
vendored
@ -99,6 +99,21 @@ func (s *sequenceDecs) initialize(br *bitReader, hist *history, out []byte) erro
|
||||
return nil
|
||||
}
|
||||
|
||||
func (s *sequenceDecs) freeDecoders() {
|
||||
if f := s.litLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.litLengths.fse = nil
|
||||
}
|
||||
if f := s.offsets.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.offsets.fse = nil
|
||||
}
|
||||
if f := s.matchLengths.fse; f != nil && !f.preDefined {
|
||||
fseDecoderPool.Put(f)
|
||||
s.matchLengths.fse = nil
|
||||
}
|
||||
}
|
||||
|
||||
// execute will execute the decoded sequence with the provided history.
|
||||
// The sequence must be evaluated before being sent.
|
||||
func (s *sequenceDecs) execute(seqs []seqVals, hist []byte) error {
|
||||
@ -299,7 +314,10 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
|
||||
}
|
||||
size := ll + ml + len(out)
|
||||
if size-startSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||
if size-startSize == 424242 {
|
||||
panic("here")
|
||||
}
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
if size > cap(out) {
|
||||
// Not enough size, which can happen under high volume block streaming conditions
|
||||
@ -411,7 +429,7 @@ func (s *sequenceDecs) decodeSync(hist []byte) error {
|
||||
|
||||
// Check if space for literals
|
||||
if size := len(s.literals) + len(s.out) - startSize; size > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", size, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
|
||||
// Add final literals
|
||||
|
43
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
generated
vendored
43
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.go
generated
vendored
@ -32,18 +32,22 @@ type decodeSyncAsmContext struct {
|
||||
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
|
||||
|
||||
@ -55,16 +59,22 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
|
||||
return false, nil
|
||||
}
|
||||
useSafe := false
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
|
||||
useSafe = true
|
||||
}
|
||||
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
|
||||
// FIXME: Using unsafe memory copies leads to rare, random crashes
|
||||
// with fuzz testing. It is therefore disabled for now.
|
||||
const useSafe = true
|
||||
/*
|
||||
useSafe := false
|
||||
if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
|
||||
useSafe = true
|
||||
}
|
||||
if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
|
||||
useSafe = true
|
||||
}
|
||||
*/
|
||||
|
||||
br := s.br
|
||||
|
||||
@ -129,7 +139,7 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
if debugDecoder {
|
||||
println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
|
||||
}
|
||||
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", size-startSize, maxBlockSize)
|
||||
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
|
||||
default:
|
||||
return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
|
||||
@ -137,7 +147,8 @@ func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
|
||||
|
||||
s.seqSize += ctx.litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return true, fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
@ -195,20 +206,24 @@ const errorNotEnoughSpace = 5
|
||||
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
|
||||
|
||||
@ -275,7 +290,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
|
||||
s.seqSize += ctx.litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
@ -302,10 +317,12 @@ type executeAsmContext struct {
|
||||
// Returns false if a match offset is too big.
|
||||
//
|
||||
// Please refer to seqdec_generic.go for the reference implementation.
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
|
||||
|
||||
// Same as above, but with safe memcopies
|
||||
//
|
||||
//go:noescape
|
||||
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
|
||||
|
||||
|
2384
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
2384
vendor/github.com/klauspost/compress/zstd/seqdec_amd64.s
generated
vendored
File diff suppressed because it is too large
Load Diff
4
vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
generated
vendored
4
vendor/github.com/klauspost/compress/zstd/seqdec_generic.go
generated
vendored
@ -111,7 +111,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
}
|
||||
s.seqSize += ll + ml
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
litRemain -= ll
|
||||
if litRemain < 0 {
|
||||
@ -149,7 +149,7 @@ func (s *sequenceDecs) decode(seqs []seqVals) error {
|
||||
}
|
||||
s.seqSize += litRemain
|
||||
if s.seqSize > maxBlockSize {
|
||||
return fmt.Errorf("output (%d) bigger than max block size (%d)", s.seqSize, maxBlockSize)
|
||||
return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
|
||||
}
|
||||
err := br.close()
|
||||
if err != nil {
|
||||
|
Reference in New Issue
Block a user