mirror of
https://gitea.com/Lydanne/buildx.git
synced 2025-07-09 21:17:09 +08:00
vendor: update buildkit to 3e38a2d
Signed-off-by: CrazyMax <crazy-max@users.noreply.github.com>
This commit is contained in:
5
vendor/github.com/klauspost/compress/huff0/autogen.go
generated
vendored
Normal file
5
vendor/github.com/klauspost/compress/huff0/autogen.go
generated
vendored
Normal file
@ -0,0 +1,5 @@
|
||||
package huff0
|
||||
|
||||
//go:generate go run generate.go
|
||||
//go:generate asmfmt -w decompress_amd64.s
|
||||
//go:generate asmfmt -w decompress_8b_amd64.s
|
5
vendor/github.com/klauspost/compress/huff0/bitreader.go
generated
vendored
5
vendor/github.com/klauspost/compress/huff0/bitreader.go
generated
vendored
@ -165,6 +165,11 @@ func (b *bitReaderShifted) peekBitsFast(n uint8) uint16 {
|
||||
return uint16(b.value >> ((64 - n) & 63))
|
||||
}
|
||||
|
||||
// peekTopBits(n) is equvialent to peekBitFast(64 - n)
|
||||
func (b *bitReaderShifted) peekTopBits(n uint8) uint16 {
|
||||
return uint16(b.value >> n)
|
||||
}
|
||||
|
||||
func (b *bitReaderShifted) advance(n uint8) {
|
||||
b.bitsRead += n
|
||||
b.value <<= n & 63
|
||||
|
183
vendor/github.com/klauspost/compress/huff0/decompress.go
generated
vendored
183
vendor/github.com/klauspost/compress/huff0/decompress.go
generated
vendored
@ -725,189 +725,6 @@ func (d *Decoder) decompress1X8BitExactly(dst, src []byte) ([]byte, error) {
|
||||
return dst, br.close()
|
||||
}
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
if use8BitTables && d.actualTableLog <= 8 {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 0
|
||||
const stream2 = 1
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 2
|
||||
const stream2 = 3
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
off += 2
|
||||
|
||||
if off == 0 {
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
|
488
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
generated
vendored
Normal file
488
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s
generated
vendored
Normal file
@ -0,0 +1,488 @@
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
|
||||
// const stream = 0
|
||||
// br0.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br0), br_value
|
||||
MOVQ bitReaderShifted_off(br0), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill0
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br0), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill0:
|
||||
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 0(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br0.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 0+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||
MOVQ br_value, bitReaderShifted_value(br0)
|
||||
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||
|
||||
// const stream = 1
|
||||
// br1.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br1), br_value
|
||||
MOVQ bitReaderShifted_off(br1), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill1
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br1), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill1:
|
||||
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 256(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br1.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 256+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||
MOVQ br_value, bitReaderShifted_value(br1)
|
||||
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||
|
||||
// const stream = 2
|
||||
// br2.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br2), br_value
|
||||
MOVQ bitReaderShifted_off(br2), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill2
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br2), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill2:
|
||||
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 512(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br2.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 512+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||
MOVQ br_value, bitReaderShifted_value(br2)
|
||||
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||
|
||||
// const stream = 3
|
||||
// br3.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br3), br_value
|
||||
MOVQ bitReaderShifted_off(br3), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill3
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br3), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill3:
|
||||
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 768(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br3.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, 768+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||
MOVQ br_value, bitReaderShifted_value(br3)
|
||||
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||
|
||||
ADDQ $4, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
197
vendor/github.com/klauspost/compress/huff0/decompress_8b_amd64.s.in
generated
vendored
Normal file
@ -0,0 +1,197 @@
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
{{ define "decode_2_values_x86" }}
|
||||
// const stream = {{ var "id" }}
|
||||
// br{{ var "id"}}.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||
|
||||
// if b.bitsRead >= 32 {
|
||||
CMPQ br_bits_read, $32
|
||||
JB skip_fill{{ var "id" }}
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
// }
|
||||
skip_fill{{ var "id" }}:
|
||||
|
||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||
|
||||
// SECOND PART:
|
||||
// val2 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v2 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// val3 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
// v3 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CX, br_value // value <<= n
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off+2] = uint8(v2.entry >> 8)
|
||||
// buf[stream][off+3] = uint8(v3.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}+2(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||
{{ end }}
|
||||
|
||||
{{ set "id" "0" }}
|
||||
{{ set "ofs" "0" }}
|
||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "1" }}
|
||||
{{ set "ofs" "8" }}
|
||||
{{ set "bufofs" "256" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "2" }}
|
||||
{{ set "ofs" "16" }}
|
||||
{{ set "bufofs" "512" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "3" }}
|
||||
{{ set "ofs" "24" }}
|
||||
{{ set "bufofs" "768" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
ADDQ $4, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
181
vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
generated
vendored
Normal file
181
vendor/github.com/klauspost/compress/huff0/decompress_amd64.go
generated
vendored
Normal file
@ -0,0 +1,181 @@
|
||||
//go:build amd64 && !appengine && !noasm && gc
|
||||
// +build amd64,!appengine,!noasm,gc
|
||||
|
||||
// This file contains the specialisation of Decoder.Decompress4X
|
||||
// that uses an asm implementation of its main loop.
|
||||
package huff0
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// decompress4x_main_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog > 8.
|
||||
// go:noescape
|
||||
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||
|
||||
// decompress4x_8b_loop_x86 is an x86 assembler implementation
|
||||
// of Decompress4X when tablelog <= 8 which decodes 4 entries
|
||||
// per loop.
|
||||
// go:noescape
|
||||
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
|
||||
|
||||
// fallback8BitSize is the size where using Go version is faster.
|
||||
const fallback8BitSize = 800
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
|
||||
use8BitTables := d.actualTableLog <= 8
|
||||
if cap(dst) < fallback8BitSize && use8BitTables {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
const debug = false
|
||||
|
||||
// see: bitReaderShifted.peekBitsFast()
|
||||
peekBits := uint8((64 - d.actualTableLog) & 63)
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
if use8BitTables {
|
||||
off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||
} else {
|
||||
off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
|
||||
}
|
||||
if debug {
|
||||
fmt.Print("DEBUG: ")
|
||||
fmt.Printf("off=%d,", off)
|
||||
for i := 0; i < 4; i++ {
|
||||
fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
|
||||
i, br[i].bitsRead, br[i].value, br[i].off)
|
||||
}
|
||||
fmt.Println("")
|
||||
}
|
||||
|
||||
if off != 0 {
|
||||
break
|
||||
}
|
||||
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
506
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
generated
vendored
Normal file
506
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s
generated
vendored
Normal file
@ -0,0 +1,506 @@
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#ifdef GOAMD64_v4
|
||||
#ifndef GOAMD64_v3
|
||||
#define GOAMD64_v3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
|
||||
// const stream = 0
|
||||
// br0.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br0), br_value
|
||||
MOVQ bitReaderShifted_off(br0), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill0
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br0), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br0.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill0:
|
||||
|
||||
// val0 := br0.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br0.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br0.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br0.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 0(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
|
||||
MOVQ br_value, bitReaderShifted_value(br0)
|
||||
MOVQ br_offset, bitReaderShifted_off(br0)
|
||||
|
||||
// const stream = 1
|
||||
// br1.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br1), br_value
|
||||
MOVQ bitReaderShifted_off(br1), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill1
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br1), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br1.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill1:
|
||||
|
||||
// val0 := br1.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br1.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br1.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br1.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 256(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
|
||||
MOVQ br_value, bitReaderShifted_value(br1)
|
||||
MOVQ br_offset, bitReaderShifted_off(br1)
|
||||
|
||||
// const stream = 2
|
||||
// br2.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br2), br_value
|
||||
MOVQ bitReaderShifted_off(br2), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill2
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br2), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br2.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill2:
|
||||
|
||||
// val0 := br2.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br2.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br2.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br2.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 512(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
|
||||
MOVQ br_value, bitReaderShifted_value(br2)
|
||||
MOVQ br_offset, bitReaderShifted_off(br2)
|
||||
|
||||
// const stream = 3
|
||||
// br3.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br3), br_value
|
||||
MOVQ bitReaderShifted_off(br3), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill3
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br3), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br3.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
|
||||
// }
|
||||
skip_fill3:
|
||||
|
||||
// val0 := br3.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br3.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#else
|
||||
// val1 := br3.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br3.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, 768(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
|
||||
MOVQ br_value, bitReaderShifted_value(br3)
|
||||
MOVQ br_offset, bitReaderShifted_off(br3)
|
||||
|
||||
ADDQ $2, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
195
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
generated
vendored
Normal file
195
vendor/github.com/klauspost/compress/huff0/decompress_amd64.s.in
generated
vendored
Normal file
@ -0,0 +1,195 @@
|
||||
// +build !appengine
|
||||
// +build gc
|
||||
// +build !noasm
|
||||
|
||||
#include "textflag.h"
|
||||
#include "funcdata.h"
|
||||
#include "go_asm.h"
|
||||
|
||||
#ifdef GOAMD64_v4
|
||||
#ifndef GOAMD64_v3
|
||||
#define GOAMD64_v3
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#define bufoff 256 // see decompress.go, we're using [4][256]byte table
|
||||
|
||||
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
|
||||
// peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
|
||||
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
|
||||
#define off R8
|
||||
#define buffer DI
|
||||
#define table SI
|
||||
|
||||
#define br_bits_read R9
|
||||
#define br_value R10
|
||||
#define br_offset R11
|
||||
#define peek_bits R12
|
||||
#define exhausted DX
|
||||
|
||||
#define br0 R13
|
||||
#define br1 R14
|
||||
#define br2 R15
|
||||
#define br3 BP
|
||||
|
||||
MOVQ BP, 0(SP)
|
||||
|
||||
XORQ exhausted, exhausted // exhausted = false
|
||||
XORQ off, off // off = 0
|
||||
|
||||
MOVBQZX peekBits+32(FP), peek_bits
|
||||
MOVQ buf+40(FP), buffer
|
||||
MOVQ tbl+48(FP), table
|
||||
|
||||
MOVQ pbr0+0(FP), br0
|
||||
MOVQ pbr1+8(FP), br1
|
||||
MOVQ pbr2+16(FP), br2
|
||||
MOVQ pbr3+24(FP), br3
|
||||
|
||||
main_loop:
|
||||
{{ define "decode_2_values_x86" }}
|
||||
// const stream = {{ var "id" }}
|
||||
// br{{ var "id"}}.fillFast()
|
||||
MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
|
||||
MOVQ bitReaderShifted_value(br{{ var "id" }}), br_value
|
||||
MOVQ bitReaderShifted_off(br{{ var "id" }}), br_offset
|
||||
|
||||
// We must have at least 2 * max tablelog left
|
||||
CMPQ br_bits_read, $64-22
|
||||
JBE skip_fill{{ var "id" }}
|
||||
|
||||
SUBQ $32, br_bits_read // b.bitsRead -= 32
|
||||
SUBQ $4, br_offset // b.off -= 4
|
||||
|
||||
// v := b.in[b.off-4 : b.off]
|
||||
// v = v[:4]
|
||||
// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
|
||||
MOVQ bitReaderShifted_in(br{{ var "id" }}), AX
|
||||
|
||||
// b.value |= uint64(low) << (b.bitsRead & 63)
|
||||
#ifdef GOAMD64_v3
|
||||
SHLXQ br_bits_read, 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4]) << (b.bitsRead & 63)
|
||||
#else
|
||||
MOVL 0(br_offset)(AX*1), AX // AX = uint32(b.in[b.off:b.off+4])
|
||||
MOVQ br_bits_read, CX
|
||||
SHLQ CL, AX
|
||||
#endif
|
||||
|
||||
ORQ AX, br_value
|
||||
|
||||
// exhausted = exhausted || (br{{ var "id"}}.off < 4)
|
||||
CMPQ br_offset, $4
|
||||
SETLT DL
|
||||
ORB DL, DH
|
||||
// }
|
||||
skip_fill{{ var "id" }}:
|
||||
|
||||
// val0 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
#else
|
||||
MOVQ br_value, AX
|
||||
MOVQ peek_bits, CX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
#endif
|
||||
|
||||
// v0 := table[val0&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v0
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v0.entry))
|
||||
MOVB AH, BL // BL = uint8(v0.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
SHRXQ peek_bits, br_value, AX // AX = (value >> peek_bits) & mask
|
||||
#else
|
||||
// val1 := br{{ var "id"}}.peekTopBits(peekBits)
|
||||
MOVQ peek_bits, CX
|
||||
MOVQ br_value, AX
|
||||
SHRQ CL, AX // AX = (value >> peek_bits) & mask
|
||||
#endif
|
||||
|
||||
// v1 := table[val1&mask]
|
||||
MOVW 0(table)(AX*2), AX // AX - v1
|
||||
|
||||
// br{{ var "id"}}.advance(uint8(v1.entry))
|
||||
MOVB AH, BH // BH = uint8(v1.entry >> 8)
|
||||
|
||||
#ifdef GOAMD64_v3
|
||||
MOVBQZX AL, CX
|
||||
SHLXQ AX, br_value, br_value // value <<= n
|
||||
#else
|
||||
MOVBQZX AL, CX
|
||||
SHLQ CL, br_value // value <<= n
|
||||
#endif
|
||||
|
||||
ADDQ CX, br_bits_read // bits_read += n
|
||||
|
||||
|
||||
// these two writes get coalesced
|
||||
// buf[stream][off] = uint8(v0.entry >> 8)
|
||||
// buf[stream][off+1] = uint8(v1.entry >> 8)
|
||||
MOVW BX, {{ var "bufofs" }}(buffer)(off*1)
|
||||
|
||||
// update the bitrader reader structure
|
||||
MOVB br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
|
||||
MOVQ br_value, bitReaderShifted_value(br{{ var "id" }})
|
||||
MOVQ br_offset, bitReaderShifted_off(br{{ var "id" }})
|
||||
{{ end }}
|
||||
|
||||
{{ set "id" "0" }}
|
||||
{{ set "ofs" "0" }}
|
||||
{{ set "bufofs" "0" }} {{/* id * bufoff */}}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "1" }}
|
||||
{{ set "ofs" "8" }}
|
||||
{{ set "bufofs" "256" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "2" }}
|
||||
{{ set "ofs" "16" }}
|
||||
{{ set "bufofs" "512" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
{{ set "id" "3" }}
|
||||
{{ set "ofs" "24" }}
|
||||
{{ set "bufofs" "768" }}
|
||||
{{ template "decode_2_values_x86" . }}
|
||||
|
||||
ADDQ $2, off // off += 2
|
||||
|
||||
TESTB DH, DH // any br[i].ofs < 4?
|
||||
JNZ end
|
||||
|
||||
CMPQ off, $bufoff
|
||||
JL main_loop
|
||||
end:
|
||||
MOVQ 0(SP), BP
|
||||
|
||||
MOVB off, ret+56(FP)
|
||||
RET
|
||||
#undef off
|
||||
#undef buffer
|
||||
#undef table
|
||||
|
||||
#undef br_bits_read
|
||||
#undef br_value
|
||||
#undef br_offset
|
||||
#undef peek_bits
|
||||
#undef exhausted
|
||||
|
||||
#undef br0
|
||||
#undef br1
|
||||
#undef br2
|
||||
#undef br3
|
193
vendor/github.com/klauspost/compress/huff0/decompress_generic.go
generated
vendored
Normal file
193
vendor/github.com/klauspost/compress/huff0/decompress_generic.go
generated
vendored
Normal file
@ -0,0 +1,193 @@
|
||||
//go:build !amd64 || appengine || !gc || noasm
|
||||
// +build !amd64 appengine !gc noasm
|
||||
|
||||
// This file contains a generic implementation of Decoder.Decompress4X.
|
||||
package huff0
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
)
|
||||
|
||||
// Decompress4X will decompress a 4X encoded stream.
|
||||
// The length of the supplied input must match the end of a block exactly.
|
||||
// The *capacity* of the dst slice must match the destination size of
|
||||
// the uncompressed data exactly.
|
||||
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
|
||||
if len(d.dt.single) == 0 {
|
||||
return nil, errors.New("no table loaded")
|
||||
}
|
||||
if len(src) < 6+(4*1) {
|
||||
return nil, errors.New("input too small")
|
||||
}
|
||||
if use8BitTables && d.actualTableLog <= 8 {
|
||||
return d.decompress4X8bit(dst, src)
|
||||
}
|
||||
|
||||
var br [4]bitReaderShifted
|
||||
// Decode "jump table"
|
||||
start := 6
|
||||
for i := 0; i < 3; i++ {
|
||||
length := int(src[i*2]) | (int(src[i*2+1]) << 8)
|
||||
if start+length >= len(src) {
|
||||
return nil, errors.New("truncated input (or invalid offset)")
|
||||
}
|
||||
err := br[i].init(src[start : start+length])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
start += length
|
||||
}
|
||||
err := br[3].init(src[start:])
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// destination, offset to match first output
|
||||
dstSize := cap(dst)
|
||||
dst = dst[:dstSize]
|
||||
out := dst
|
||||
dstEvery := (dstSize + 3) / 4
|
||||
|
||||
const tlSize = 1 << tableLogMax
|
||||
const tlMask = tlSize - 1
|
||||
single := d.dt.single[:tlSize]
|
||||
|
||||
// Use temp table to avoid bound checks/append penalty.
|
||||
buf := d.buffer()
|
||||
var off uint8
|
||||
var decoded int
|
||||
|
||||
// Decode 2 values from each decoder/loop.
|
||||
const bufoff = 256
|
||||
for {
|
||||
if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
|
||||
break
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 0
|
||||
const stream2 = 1
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
{
|
||||
const stream = 2
|
||||
const stream2 = 3
|
||||
br[stream].fillFast()
|
||||
br[stream2].fillFast()
|
||||
|
||||
val := br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 := br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask]
|
||||
v2 := single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off] = uint8(v.entry >> 8)
|
||||
buf[stream2][off] = uint8(v2.entry >> 8)
|
||||
|
||||
val = br[stream].peekBitsFast(d.actualTableLog)
|
||||
val2 = br[stream2].peekBitsFast(d.actualTableLog)
|
||||
v = single[val&tlMask]
|
||||
v2 = single[val2&tlMask]
|
||||
br[stream].advance(uint8(v.entry))
|
||||
br[stream2].advance(uint8(v2.entry))
|
||||
buf[stream][off+1] = uint8(v.entry >> 8)
|
||||
buf[stream2][off+1] = uint8(v2.entry >> 8)
|
||||
}
|
||||
|
||||
off += 2
|
||||
|
||||
if off == 0 {
|
||||
if bufoff > dstEvery {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 1")
|
||||
}
|
||||
copy(out, buf[0][:])
|
||||
copy(out[dstEvery:], buf[1][:])
|
||||
copy(out[dstEvery*2:], buf[2][:])
|
||||
copy(out[dstEvery*3:], buf[3][:])
|
||||
out = out[bufoff:]
|
||||
decoded += bufoff * 4
|
||||
// There must at least be 3 buffers left.
|
||||
if len(out) < dstEvery*3 {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 2")
|
||||
}
|
||||
}
|
||||
}
|
||||
if off > 0 {
|
||||
ioff := int(off)
|
||||
if len(out) < dstEvery*3+ioff {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 3")
|
||||
}
|
||||
copy(out, buf[0][:off])
|
||||
copy(out[dstEvery:], buf[1][:off])
|
||||
copy(out[dstEvery*2:], buf[2][:off])
|
||||
copy(out[dstEvery*3:], buf[3][:off])
|
||||
decoded += int(off) * 4
|
||||
out = out[off:]
|
||||
}
|
||||
|
||||
// Decode remaining.
|
||||
remainBytes := dstEvery - (decoded / 4)
|
||||
for i := range br {
|
||||
offset := dstEvery * i
|
||||
endsAt := offset + remainBytes
|
||||
if endsAt > len(out) {
|
||||
endsAt = len(out)
|
||||
}
|
||||
br := &br[i]
|
||||
bitsLeft := br.remaining()
|
||||
for bitsLeft > 0 {
|
||||
br.fill()
|
||||
if offset >= endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, errors.New("corruption detected: stream overrun 4")
|
||||
}
|
||||
|
||||
// Read value and increment offset.
|
||||
val := br.peekBitsFast(d.actualTableLog)
|
||||
v := single[val&tlMask].entry
|
||||
nBits := uint8(v)
|
||||
br.advance(nBits)
|
||||
bitsLeft -= uint(nBits)
|
||||
out[offset] = uint8(v >> 8)
|
||||
offset++
|
||||
}
|
||||
if offset != endsAt {
|
||||
d.bufs.Put(buf)
|
||||
return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
|
||||
}
|
||||
decoded += offset - dstEvery*i
|
||||
err = br.close()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
d.bufs.Put(buf)
|
||||
if dstSize != decoded {
|
||||
return nil, errors.New("corruption detected: short output block")
|
||||
}
|
||||
return dst, nil
|
||||
}
|
Reference in New Issue
Block a user