mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 18:13:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			227 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			227 lines
		
	
	
		
			5.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
//go:build amd64 && !appengine && !noasm && gc
 | 
						|
// +build amd64,!appengine,!noasm,gc
 | 
						|
 | 
						|
// This file contains the specialisation of Decoder.Decompress4X
 | 
						|
// and Decoder.Decompress1X that use an asm implementation of their main loops.
 | 
						|
package huff0
 | 
						|
 | 
						|
import (
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
 | 
						|
	"github.com/klauspost/compress/internal/cpuinfo"
 | 
						|
)
 | 
						|
 | 
						|
// decompress4x_main_loop_amd64 is an amd64 assembler implementation
 | 
						|
// of Decompress4X when tablelog > 8.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func decompress4x_main_loop_amd64(ctx *decompress4xContext) // no Go body; Decompress4X reads the result back from ctx.decoded
 | 
						|
 | 
						|
// decompress4x_8b_main_loop_amd64 is an amd64 assembler implementation
 | 
						|
// of Decompress4X when tablelog <= 8 which decodes 4 entries
 | 
						|
// per loop.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func decompress4x_8b_main_loop_amd64(ctx *decompress4xContext) // no Go body; Decompress4X reads the result back from ctx.decoded
 | 
						|
 | 
						|
// fallback8BitSize is the size where using Go version is faster.
 | 
						|
const fallback8BitSize = 800 // Decompress4X compares cap(dst) against this when the table log is <= 8
 | 
						|
 | 
						|
// decompress4xContext is the argument block that Decompress4X fills in and
// passes by pointer to the assembly main loops.
// NOTE(review): the assembly presumably relies on this exact field order and
// these exact field types — confirm before changing the layout.
type decompress4xContext struct {
 | 
						|
	pbr      *[4]bitReaderShifted // the four stream readers initialised by Decompress4X
 | 
						|
	peekBits uint8 // (64 - actualTableLog) & 63; see bitReaderShifted.peekBitsFast()
 | 
						|
	out      *byte // first byte of the destination buffer
 | 
						|
	dstEvery int // (dstSize + 3) / 4: distance between the output starts of consecutive streams
 | 
						|
	tbl      *dEntrySingle // first entry of the single-symbol decoding table
 | 
						|
	decoded  int // read back by Decompress4X after the call as the number of bytes already decoded
 | 
						|
	limit    *byte // &out[dstEvery-4]: decoding stops when the first stream's output gets here, to avoid writing out of bounds on the last one
 | 
						|
}
 | 
						|
 | 
						|
// Decompress4X will decompress a 4X encoded stream.
 | 
						|
// The length of the supplied input must match the end of a block exactly.
 | 
						|
// The *capacity* of the dst slice must match the destination size of
 | 
						|
// the uncompressed data exactly.
 | 
						|
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
 | 
						|
	if len(d.dt.single) == 0 {
 | 
						|
		return nil, errors.New("no table loaded")
 | 
						|
	}
 | 
						|
	if len(src) < 6+(4*1) {
 | 
						|
		return nil, errors.New("input too small")
 | 
						|
	}
 | 
						|
 | 
						|
	use8BitTables := d.actualTableLog <= 8
 | 
						|
	if cap(dst) < fallback8BitSize && use8BitTables {
 | 
						|
		return d.decompress4X8bit(dst, src)
 | 
						|
	}
 | 
						|
 | 
						|
	var br [4]bitReaderShifted
 | 
						|
	// Decode "jump table"
 | 
						|
	start := 6
 | 
						|
	for i := 0; i < 3; i++ {
 | 
						|
		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 | 
						|
		if start+length >= len(src) {
 | 
						|
			return nil, errors.New("truncated input (or invalid offset)")
 | 
						|
		}
 | 
						|
		err := br[i].init(src[start : start+length])
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		start += length
 | 
						|
	}
 | 
						|
	err := br[3].init(src[start:])
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	// destination, offset to match first output
 | 
						|
	dstSize := cap(dst)
 | 
						|
	dst = dst[:dstSize]
 | 
						|
	out := dst
 | 
						|
	dstEvery := (dstSize + 3) / 4
 | 
						|
 | 
						|
	const tlSize = 1 << tableLogMax
 | 
						|
	const tlMask = tlSize - 1
 | 
						|
	single := d.dt.single[:tlSize]
 | 
						|
 | 
						|
	var decoded int
 | 
						|
 | 
						|
	if len(out) > 4*4 && !(br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4) {
 | 
						|
		ctx := decompress4xContext{
 | 
						|
			pbr:      &br,
 | 
						|
			peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
 | 
						|
			out:      &out[0],
 | 
						|
			dstEvery: dstEvery,
 | 
						|
			tbl:      &single[0],
 | 
						|
			limit:    &out[dstEvery-4], // Always stop decoding when first buffer gets here to avoid writing OOB on last.
 | 
						|
		}
 | 
						|
		if use8BitTables {
 | 
						|
			decompress4x_8b_main_loop_amd64(&ctx)
 | 
						|
		} else {
 | 
						|
			decompress4x_main_loop_amd64(&ctx)
 | 
						|
		}
 | 
						|
 | 
						|
		decoded = ctx.decoded
 | 
						|
		out = out[decoded/4:]
 | 
						|
	}
 | 
						|
 | 
						|
	// Decode remaining.
 | 
						|
	remainBytes := dstEvery - (decoded / 4)
 | 
						|
	for i := range br {
 | 
						|
		offset := dstEvery * i
 | 
						|
		endsAt := offset + remainBytes
 | 
						|
		if endsAt > len(out) {
 | 
						|
			endsAt = len(out)
 | 
						|
		}
 | 
						|
		br := &br[i]
 | 
						|
		bitsLeft := br.remaining()
 | 
						|
		for bitsLeft > 0 {
 | 
						|
			br.fill()
 | 
						|
			if offset >= endsAt {
 | 
						|
				return nil, errors.New("corruption detected: stream overrun 4")
 | 
						|
			}
 | 
						|
 | 
						|
			// Read value and increment offset.
 | 
						|
			val := br.peekBitsFast(d.actualTableLog)
 | 
						|
			v := single[val&tlMask].entry
 | 
						|
			nBits := uint8(v)
 | 
						|
			br.advance(nBits)
 | 
						|
			bitsLeft -= uint(nBits)
 | 
						|
			out[offset] = uint8(v >> 8)
 | 
						|
			offset++
 | 
						|
		}
 | 
						|
		if offset != endsAt {
 | 
						|
			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 | 
						|
		}
 | 
						|
		decoded += offset - dstEvery*i
 | 
						|
		err = br.close()
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if dstSize != decoded {
 | 
						|
		return nil, errors.New("corruption detected: short output block")
 | 
						|
	}
 | 
						|
	return dst, nil
 | 
						|
}
 | 
						|
 | 
						|
// decompress1x_main_loop_amd64 is an amd64 assembler implementation
 | 
						|
// of the main loop of Decompress1X.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func decompress1x_main_loop_amd64(ctx *decompress1xContext) // no Go body; Decompress1X reads the result back from ctx.decoded
 | 
						|
 | 
						|
// decompress1x_main_loop_bmi2 is an amd64 assembler implementation using BMI2
 | 
						|
// of the main loop of Decompress1X.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func decompress1x_main_loop_bmi2(ctx *decompress1xContext) // no Go body; Decompress1X calls it only when cpuinfo.HasBMI2() is true
 | 
						|
 | 
						|
// decompress1xContext is the argument block that Decompress1X fills in and
// passes by pointer to the assembly main loops.
// NOTE(review): the assembly presumably relies on this exact field order and
// these exact field types — confirm before changing the layout.
type decompress1xContext struct {
 | 
						|
	pbr      *bitReaderShifted // the stream reader initialised by Decompress1X
 | 
						|
	peekBits uint8 // (64 - actualTableLog) & 63; see bitReaderShifted.peekBitsFast()
 | 
						|
	out      *byte // first byte of the destination buffer
 | 
						|
	outCap   int // cap(dst), the maximum decompressed size
 | 
						|
	tbl      *dEntrySingle // first entry of the single-symbol decoding table
 | 
						|
	decoded  int // read back by Decompress1X: error_max_decoded_size_exeeded, or the length dst is trimmed to
 | 
						|
}
 | 
						|
 | 
						|
// Error reported by asm implementations
 | 
						|
const error_max_decoded_size_exeeded = -1 // value of ctx.decoded that Decompress1X turns into ErrMaxDecodedSizeExceeded
 | 
						|
 | 
						|
// Decompress1X will decompress a 1X encoded stream.
 | 
						|
// The cap of the output buffer will be the maximum decompressed size.
 | 
						|
// The length of the supplied input must match the end of a block exactly.
 | 
						|
func (d *Decoder) Decompress1X(dst, src []byte) ([]byte, error) {
 | 
						|
	if len(d.dt.single) == 0 {
 | 
						|
		return nil, errors.New("no table loaded")
 | 
						|
	}
 | 
						|
	var br bitReaderShifted
 | 
						|
	err := br.init(src)
 | 
						|
	if err != nil {
 | 
						|
		return dst, err
 | 
						|
	}
 | 
						|
	maxDecodedSize := cap(dst)
 | 
						|
	dst = dst[:maxDecodedSize]
 | 
						|
 | 
						|
	const tlSize = 1 << tableLogMax
 | 
						|
	const tlMask = tlSize - 1
 | 
						|
 | 
						|
	if maxDecodedSize >= 4 {
 | 
						|
		ctx := decompress1xContext{
 | 
						|
			pbr:      &br,
 | 
						|
			out:      &dst[0],
 | 
						|
			outCap:   maxDecodedSize,
 | 
						|
			peekBits: uint8((64 - d.actualTableLog) & 63), // see: bitReaderShifted.peekBitsFast()
 | 
						|
			tbl:      &d.dt.single[0],
 | 
						|
		}
 | 
						|
 | 
						|
		if cpuinfo.HasBMI2() {
 | 
						|
			decompress1x_main_loop_bmi2(&ctx)
 | 
						|
		} else {
 | 
						|
			decompress1x_main_loop_amd64(&ctx)
 | 
						|
		}
 | 
						|
		if ctx.decoded == error_max_decoded_size_exeeded {
 | 
						|
			return nil, ErrMaxDecodedSizeExceeded
 | 
						|
		}
 | 
						|
 | 
						|
		dst = dst[:ctx.decoded]
 | 
						|
	}
 | 
						|
 | 
						|
	// br < 8, so uint8 is fine
 | 
						|
	bitsLeft := uint8(br.off)*8 + 64 - br.bitsRead
 | 
						|
	for bitsLeft > 0 {
 | 
						|
		br.fill()
 | 
						|
		if len(dst) >= maxDecodedSize {
 | 
						|
			br.close()
 | 
						|
			return nil, ErrMaxDecodedSizeExceeded
 | 
						|
		}
 | 
						|
		v := d.dt.single[br.peekBitsFast(d.actualTableLog)&tlMask]
 | 
						|
		nBits := uint8(v.entry)
 | 
						|
		br.advance(nBits)
 | 
						|
		bitsLeft -= nBits
 | 
						|
		dst = append(dst, uint8(v.entry>>8))
 | 
						|
	}
 | 
						|
	return dst, br.close()
 | 
						|
}
 |