// Mirror of https://gitea.com/Lydanne/buildx.git (synced 2025-11-04 18:13:42 +08:00).
// File info: 379 lines, 11 KiB, Go.

//go:build amd64 && !appengine && !noasm && gc
 | 
						|
// +build amd64,!appengine,!noasm,gc
 | 
						|
 | 
						|
package zstd
 | 
						|
 | 
						|
import (
 | 
						|
	"fmt"
 | 
						|
 | 
						|
	"github.com/klauspost/compress/internal/cpuinfo"
 | 
						|
)
 | 
						|
 | 
						|
type decodeSyncAsmContext struct {
 | 
						|
	llTable     []decSymbol
 | 
						|
	mlTable     []decSymbol
 | 
						|
	ofTable     []decSymbol
 | 
						|
	llState     uint64
 | 
						|
	mlState     uint64
 | 
						|
	ofState     uint64
 | 
						|
	iteration   int
 | 
						|
	litRemain   int
 | 
						|
	out         []byte
 | 
						|
	outPosition int
 | 
						|
	literals    []byte
 | 
						|
	litPosition int
 | 
						|
	history     []byte
 | 
						|
	windowSize  int
 | 
						|
	ll          int // set on error (not for all errors, please refer to _generate/gen.go)
 | 
						|
	ml          int // set on error (not for all errors, please refer to _generate/gen.go)
 | 
						|
	mo          int // set on error (not for all errors, please refer to _generate/gen.go)
 | 
						|
}
 | 
						|
 | 
						|
// sequenceDecs_decodeSync_amd64 implements the main loop of sequenceDecs.decodeSync in x86 asm.
 | 
						|
//
 | 
						|
// Please refer to seqdec_generic.go for the reference implementation.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decodeSync_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decodeSync_bmi2 implements the main loop of sequenceDecs.decodeSync in x86 asm with BMI2 extensions.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decodeSync_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decodeSync_safe_amd64 does the same as above, but does not write more than output buffer.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decodeSync_safe_amd64(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decodeSync_safe_bmi2 does the same as above, but does not write more than output buffer.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decodeSync_safe_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeSyncAsmContext) int
 | 
						|
 | 
						|
// decode sequences from the stream with the provided history but without a dictionary.
 | 
						|
func (s *sequenceDecs) decodeSyncSimple(hist []byte) (bool, error) {
 | 
						|
	if len(s.dict) > 0 {
 | 
						|
		return false, nil
 | 
						|
	}
 | 
						|
	if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSize {
 | 
						|
		return false, nil
 | 
						|
	}
 | 
						|
 | 
						|
	// FIXME: Using unsafe memory copies leads to rare, random crashes
 | 
						|
	// with fuzz testing. It is therefore disabled for now.
 | 
						|
	const useSafe = true
 | 
						|
	/*
 | 
						|
		useSafe := false
 | 
						|
		if s.maxSyncLen == 0 && cap(s.out)-len(s.out) < maxCompressedBlockSizeAlloc {
 | 
						|
			useSafe = true
 | 
						|
		}
 | 
						|
		if s.maxSyncLen > 0 && cap(s.out)-len(s.out)-compressedBlockOverAlloc < int(s.maxSyncLen) {
 | 
						|
			useSafe = true
 | 
						|
		}
 | 
						|
		if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
 | 
						|
			useSafe = true
 | 
						|
		}
 | 
						|
	*/
 | 
						|
 | 
						|
	br := s.br
 | 
						|
 | 
						|
	maxBlockSize := maxCompressedBlockSize
 | 
						|
	if s.windowSize < maxBlockSize {
 | 
						|
		maxBlockSize = s.windowSize
 | 
						|
	}
 | 
						|
 | 
						|
	ctx := decodeSyncAsmContext{
 | 
						|
		llTable:     s.litLengths.fse.dt[:maxTablesize],
 | 
						|
		mlTable:     s.matchLengths.fse.dt[:maxTablesize],
 | 
						|
		ofTable:     s.offsets.fse.dt[:maxTablesize],
 | 
						|
		llState:     uint64(s.litLengths.state.state),
 | 
						|
		mlState:     uint64(s.matchLengths.state.state),
 | 
						|
		ofState:     uint64(s.offsets.state.state),
 | 
						|
		iteration:   s.nSeqs - 1,
 | 
						|
		litRemain:   len(s.literals),
 | 
						|
		out:         s.out,
 | 
						|
		outPosition: len(s.out),
 | 
						|
		literals:    s.literals,
 | 
						|
		windowSize:  s.windowSize,
 | 
						|
		history:     hist,
 | 
						|
	}
 | 
						|
 | 
						|
	s.seqSize = 0
 | 
						|
	startSize := len(s.out)
 | 
						|
 | 
						|
	var errCode int
 | 
						|
	if cpuinfo.HasBMI2() {
 | 
						|
		if useSafe {
 | 
						|
			errCode = sequenceDecs_decodeSync_safe_bmi2(s, br, &ctx)
 | 
						|
		} else {
 | 
						|
			errCode = sequenceDecs_decodeSync_bmi2(s, br, &ctx)
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		if useSafe {
 | 
						|
			errCode = sequenceDecs_decodeSync_safe_amd64(s, br, &ctx)
 | 
						|
		} else {
 | 
						|
			errCode = sequenceDecs_decodeSync_amd64(s, br, &ctx)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	switch errCode {
 | 
						|
	case noError:
 | 
						|
		break
 | 
						|
 | 
						|
	case errorMatchLenOfsMismatch:
 | 
						|
		return true, fmt.Errorf("zero matchoff and matchlen (%d) > 0", ctx.ml)
 | 
						|
 | 
						|
	case errorMatchLenTooBig:
 | 
						|
		return true, fmt.Errorf("match len (%d) bigger than max allowed length", ctx.ml)
 | 
						|
 | 
						|
	case errorMatchOffTooBig:
 | 
						|
		return true, fmt.Errorf("match offset (%d) bigger than current history (%d)",
 | 
						|
			ctx.mo, ctx.outPosition+len(hist)-startSize)
 | 
						|
 | 
						|
	case errorNotEnoughLiterals:
 | 
						|
		return true, fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available",
 | 
						|
			ctx.ll, ctx.litRemain+ctx.ll)
 | 
						|
 | 
						|
	case errorNotEnoughSpace:
 | 
						|
		size := ctx.outPosition + ctx.ll + ctx.ml
 | 
						|
		if debugDecoder {
 | 
						|
			println("msl:", s.maxSyncLen, "cap", cap(s.out), "bef:", startSize, "sz:", size-startSize, "mbs:", maxBlockSize, "outsz:", cap(s.out)-startSize)
 | 
						|
		}
 | 
						|
		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 | 
						|
 | 
						|
	default:
 | 
						|
		return true, fmt.Errorf("sequenceDecs_decode returned erronous code %d", errCode)
 | 
						|
	}
 | 
						|
 | 
						|
	s.seqSize += ctx.litRemain
 | 
						|
	if s.seqSize > maxBlockSize {
 | 
						|
		return true, fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 | 
						|
	}
 | 
						|
	err := br.close()
 | 
						|
	if err != nil {
 | 
						|
		printf("Closing sequences: %v, %+v\n", err, *br)
 | 
						|
		return true, err
 | 
						|
	}
 | 
						|
 | 
						|
	s.literals = s.literals[ctx.litPosition:]
 | 
						|
	t := ctx.outPosition
 | 
						|
	s.out = s.out[:t]
 | 
						|
 | 
						|
	// Add final literals
 | 
						|
	s.out = append(s.out, s.literals...)
 | 
						|
	if debugDecoder {
 | 
						|
		t += len(s.literals)
 | 
						|
		if t != len(s.out) {
 | 
						|
			panic(fmt.Errorf("length mismatch, want %d, got %d", len(s.out), t))
 | 
						|
		}
 | 
						|
	}
 | 
						|
 | 
						|
	return true, nil
 | 
						|
}
 | 
						|
 | 
						|
// --------------------------------------------------------------------------------
 | 
						|
 | 
						|
type decodeAsmContext struct {
 | 
						|
	llTable   []decSymbol
 | 
						|
	mlTable   []decSymbol
 | 
						|
	ofTable   []decSymbol
 | 
						|
	llState   uint64
 | 
						|
	mlState   uint64
 | 
						|
	ofState   uint64
 | 
						|
	iteration int
 | 
						|
	seqs      []seqVals
 | 
						|
	litRemain int
 | 
						|
}
 | 
						|
 | 
						|
// Status codes returned by the sequenceDecs_decode* assembly routines.
//
// NOTE(review): the numeric values are presumably mirrored in the assembly
// generator (_generate/gen.go) — confirm before renumbering.
const (
	// noError is the code treated as success by the Go callers.
	noError = 0

	// errorMatchLenOfsMismatch is reported when mo == 0 && ml > 0.
	errorMatchLenOfsMismatch = 1

	// errorMatchLenTooBig is reported when ml > maxMatchLen.
	errorMatchLenTooBig = 2

	// errorMatchOffTooBig is reported when mo > available history or
	// mo > s.windowSize.
	errorMatchOffTooBig = 3

	// errorNotEnoughLiterals is reported when the sum of literal lengths
	// exceeds the literal buffer size.
	errorNotEnoughLiterals = 4

	// errorNotEnoughSpace is reported when capacity of `out` is too small.
	errorNotEnoughSpace = 5
)
 | 
						|
 | 
						|
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
 | 
						|
//
 | 
						|
// Please refer to seqdec_generic.go for the reference implementation.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decode_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm.
 | 
						|
//
 | 
						|
// Please refer to seqdec_generic.go for the reference implementation.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decode_56_amd64(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decode_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | 
						|
 | 
						|
// sequenceDecs_decode implements the main loop of sequenceDecs in x86 asm with BMI2 extensions.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_decode_56_bmi2(s *sequenceDecs, br *bitReader, ctx *decodeAsmContext) int
 | 
						|
 | 
						|
// decode sequences from the stream without the provided history.
 | 
						|
func (s *sequenceDecs) decode(seqs []seqVals) error {
 | 
						|
	br := s.br
 | 
						|
 | 
						|
	maxBlockSize := maxCompressedBlockSize
 | 
						|
	if s.windowSize < maxBlockSize {
 | 
						|
		maxBlockSize = s.windowSize
 | 
						|
	}
 | 
						|
 | 
						|
	ctx := decodeAsmContext{
 | 
						|
		llTable:   s.litLengths.fse.dt[:maxTablesize],
 | 
						|
		mlTable:   s.matchLengths.fse.dt[:maxTablesize],
 | 
						|
		ofTable:   s.offsets.fse.dt[:maxTablesize],
 | 
						|
		llState:   uint64(s.litLengths.state.state),
 | 
						|
		mlState:   uint64(s.matchLengths.state.state),
 | 
						|
		ofState:   uint64(s.offsets.state.state),
 | 
						|
		seqs:      seqs,
 | 
						|
		iteration: len(seqs) - 1,
 | 
						|
		litRemain: len(s.literals),
 | 
						|
	}
 | 
						|
 | 
						|
	s.seqSize = 0
 | 
						|
	lte56bits := s.maxBits+s.offsets.fse.actualTableLog+s.matchLengths.fse.actualTableLog+s.litLengths.fse.actualTableLog <= 56
 | 
						|
	var errCode int
 | 
						|
	if cpuinfo.HasBMI2() {
 | 
						|
		if lte56bits {
 | 
						|
			errCode = sequenceDecs_decode_56_bmi2(s, br, &ctx)
 | 
						|
		} else {
 | 
						|
			errCode = sequenceDecs_decode_bmi2(s, br, &ctx)
 | 
						|
		}
 | 
						|
	} else {
 | 
						|
		if lte56bits {
 | 
						|
			errCode = sequenceDecs_decode_56_amd64(s, br, &ctx)
 | 
						|
		} else {
 | 
						|
			errCode = sequenceDecs_decode_amd64(s, br, &ctx)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if errCode != 0 {
 | 
						|
		i := len(seqs) - ctx.iteration - 1
 | 
						|
		switch errCode {
 | 
						|
		case errorMatchLenOfsMismatch:
 | 
						|
			ml := ctx.seqs[i].ml
 | 
						|
			return fmt.Errorf("zero matchoff and matchlen (%d) > 0", ml)
 | 
						|
 | 
						|
		case errorMatchLenTooBig:
 | 
						|
			ml := ctx.seqs[i].ml
 | 
						|
			return fmt.Errorf("match len (%d) bigger than max allowed length", ml)
 | 
						|
 | 
						|
		case errorNotEnoughLiterals:
 | 
						|
			ll := ctx.seqs[i].ll
 | 
						|
			return fmt.Errorf("unexpected literal count, want %d bytes, but only %d is available", ll, ctx.litRemain+ll)
 | 
						|
		}
 | 
						|
 | 
						|
		return fmt.Errorf("sequenceDecs_decode_amd64 returned erronous code %d", errCode)
 | 
						|
	}
 | 
						|
 | 
						|
	if ctx.litRemain < 0 {
 | 
						|
		return fmt.Errorf("literal count is too big: total available %d, total requested %d",
 | 
						|
			len(s.literals), len(s.literals)-ctx.litRemain)
 | 
						|
	}
 | 
						|
 | 
						|
	s.seqSize += ctx.litRemain
 | 
						|
	if s.seqSize > maxBlockSize {
 | 
						|
		return fmt.Errorf("output bigger than max block size (%d)", maxBlockSize)
 | 
						|
	}
 | 
						|
	err := br.close()
 | 
						|
	if err != nil {
 | 
						|
		printf("Closing sequences: %v, %+v\n", err, *br)
 | 
						|
	}
 | 
						|
	return err
 | 
						|
}
 | 
						|
 | 
						|
// --------------------------------------------------------------------------------
 | 
						|
 | 
						|
type executeAsmContext struct {
 | 
						|
	seqs        []seqVals
 | 
						|
	seqIndex    int
 | 
						|
	out         []byte
 | 
						|
	history     []byte
 | 
						|
	literals    []byte
 | 
						|
	outPosition int
 | 
						|
	litPosition int
 | 
						|
	windowSize  int
 | 
						|
}
 | 
						|
 | 
						|
// sequenceDecs_executeSimple_amd64 implements the main loop of sequenceDecs.executeSimple in x86 asm.
 | 
						|
//
 | 
						|
// Returns false if a match offset is too big.
 | 
						|
//
 | 
						|
// Please refer to seqdec_generic.go for the reference implementation.
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_executeSimple_amd64(ctx *executeAsmContext) bool
 | 
						|
 | 
						|
// Same as above, but with safe memcopies
 | 
						|
//
 | 
						|
//go:noescape
 | 
						|
func sequenceDecs_executeSimple_safe_amd64(ctx *executeAsmContext) bool
 | 
						|
 | 
						|
// executeSimple handles cases when dictionary is not used.
 | 
						|
func (s *sequenceDecs) executeSimple(seqs []seqVals, hist []byte) error {
 | 
						|
	// Ensure we have enough output size...
 | 
						|
	if len(s.out)+s.seqSize+compressedBlockOverAlloc > cap(s.out) {
 | 
						|
		addBytes := s.seqSize + len(s.out) + compressedBlockOverAlloc
 | 
						|
		s.out = append(s.out, make([]byte, addBytes)...)
 | 
						|
		s.out = s.out[:len(s.out)-addBytes]
 | 
						|
	}
 | 
						|
 | 
						|
	if debugDecoder {
 | 
						|
		printf("Execute %d seqs with literals: %d into %d bytes\n", len(seqs), len(s.literals), s.seqSize)
 | 
						|
	}
 | 
						|
 | 
						|
	var t = len(s.out)
 | 
						|
	out := s.out[:t+s.seqSize]
 | 
						|
 | 
						|
	ctx := executeAsmContext{
 | 
						|
		seqs:        seqs,
 | 
						|
		seqIndex:    0,
 | 
						|
		out:         out,
 | 
						|
		history:     hist,
 | 
						|
		outPosition: t,
 | 
						|
		litPosition: 0,
 | 
						|
		literals:    s.literals,
 | 
						|
		windowSize:  s.windowSize,
 | 
						|
	}
 | 
						|
	var ok bool
 | 
						|
	if cap(s.literals) < len(s.literals)+compressedBlockOverAlloc {
 | 
						|
		ok = sequenceDecs_executeSimple_safe_amd64(&ctx)
 | 
						|
	} else {
 | 
						|
		ok = sequenceDecs_executeSimple_amd64(&ctx)
 | 
						|
	}
 | 
						|
	if !ok {
 | 
						|
		return fmt.Errorf("match offset (%d) bigger than current history (%d)",
 | 
						|
			seqs[ctx.seqIndex].mo, ctx.outPosition+len(hist))
 | 
						|
	}
 | 
						|
	s.literals = s.literals[ctx.litPosition:]
 | 
						|
	t = ctx.outPosition
 | 
						|
 | 
						|
	// Add final literals
 | 
						|
	copy(out[t:], s.literals)
 | 
						|
	if debugDecoder {
 | 
						|
		t += len(s.literals)
 | 
						|
		if t != len(out) {
 | 
						|
			panic(fmt.Errorf("length mismatch, want %d, got %d, ss: %d", len(out), t, s.seqSize))
 | 
						|
		}
 | 
						|
	}
 | 
						|
	s.out = out
 | 
						|
 | 
						|
	return nil
 | 
						|
}
 |