mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 10:03:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			182 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			182 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
//go:build amd64 && !appengine && !noasm && gc
 | 
						|
// +build amd64,!appengine,!noasm,gc
 | 
						|
 | 
						|
// This file contains the specialisation of Decoder.Decompress4X
 | 
						|
// that uses an asm implementation of its main loop.
 | 
						|
package huff0
 | 
						|
 | 
						|
import (
 | 
						|
	"errors"
 | 
						|
	"fmt"
 | 
						|
)
 | 
						|
 | 
						|
// decompress4x_main_loop_x86 is an x86 assembler implementation
 | 
						|
// of Decompress4X when tablelog > 8.
 | 
						|
// go:noescape
 | 
						|
func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 | 
						|
	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 | 
						|
 | 
						|
// decompress4x_8b_loop_x86 is an x86 assembler implementation
 | 
						|
// of Decompress4X when tablelog <= 8 which decodes 4 entries
 | 
						|
// per loop.
 | 
						|
// go:noescape
 | 
						|
func decompress4x_8b_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 | 
						|
	peekBits uint8, buf *byte, tbl *dEntrySingle) uint8
 | 
						|
 | 
						|
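// fallback8BitSize is the destination capacity, in bytes, below which the
// Go version is used instead of the assembly loop because it is faster.
// It only applies when 8-bit tables are in use.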
 | 
						|
const fallback8BitSize = 800
 | 
						|
 | 
						|
// Decompress4X will decompress a 4X encoded stream.
 | 
						|
// The length of the supplied input must match the end of a block exactly.
 | 
						|
// The *capacity* of the dst slice must match the destination size of
 | 
						|
// the uncompressed data exactly.
 | 
						|
func (d *Decoder) Decompress4X(dst, src []byte) ([]byte, error) {
 | 
						|
	if len(d.dt.single) == 0 {
 | 
						|
		return nil, errors.New("no table loaded")
 | 
						|
	}
 | 
						|
	if len(src) < 6+(4*1) {
 | 
						|
		return nil, errors.New("input too small")
 | 
						|
	}
 | 
						|
 | 
						|
	use8BitTables := d.actualTableLog <= 8
 | 
						|
	if cap(dst) < fallback8BitSize && use8BitTables {
 | 
						|
		return d.decompress4X8bit(dst, src)
 | 
						|
	}
 | 
						|
	var br [4]bitReaderShifted
 | 
						|
	// Decode "jump table"
 | 
						|
	start := 6
 | 
						|
	for i := 0; i < 3; i++ {
 | 
						|
		length := int(src[i*2]) | (int(src[i*2+1]) << 8)
 | 
						|
		if start+length >= len(src) {
 | 
						|
			return nil, errors.New("truncated input (or invalid offset)")
 | 
						|
		}
 | 
						|
		err := br[i].init(src[start : start+length])
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
		start += length
 | 
						|
	}
 | 
						|
	err := br[3].init(src[start:])
 | 
						|
	if err != nil {
 | 
						|
		return nil, err
 | 
						|
	}
 | 
						|
 | 
						|
	// destination, offset to match first output
 | 
						|
	dstSize := cap(dst)
 | 
						|
	dst = dst[:dstSize]
 | 
						|
	out := dst
 | 
						|
	dstEvery := (dstSize + 3) / 4
 | 
						|
 | 
						|
	const tlSize = 1 << tableLogMax
 | 
						|
	const tlMask = tlSize - 1
 | 
						|
	single := d.dt.single[:tlSize]
 | 
						|
 | 
						|
	// Use temp table to avoid bound checks/append penalty.
 | 
						|
	buf := d.buffer()
 | 
						|
	var off uint8
 | 
						|
	var decoded int
 | 
						|
 | 
						|
	const debug = false
 | 
						|
 | 
						|
	// see: bitReaderShifted.peekBitsFast()
 | 
						|
	peekBits := uint8((64 - d.actualTableLog) & 63)
 | 
						|
 | 
						|
	// Decode 2 values from each decoder/loop.
 | 
						|
	const bufoff = 256
 | 
						|
	for {
 | 
						|
		if br[0].off < 4 || br[1].off < 4 || br[2].off < 4 || br[3].off < 4 {
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		if use8BitTables {
 | 
						|
			off = decompress4x_8b_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
 | 
						|
		} else {
 | 
						|
			off = decompress4x_main_loop_x86(&br[0], &br[1], &br[2], &br[3], peekBits, &buf[0][0], &single[0])
 | 
						|
		}
 | 
						|
		if debug {
 | 
						|
			fmt.Print("DEBUG: ")
 | 
						|
			fmt.Printf("off=%d,", off)
 | 
						|
			for i := 0; i < 4; i++ {
 | 
						|
				fmt.Printf(" br[%d]={bitsRead=%d, value=%x, off=%d}",
 | 
						|
					i, br[i].bitsRead, br[i].value, br[i].off)
 | 
						|
			}
 | 
						|
			fmt.Println("")
 | 
						|
		}
 | 
						|
 | 
						|
		if off != 0 {
 | 
						|
			break
 | 
						|
		}
 | 
						|
 | 
						|
		if bufoff > dstEvery {
 | 
						|
			d.bufs.Put(buf)
 | 
						|
			return nil, errors.New("corruption detected: stream overrun 1")
 | 
						|
		}
 | 
						|
		copy(out, buf[0][:])
 | 
						|
		copy(out[dstEvery:], buf[1][:])
 | 
						|
		copy(out[dstEvery*2:], buf[2][:])
 | 
						|
		copy(out[dstEvery*3:], buf[3][:])
 | 
						|
		out = out[bufoff:]
 | 
						|
		decoded += bufoff * 4
 | 
						|
		// There must at least be 3 buffers left.
 | 
						|
		if len(out) < dstEvery*3 {
 | 
						|
			d.bufs.Put(buf)
 | 
						|
			return nil, errors.New("corruption detected: stream overrun 2")
 | 
						|
		}
 | 
						|
	}
 | 
						|
	if off > 0 {
 | 
						|
		ioff := int(off)
 | 
						|
		if len(out) < dstEvery*3+ioff {
 | 
						|
			d.bufs.Put(buf)
 | 
						|
			return nil, errors.New("corruption detected: stream overrun 3")
 | 
						|
		}
 | 
						|
		copy(out, buf[0][:off])
 | 
						|
		copy(out[dstEvery:], buf[1][:off])
 | 
						|
		copy(out[dstEvery*2:], buf[2][:off])
 | 
						|
		copy(out[dstEvery*3:], buf[3][:off])
 | 
						|
		decoded += int(off) * 4
 | 
						|
		out = out[off:]
 | 
						|
	}
 | 
						|
 | 
						|
	// Decode remaining.
 | 
						|
	remainBytes := dstEvery - (decoded / 4)
 | 
						|
	for i := range br {
 | 
						|
		offset := dstEvery * i
 | 
						|
		endsAt := offset + remainBytes
 | 
						|
		if endsAt > len(out) {
 | 
						|
			endsAt = len(out)
 | 
						|
		}
 | 
						|
		br := &br[i]
 | 
						|
		bitsLeft := br.remaining()
 | 
						|
		for bitsLeft > 0 {
 | 
						|
			br.fill()
 | 
						|
			if offset >= endsAt {
 | 
						|
				d.bufs.Put(buf)
 | 
						|
				return nil, errors.New("corruption detected: stream overrun 4")
 | 
						|
			}
 | 
						|
 | 
						|
			// Read value and increment offset.
 | 
						|
			val := br.peekBitsFast(d.actualTableLog)
 | 
						|
			v := single[val&tlMask].entry
 | 
						|
			nBits := uint8(v)
 | 
						|
			br.advance(nBits)
 | 
						|
			bitsLeft -= uint(nBits)
 | 
						|
			out[offset] = uint8(v >> 8)
 | 
						|
			offset++
 | 
						|
		}
 | 
						|
		if offset != endsAt {
 | 
						|
			d.bufs.Put(buf)
 | 
						|
			return nil, fmt.Errorf("corruption detected: short output block %d, end %d != %d", i, offset, endsAt)
 | 
						|
		}
 | 
						|
		decoded += offset - dstEvery*i
 | 
						|
		err = br.close()
 | 
						|
		if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
	}
 | 
						|
	d.bufs.Put(buf)
 | 
						|
	if dstSize != decoded {
 | 
						|
		return nil, errors.New("corruption detected: short output block")
 | 
						|
	}
 | 
						|
	return dst, nil
 | 
						|
}
 |