mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 10:03:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			196 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			196 lines
		
	
	
		
			5.0 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
// +build !appengine
 | 
						|
// +build gc
 | 
						|
// +build !noasm
 | 
						|
 | 
						|
#include "textflag.h"
 | 
						|
#include "funcdata.h"
 | 
						|
#include "go_asm.h"
 | 
						|
 | 
						|
#ifdef GOAMD64_v4
 | 
						|
#ifndef GOAMD64_v3
 | 
						|
#define GOAMD64_v3
 | 
						|
#endif
 | 
						|
#endif
 | 
						|
 | 
						|
#define bufoff      256     // see decompress.go, we're using [4][256]byte table
 | 
						|
 | 
						|
// decompress4x_main_loop_x86 decodes, per loop iteration, two table entries
// from each of the four bit readers and stores the two resulting bytes of
// stream i at buf + i*256 + off (bufofs is 0, 256, 512, 768).
//
// The loop runs until off reaches bufoff, or until a refill has left one of
// the readers with off < 4. On exit the low byte of off is stored at
// ret+56(FP); since bufoff is 256, a full run stores 0.
//
// NOTE(review): only a single byte is written to the result slot and no
// second result is stored, which does not match the "(int, bool)" result
// list in the signature comment below - confirm against the Go declaration.
//
//func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 | 
						|
//	peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
 | 
						|
TEXT ·decompress4x_main_loop_x86(SB), NOSPLIT, $8
 | 
						|
// Loop state: off counts bytes written per stream, buffer and table hold
// the buf and tbl arguments.
#define off             R8
 | 
						|
#define buffer          DI
 | 
						|
#define table           SI
 | 
						|
 | 
						|
// Working copies of the fields of the reader currently being processed;
// decode_2_values_x86 loads them on entry and writes them back at the end.
#define br_bits_read    R9
 | 
						|
#define br_value        R10
 | 
						|
#define br_offset       R11
 | 
						|
#define peek_bits       R12
 | 
						|
// DX is used through its byte halves: DL is scratch for SETLT, DH
// accumulates the "some reader has off < 4" flag.
#define exhausted       DX
 | 
						|
 | 
						|
// Pointers to the four bit readers. br3 occupies BP, so BP is saved in the
// frame on entry and restored before RET.
#define br0             R13
 | 
						|
#define br1             R14
 | 
						|
#define br2             R15
 | 
						|
#define br3             BP
 | 
						|
 | 
						|
    // save BP in the 8-byte frame; it is reused as br3 below
    MOVQ    BP, 0(SP)
 | 
						|
 | 
						|
    XORQ    exhausted, exhausted    // exhausted = false (clears both DL and DH)
 | 
						|
    XORQ    off, off                // off = 0
 | 
						|
 | 
						|
    // load the scalar and pointer arguments
    MOVBQZX peekBits+32(FP), peek_bits
 | 
						|
    MOVQ    buf+40(FP), buffer
 | 
						|
    MOVQ    tbl+48(FP), table
 | 
						|
 | 
						|
    // load the four reader pointers
    MOVQ    pbr0+0(FP), br0
 | 
						|
    MOVQ    pbr1+8(FP), br1
 | 
						|
    MOVQ    pbr2+16(FP), br2
 | 
						|
    MOVQ    pbr3+24(FP), br3
 | 
						|
 | 
						|
main_loop:
 | 
						|
{{ define "decode_2_values_x86" }}
 | 
						|
    // const stream = {{ var "id" }}
 | 
						|
    // br{{ var "id"}}.fillFast()
 | 
						|
    MOVBQZX bitReaderShifted_bitsRead(br{{ var "id" }}), br_bits_read
 | 
						|
    MOVQ    bitReaderShifted_value(br{{ var "id" }}), br_value
 | 
						|
    MOVQ    bitReaderShifted_off(br{{ var "id" }}), br_offset
 | 
						|
 | 
						|
    // We must have at least 2 * max tablelog left:
    // the refill is skipped while bitsRead <= 64-22 (unsigned compare).
 | 
						|
    CMPQ    br_bits_read, $64-22
 | 
						|
    JBE     skip_fill{{ var "id" }}
 | 
						|
 | 
						|
    SUBQ    $32, br_bits_read       // b.bitsRead -= 32
 | 
						|
    SUBQ    $4, br_offset           // b.off -= 4
 | 
						|
 | 
						|
	// v := b.in[b.off-4 : b.off]
 | 
						|
	// v = v[:4]
 | 
						|
	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 | 
						|
    // AX = b.in; it is the base of the indexed load below and is then
    // overwritten with the loaded, shifted data
    MOVQ    bitReaderShifted_in(br{{ var "id" }}), AX
 | 
						|
 | 
						|
	// b.value |= uint64(low) << (b.bitsRead & 63)
 | 
						|
#ifdef GOAMD64_v3
 | 
						|
    // NOTE(review): SHLXQ takes a 64-bit source operand, so this path appears
    // to read 8 bytes at b.in+b.off, unlike the 4-byte MOVL in the fallback
    // path - confirm the extra bytes are harmless.
    SHLXQ   br_bits_read, 0(br_offset)(AX*1), AX // AX = load(b.in + b.off) << (b.bitsRead & 63)
 | 
						|
#else
 | 
						|
    MOVL    0(br_offset)(AX*1), AX  // AX = uint32(b.in[b.off:b.off+4])
 | 
						|
    MOVQ    br_bits_read, CX
 | 
						|
    SHLQ    CL, AX
 | 
						|
#endif
 | 
						|
 | 
						|
    ORQ     AX, br_value
 | 
						|
 | 
						|
    // exhausted = exhausted || (br{{ var "id"}}.off < 4)
    // DL receives the (signed) comparison result, DH keeps the running OR
 | 
						|
    CMPQ    br_offset, $4
 | 
						|
    SETLT   DL
 | 
						|
    ORB     DL, DH
 | 
						|
    // }
 | 
						|
skip_fill{{ var "id" }}:
 | 
						|
 | 
						|
    // val0 := br{{ var "id"}}.peekTopBits(peekBits)
 | 
						|
#ifdef GOAMD64_v3
 | 
						|
    SHRXQ   peek_bits, br_value, AX // AX = value >> peek_bits (table index)
 | 
						|
#else
 | 
						|
    MOVQ    br_value, AX
 | 
						|
    MOVQ    peek_bits, CX
 | 
						|
    SHRQ    CL, AX                  // AX = value >> peek_bits (table index)
 | 
						|
#endif
 | 
						|
 | 
						|
    // v0 := table[val0&mask]
 | 
						|
    MOVW    0(table)(AX*2), AX      // AX = v0 (2-byte entry): AL = bit count, AH = output byte
 | 
						|
 | 
						|
    // br{{ var "id"}}.advance(uint8(v0.entry))
 | 
						|
    MOVB    AH, BL                  // BL = uint8(v0.entry >> 8)
 | 
						|
 | 
						|
#ifdef GOAMD64_v3
 | 
						|
    MOVBQZX AL, CX
 | 
						|
    // the shift count is taken from the low 6 bits of AX, i.e. from AL
    SHLXQ   AX, br_value, br_value // value <<= n
 | 
						|
#else
 | 
						|
    MOVBQZX AL, CX
 | 
						|
    SHLQ    CL, br_value            // value <<= n
 | 
						|
#endif
 | 
						|
 | 
						|
    ADDQ    CX, br_bits_read        // bits_read += n
 | 
						|
 | 
						|
 | 
						|
    // second symbol: same peek / lookup / advance sequence as for val0
#ifdef GOAMD64_v3
 | 
						|
    SHRXQ    peek_bits, br_value, AX  // AX = value >> peek_bits (table index)
 | 
						|
#else
 | 
						|
    // val1 := br{{ var "id"}}.peekTopBits(peekBits)
 | 
						|
    MOVQ    peek_bits, CX
 | 
						|
    MOVQ    br_value, AX
 | 
						|
    SHRQ    CL, AX                  // AX = value >> peek_bits (table index)
 | 
						|
#endif
 | 
						|
 | 
						|
    // v1 := table[val1&mask]
 | 
						|
    MOVW    0(table)(AX*2), AX      // AX = v1 (2-byte entry): AL = bit count, AH = output byte
 | 
						|
 | 
						|
    // br{{ var "id"}}.advance(uint8(v1.entry))
 | 
						|
    MOVB    AH, BH                  // BH = uint8(v1.entry >> 8)
 | 
						|
 | 
						|
#ifdef GOAMD64_v3
 | 
						|
    MOVBQZX AL, CX
 | 
						|
    // the shift count is taken from the low 6 bits of AX, i.e. from AL
    SHLXQ   AX, br_value, br_value // value <<= n
 | 
						|
#else
 | 
						|
    MOVBQZX AL, CX
 | 
						|
    SHLQ    CL, br_value            // value <<= n
 | 
						|
#endif
 | 
						|
 | 
						|
    ADDQ    CX, br_bits_read        // bits_read += n
 | 
						|
 | 
						|
 | 
						|
    // BL (first symbol) and BH (second symbol) are stored with one 16-bit write:
 | 
						|
    // buf[stream][off] = uint8(v0.entry >> 8)
 | 
						|
    // buf[stream][off+1] = uint8(v1.entry >> 8)
 | 
						|
    MOVW    BX, {{ var "bufofs" }}(buffer)(off*1)
 | 
						|
 | 
						|
    // write the working copies back to the bit reader structure
 | 
						|
    MOVB    br_bits_read, bitReaderShifted_bitsRead(br{{ var "id" }})
 | 
						|
    MOVQ    br_value, bitReaderShifted_value(br{{ var "id" }})
 | 
						|
    MOVQ    br_offset, bitReaderShifted_off(br{{ var "id" }})
 | 
						|
{{ end }}
 | 
						|
 | 
						|
    // One expansion of decode_2_values_x86 per stream; "bufofs" is id * bufoff.
    // NOTE(review): "ofs" is set for each stream but is not referenced by
    // decode_2_values_x86 in the visible source.
    {{ set "id" "0" }}
 | 
						|
    {{ set "ofs" "0" }}
 | 
						|
    {{ set "bufofs" "0" }} {{/* id * bufoff */}}
 | 
						|
    {{ template "decode_2_values_x86" . }}
 | 
						|
 | 
						|
    {{ set "id" "1" }}
 | 
						|
    {{ set "ofs" "8" }}
 | 
						|
    {{ set "bufofs" "256" }}
 | 
						|
    {{ template "decode_2_values_x86" . }}
 | 
						|
 | 
						|
    {{ set "id" "2" }}
 | 
						|
    {{ set "ofs" "16" }}
 | 
						|
    {{ set "bufofs" "512" }}
 | 
						|
    {{ template "decode_2_values_x86" . }}
 | 
						|
 | 
						|
    {{ set "id" "3" }}
 | 
						|
    {{ set "ofs" "24" }}
 | 
						|
    {{ set "bufofs" "768" }}
 | 
						|
    {{ template "decode_2_values_x86" . }}
 | 
						|
 | 
						|
    ADDQ    $2, off     // off += 2
 | 
						|
 | 
						|
    // off has already been advanced, so the bytes written in the iteration
    // that detected exhaustion are included in the returned count
    TESTB   DH, DH      // any br[i].off < 4?
 | 
						|
    JNZ     end
 | 
						|
 | 
						|
    // keep going until bufoff bytes per stream have been written
    CMPQ    off, $bufoff
 | 
						|
    JL      main_loop
 | 
						|
end:
 | 
						|
    // restore the BP value saved on entry
    MOVQ    0(SP), BP
 | 
						|
 | 
						|
    // only the low byte of off is stored as the result
    MOVB    off, ret+56(FP)
 | 
						|
    RET
 | 
						|
#undef  off
 | 
						|
#undef  buffer
 | 
						|
#undef  table
 | 
						|
 | 
						|
#undef  br_bits_read
 | 
						|
#undef  br_value
 | 
						|
#undef  br_offset
 | 
						|
#undef  peek_bits
 | 
						|
#undef  exhausted
 | 
						|
 | 
						|
#undef  br0
 | 
						|
#undef  br1
 | 
						|
#undef  br2
 | 
						|
#undef  br3
 |