mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 18:13:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			489 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
			
		
		
	
	
			489 lines
		
	
	
		
			12 KiB
		
	
	
	
		
			ArmAsm
		
	
	
	
	
	
// +build !appengine
 | 
						|
// +build gc
 | 
						|
// +build !noasm
 | 
						|
 | 
						|
#include "textflag.h"
 | 
						|
#include "funcdata.h"
 | 
						|
#include "go_asm.h"
 | 
						|
 | 
						|
#define bufoff      256 // see decompress.go, we're using [4][256]byte table
 | 
						|
 | 
						|
// func decompress4x_main_loop_x86(pbr0, pbr1, pbr2, pbr3 *bitReaderShifted,
 | 
						|
//	peekBits uint8, buf *byte, tbl *dEntrySingle) (int, bool)
 | 
						|
TEXT ·decompress4x_8b_loop_x86(SB), NOSPLIT, $8
 | 
						|
#define off             R8
 | 
						|
#define buffer          DI
 | 
						|
#define table           SI
 | 
						|
 | 
						|
#define br_bits_read    R9
 | 
						|
#define br_value        R10
 | 
						|
#define br_offset       R11
 | 
						|
#define peek_bits       R12
 | 
						|
#define exhausted       DX
 | 
						|
 | 
						|
#define br0             R13
 | 
						|
#define br1             R14
 | 
						|
#define br2             R15
 | 
						|
#define br3             BP
 | 
						|
 | 
						|
	MOVQ BP, 0(SP)
 | 
						|
 | 
						|
	XORQ exhausted, exhausted // exhausted = false
 | 
						|
	XORQ off, off             // off = 0
 | 
						|
 | 
						|
	MOVBQZX peekBits+32(FP), peek_bits
 | 
						|
	MOVQ    buf+40(FP), buffer
 | 
						|
	MOVQ    tbl+48(FP), table
 | 
						|
 | 
						|
	MOVQ pbr0+0(FP), br0
 | 
						|
	MOVQ pbr1+8(FP), br1
 | 
						|
	MOVQ pbr2+16(FP), br2
 | 
						|
	MOVQ pbr3+24(FP), br3
 | 
						|
 | 
						|
main_loop:
 | 
						|
 | 
						|
	// const stream = 0
 | 
						|
	// br0.fillFast()
 | 
						|
	MOVBQZX bitReaderShifted_bitsRead(br0), br_bits_read
 | 
						|
	MOVQ    bitReaderShifted_value(br0), br_value
 | 
						|
	MOVQ    bitReaderShifted_off(br0), br_offset
 | 
						|
 | 
						|
	// if b.bitsRead >= 32 {
 | 
						|
	CMPQ br_bits_read, $32
 | 
						|
	JB   skip_fill0
 | 
						|
 | 
						|
	SUBQ $32, br_bits_read // b.bitsRead -= 32
 | 
						|
	SUBQ $4, br_offset     // b.off -= 4
 | 
						|
 | 
						|
	// v := b.in[b.off-4 : b.off]
 | 
						|
	// v = v[:4]
 | 
						|
	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 | 
						|
	MOVQ bitReaderShifted_in(br0), AX
 | 
						|
	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 | 
						|
 | 
						|
	// b.value |= uint64(low) << (b.bitsRead & 63)
 | 
						|
	MOVQ br_bits_read, CX
 | 
						|
	SHLQ CL, AX
 | 
						|
	ORQ  AX, br_value
 | 
						|
 | 
						|
	// exhausted = exhausted || (br0.off < 4)
 | 
						|
	CMPQ  br_offset, $4
 | 
						|
	SETLT DL
 | 
						|
	ORB   DL, DH
 | 
						|
 | 
						|
	// }
 | 
						|
skip_fill0:
 | 
						|
 | 
						|
	// val0 := br0.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v0 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br0.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val1 := br0.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v1 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br0.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off] = uint8(v0.entry >> 8)
 | 
						|
	// buf[stream][off+1] = uint8(v1.entry >> 8)
 | 
						|
	MOVW BX, 0(buffer)(off*1)
 | 
						|
 | 
						|
	// SECOND PART:
 | 
						|
	// val2 := br0.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v2 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br0.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val3 := br0.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v3 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br0.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off+2] = uint8(v2.entry >> 8)
 | 
						|
	// buf[stream][off+3] = uint8(v3.entry >> 8)
 | 
						|
	MOVW BX, 0+2(buffer)(off*1)
 | 
						|
 | 
						|
	// update the bitrader reader structure
 | 
						|
	MOVB br_bits_read, bitReaderShifted_bitsRead(br0)
 | 
						|
	MOVQ br_value, bitReaderShifted_value(br0)
 | 
						|
	MOVQ br_offset, bitReaderShifted_off(br0)
 | 
						|
 | 
						|
	// const stream = 1
 | 
						|
	// br1.fillFast()
 | 
						|
	MOVBQZX bitReaderShifted_bitsRead(br1), br_bits_read
 | 
						|
	MOVQ    bitReaderShifted_value(br1), br_value
 | 
						|
	MOVQ    bitReaderShifted_off(br1), br_offset
 | 
						|
 | 
						|
	// if b.bitsRead >= 32 {
 | 
						|
	CMPQ br_bits_read, $32
 | 
						|
	JB   skip_fill1
 | 
						|
 | 
						|
	SUBQ $32, br_bits_read // b.bitsRead -= 32
 | 
						|
	SUBQ $4, br_offset     // b.off -= 4
 | 
						|
 | 
						|
	// v := b.in[b.off-4 : b.off]
 | 
						|
	// v = v[:4]
 | 
						|
	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 | 
						|
	MOVQ bitReaderShifted_in(br1), AX
 | 
						|
	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 | 
						|
 | 
						|
	// b.value |= uint64(low) << (b.bitsRead & 63)
 | 
						|
	MOVQ br_bits_read, CX
 | 
						|
	SHLQ CL, AX
 | 
						|
	ORQ  AX, br_value
 | 
						|
 | 
						|
	// exhausted = exhausted || (br1.off < 4)
 | 
						|
	CMPQ  br_offset, $4
 | 
						|
	SETLT DL
 | 
						|
	ORB   DL, DH
 | 
						|
 | 
						|
	// }
 | 
						|
skip_fill1:
 | 
						|
 | 
						|
	// val0 := br1.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v0 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br1.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val1 := br1.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v1 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br1.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off] = uint8(v0.entry >> 8)
 | 
						|
	// buf[stream][off+1] = uint8(v1.entry >> 8)
 | 
						|
	MOVW BX, 256(buffer)(off*1)
 | 
						|
 | 
						|
	// SECOND PART:
 | 
						|
	// val2 := br1.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v2 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br1.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val3 := br1.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v3 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br1.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off+2] = uint8(v2.entry >> 8)
 | 
						|
	// buf[stream][off+3] = uint8(v3.entry >> 8)
 | 
						|
	MOVW BX, 256+2(buffer)(off*1)
 | 
						|
 | 
						|
	// update the bitrader reader structure
 | 
						|
	MOVB br_bits_read, bitReaderShifted_bitsRead(br1)
 | 
						|
	MOVQ br_value, bitReaderShifted_value(br1)
 | 
						|
	MOVQ br_offset, bitReaderShifted_off(br1)
 | 
						|
 | 
						|
	// const stream = 2
 | 
						|
	// br2.fillFast()
 | 
						|
	MOVBQZX bitReaderShifted_bitsRead(br2), br_bits_read
 | 
						|
	MOVQ    bitReaderShifted_value(br2), br_value
 | 
						|
	MOVQ    bitReaderShifted_off(br2), br_offset
 | 
						|
 | 
						|
	// if b.bitsRead >= 32 {
 | 
						|
	CMPQ br_bits_read, $32
 | 
						|
	JB   skip_fill2
 | 
						|
 | 
						|
	SUBQ $32, br_bits_read // b.bitsRead -= 32
 | 
						|
	SUBQ $4, br_offset     // b.off -= 4
 | 
						|
 | 
						|
	// v := b.in[b.off-4 : b.off]
 | 
						|
	// v = v[:4]
 | 
						|
	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 | 
						|
	MOVQ bitReaderShifted_in(br2), AX
 | 
						|
	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 | 
						|
 | 
						|
	// b.value |= uint64(low) << (b.bitsRead & 63)
 | 
						|
	MOVQ br_bits_read, CX
 | 
						|
	SHLQ CL, AX
 | 
						|
	ORQ  AX, br_value
 | 
						|
 | 
						|
	// exhausted = exhausted || (br2.off < 4)
 | 
						|
	CMPQ  br_offset, $4
 | 
						|
	SETLT DL
 | 
						|
	ORB   DL, DH
 | 
						|
 | 
						|
	// }
 | 
						|
skip_fill2:
 | 
						|
 | 
						|
	// val0 := br2.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v0 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br2.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val1 := br2.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v1 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br2.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off] = uint8(v0.entry >> 8)
 | 
						|
	// buf[stream][off+1] = uint8(v1.entry >> 8)
 | 
						|
	MOVW BX, 512(buffer)(off*1)
 | 
						|
 | 
						|
	// SECOND PART:
 | 
						|
	// val2 := br2.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v2 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br2.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val3 := br2.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v3 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br2.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off+2] = uint8(v2.entry >> 8)
 | 
						|
	// buf[stream][off+3] = uint8(v3.entry >> 8)
 | 
						|
	MOVW BX, 512+2(buffer)(off*1)
 | 
						|
 | 
						|
	// update the bitrader reader structure
 | 
						|
	MOVB br_bits_read, bitReaderShifted_bitsRead(br2)
 | 
						|
	MOVQ br_value, bitReaderShifted_value(br2)
 | 
						|
	MOVQ br_offset, bitReaderShifted_off(br2)
 | 
						|
 | 
						|
	// const stream = 3
 | 
						|
	// br3.fillFast()
 | 
						|
	MOVBQZX bitReaderShifted_bitsRead(br3), br_bits_read
 | 
						|
	MOVQ    bitReaderShifted_value(br3), br_value
 | 
						|
	MOVQ    bitReaderShifted_off(br3), br_offset
 | 
						|
 | 
						|
	// if b.bitsRead >= 32 {
 | 
						|
	CMPQ br_bits_read, $32
 | 
						|
	JB   skip_fill3
 | 
						|
 | 
						|
	SUBQ $32, br_bits_read // b.bitsRead -= 32
 | 
						|
	SUBQ $4, br_offset     // b.off -= 4
 | 
						|
 | 
						|
	// v := b.in[b.off-4 : b.off]
 | 
						|
	// v = v[:4]
 | 
						|
	// low := (uint32(v[0])) | (uint32(v[1]) << 8) | (uint32(v[2]) << 16) | (uint32(v[3]) << 24)
 | 
						|
	MOVQ bitReaderShifted_in(br3), AX
 | 
						|
	MOVL 0(br_offset)(AX*1), AX       // AX = uint32(b.in[b.off:b.off+4])
 | 
						|
 | 
						|
	// b.value |= uint64(low) << (b.bitsRead & 63)
 | 
						|
	MOVQ br_bits_read, CX
 | 
						|
	SHLQ CL, AX
 | 
						|
	ORQ  AX, br_value
 | 
						|
 | 
						|
	// exhausted = exhausted || (br3.off < 4)
 | 
						|
	CMPQ  br_offset, $4
 | 
						|
	SETLT DL
 | 
						|
	ORB   DL, DH
 | 
						|
 | 
						|
	// }
 | 
						|
skip_fill3:
 | 
						|
 | 
						|
	// val0 := br3.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v0 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br3.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val1 := br3.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v1 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br3.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off] = uint8(v0.entry >> 8)
 | 
						|
	// buf[stream][off+1] = uint8(v1.entry >> 8)
 | 
						|
	MOVW BX, 768(buffer)(off*1)
 | 
						|
 | 
						|
	// SECOND PART:
 | 
						|
	// val2 := br3.peekTopBits(peekBits)
 | 
						|
	MOVQ br_value, AX
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v2 := table[val0&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v0
 | 
						|
 | 
						|
	// br3.advance(uint8(v0.entry))
 | 
						|
	MOVB    AH, BL           // BL = uint8(v0.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CL, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// val3 := br3.peekTopBits(peekBits)
 | 
						|
	MOVQ peek_bits, CX
 | 
						|
	MOVQ br_value, AX
 | 
						|
	SHRQ CL, AX        // AX = (value >> peek_bits) & mask
 | 
						|
 | 
						|
	// v3 := table[val1&mask]
 | 
						|
	MOVW 0(table)(AX*2), AX // AX - v1
 | 
						|
 | 
						|
	// br3.advance(uint8(v1.entry))
 | 
						|
	MOVB    AH, BH           // BH = uint8(v1.entry >> 8)
 | 
						|
	MOVBQZX AL, CX
 | 
						|
	SHLQ    CX, br_value     // value <<= n
 | 
						|
	ADDQ    CX, br_bits_read // bits_read += n
 | 
						|
 | 
						|
	// these two writes get coalesced
 | 
						|
	// buf[stream][off+2] = uint8(v2.entry >> 8)
 | 
						|
	// buf[stream][off+3] = uint8(v3.entry >> 8)
 | 
						|
	MOVW BX, 768+2(buffer)(off*1)
 | 
						|
 | 
						|
	// update the bitrader reader structure
 | 
						|
	MOVB br_bits_read, bitReaderShifted_bitsRead(br3)
 | 
						|
	MOVQ br_value, bitReaderShifted_value(br3)
 | 
						|
	MOVQ br_offset, bitReaderShifted_off(br3)
 | 
						|
 | 
						|
	ADDQ $4, off // off += 2
 | 
						|
 | 
						|
	TESTB DH, DH // any br[i].ofs < 4?
 | 
						|
	JNZ   end
 | 
						|
 | 
						|
	CMPQ off, $bufoff
 | 
						|
	JL   main_loop
 | 
						|
 | 
						|
end:
 | 
						|
	MOVQ 0(SP), BP
 | 
						|
 | 
						|
	MOVB off, ret+56(FP)
 | 
						|
	RET
 | 
						|
 | 
						|
#undef off
 | 
						|
#undef buffer
 | 
						|
#undef table
 | 
						|
 | 
						|
#undef br_bits_read
 | 
						|
#undef br_value
 | 
						|
#undef br_offset
 | 
						|
#undef peek_bits
 | 
						|
#undef exhausted
 | 
						|
 | 
						|
#undef br0
 | 
						|
#undef br1
 | 
						|
#undef br2
 | 
						|
#undef br3
 |