mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 18:13:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			86 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			86 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
package compress
 | 
						|
 | 
						|
import "math"
 | 
						|
 | 
						|
// Estimate returns a normalized compressibility estimate of block b.
 | 
						|
// Values close to zero are likely uncompressible.
 | 
						|
// Values above 0.1 are likely to be compressible.
 | 
						|
// Values above 0.5 are very compressible.
 | 
						|
// Very small lengths will return 0.
 | 
						|
func Estimate(b []byte) float64 {
 | 
						|
	if len(b) < 16 {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
 | 
						|
	// Correctly predicted order 1
 | 
						|
	hits := 0
 | 
						|
	lastMatch := false
 | 
						|
	var o1 [256]byte
 | 
						|
	var hist [256]int
 | 
						|
	c1 := byte(0)
 | 
						|
	for _, c := range b {
 | 
						|
		if c == o1[c1] {
 | 
						|
			// We only count a hit if there was two correct predictions in a row.
 | 
						|
			if lastMatch {
 | 
						|
				hits++
 | 
						|
			}
 | 
						|
			lastMatch = true
 | 
						|
		} else {
 | 
						|
			lastMatch = false
 | 
						|
		}
 | 
						|
		o1[c1] = c
 | 
						|
		c1 = c
 | 
						|
		hist[c]++
 | 
						|
	}
 | 
						|
 | 
						|
	// Use x^0.6 to give better spread
 | 
						|
	prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
 | 
						|
 | 
						|
	// Calculate histogram distribution
 | 
						|
	variance := float64(0)
 | 
						|
	avg := float64(len(b)) / 256
 | 
						|
 | 
						|
	for _, v := range hist {
 | 
						|
		Δ := float64(v) - avg
 | 
						|
		variance += Δ * Δ
 | 
						|
	}
 | 
						|
 | 
						|
	stddev := math.Sqrt(float64(variance)) / float64(len(b))
 | 
						|
	exp := math.Sqrt(1 / float64(len(b)))
 | 
						|
 | 
						|
	// Subtract expected stddev
 | 
						|
	stddev -= exp
 | 
						|
	if stddev < 0 {
 | 
						|
		stddev = 0
 | 
						|
	}
 | 
						|
	stddev *= 1 + exp
 | 
						|
 | 
						|
	// Use x^0.4 to give better spread
 | 
						|
	entropy := math.Pow(stddev, 0.4)
 | 
						|
 | 
						|
	// 50/50 weight between prediction and histogram distribution
 | 
						|
	return math.Pow((prediction+entropy)/2, 0.9)
 | 
						|
}
 | 
						|
 | 
						|
// ShannonEntropyBits returns the number of bits minimum required to represent
 | 
						|
// an entropy encoding of the input bytes.
 | 
						|
// https://en.wiktionary.org/wiki/Shannon_entropy
 | 
						|
func ShannonEntropyBits(b []byte) int {
 | 
						|
	if len(b) == 0 {
 | 
						|
		return 0
 | 
						|
	}
 | 
						|
	var hist [256]int
 | 
						|
	for _, c := range b {
 | 
						|
		hist[c]++
 | 
						|
	}
 | 
						|
	shannon := float64(0)
 | 
						|
	invTotal := 1.0 / float64(len(b))
 | 
						|
	for _, v := range hist[:] {
 | 
						|
		if v > 0 {
 | 
						|
			n := float64(v)
 | 
						|
			shannon += math.Ceil(-math.Log2(n*invTotal) * n)
 | 
						|
		}
 | 
						|
	}
 | 
						|
	return int(math.Ceil(shannon))
 | 
						|
}
 |