Sebastiaan van Stijn
2024-06-04 11:33:43 +02:00
parent dbdd3601eb
commit 9358f84668
146 changed files with 2661 additions and 1102 deletions

View File

@ -4,6 +4,7 @@ import (
"bytes"
"fmt"
"regexp"
"slices"
"strings"
"text/scanner"
"unicode"
@ -11,6 +12,11 @@ import (
"github.com/pkg/errors"
)
type EnvGetter interface {
Get(string) (string, bool)
Keys() []string
}
// Lex performs shell word splitting and variable expansion.
//
// Lex takes a string and an array of env variables and
@ -18,12 +24,15 @@ import (
// tokens. Tries to mimic bash shell process.
// It doesn't support all flavors of ${xx:...} formats but new ones can
// be added by adding code to the "special ${} format processing" section
//
// It is not safe to call methods on a Lex instance concurrently.
type Lex struct {
escapeToken rune
RawQuotes bool
RawEscapes bool
SkipProcessQuotes bool
SkipUnsetEnv bool
shellWord shellWord
}
// NewLex creates a new Lex which uses escapeToken to escape quotes.
@ -35,8 +44,9 @@ func NewLex(escapeToken rune) *Lex {
// and replace any env var references in 'word'. It will also
// return variables in word which were not found in the 'env' list,
// which is useful in later linting.
func (s *Lex) ProcessWord(word string, env []string) (string, map[string]struct{}, error) {
result, err := s.process(word, BuildEnvs(env))
// TODO: rename
func (s *Lex) ProcessWord(word string, env EnvGetter) (string, map[string]struct{}, error) {
result, err := s.process(word, env, true)
return result.Result, result.Unmatched, err
}
@ -47,18 +57,11 @@ func (s *Lex) ProcessWord(word string, env []string) (string, map[string]struct{
// this splitting is done **after** the env var substitutions are done.
// Note, each one is trimmed to remove leading and trailing spaces (unless
// they are quoted", but ProcessWord retains spaces between words.
func (s *Lex) ProcessWords(word string, env []string) ([]string, error) {
result, err := s.process(word, BuildEnvs(env))
func (s *Lex) ProcessWords(word string, env EnvGetter) ([]string, error) {
result, err := s.process(word, env, false)
return result.Words, err
}
// ProcessWordWithMap will use the 'env' list of environment variables,
// and replace any env var references in 'word'.
func (s *Lex) ProcessWordWithMap(word string, env map[string]string) (string, error) {
result, err := s.process(word, env)
return result.Result, err
}
type ProcessWordResult struct {
Result string
Words []string
@ -68,8 +71,26 @@ type ProcessWordResult struct {
// ProcessWordWithMatches will use the 'env' list of environment variables,
// replace any env var references in 'word' and return the env that were used.
func (s *Lex) ProcessWordWithMatches(word string, env map[string]string) (ProcessWordResult, error) {
sw := s.init(word, env)
func (s *Lex) ProcessWordWithMatches(word string, env EnvGetter) (ProcessWordResult, error) {
return s.process(word, env, true)
}
func (s *Lex) initWord(word string, env EnvGetter, capture bool) *shellWord {
sw := &s.shellWord
sw.Lex = s
sw.envs = env
sw.capture = capture
sw.rawEscapes = s.RawEscapes
if capture {
sw.matches = nil
sw.nonmatches = nil
}
sw.scanner.Init(strings.NewReader(word))
return sw
}
func (s *Lex) process(word string, env EnvGetter, capture bool) (ProcessWordResult, error) {
sw := s.initWord(word, env, capture)
word, words, err := sw.process(word)
return ProcessWordResult{
Result: word,
@ -79,47 +100,15 @@ func (s *Lex) ProcessWordWithMatches(word string, env map[string]string) (Proces
}, err
}
func (s *Lex) ProcessWordsWithMap(word string, env map[string]string) ([]string, error) {
result, err := s.process(word, env)
return result.Words, err
}
func (s *Lex) init(word string, env map[string]string) *shellWord {
sw := &shellWord{
envs: env,
escapeToken: s.escapeToken,
skipUnsetEnv: s.SkipUnsetEnv,
skipProcessQuotes: s.SkipProcessQuotes,
rawQuotes: s.RawQuotes,
rawEscapes: s.RawEscapes,
matches: make(map[string]struct{}),
nonmatches: make(map[string]struct{}),
}
sw.scanner.Init(strings.NewReader(word))
return sw
}
func (s *Lex) process(word string, env map[string]string) (*ProcessWordResult, error) {
sw := s.init(word, env)
word, words, err := sw.process(word)
return &ProcessWordResult{
Result: word,
Words: words,
Matched: sw.matches,
Unmatched: sw.nonmatches,
}, err
}
type shellWord struct {
scanner scanner.Scanner
envs map[string]string
escapeToken rune
rawQuotes bool
rawEscapes bool
skipUnsetEnv bool
skipProcessQuotes bool
matches map[string]struct{}
nonmatches map[string]struct{}
*Lex
wordsBuffer strings.Builder
scanner scanner.Scanner
envs EnvGetter
rawEscapes bool
capture bool // capture matches and nonmatches
matches map[string]struct{}
nonmatches map[string]struct{}
}
func (sw *shellWord) process(source string) (string, []string, error) {
@ -131,16 +120,16 @@ func (sw *shellWord) process(source string) (string, []string, error) {
}
type wordsStruct struct {
word string
buf *strings.Builder
words []string
inWord bool
}
func (w *wordsStruct) addChar(ch rune) {
if unicode.IsSpace(ch) && w.inWord {
if len(w.word) != 0 {
w.words = append(w.words, w.word)
w.word = ""
if w.buf.Len() != 0 {
w.words = append(w.words, w.buf.String())
w.buf.Reset()
w.inWord = false
}
} else if !unicode.IsSpace(ch) {
@ -149,7 +138,7 @@ func (w *wordsStruct) addChar(ch rune) {
}
func (w *wordsStruct) addRawChar(ch rune) {
w.word += string(ch)
w.buf.WriteRune(ch)
w.inWord = true
}
@ -160,16 +149,16 @@ func (w *wordsStruct) addString(str string) {
}
func (w *wordsStruct) addRawString(str string) {
w.word += str
w.buf.WriteString(str)
w.inWord = true
}
func (w *wordsStruct) getWords() []string {
if len(w.word) > 0 {
w.words = append(w.words, w.word)
if w.buf.Len() > 0 {
w.words = append(w.words, w.buf.String())
// Just in case we're called again by mistake
w.word = ""
w.buf.Reset()
w.inWord = false
}
return w.words
@ -178,13 +167,18 @@ func (w *wordsStruct) getWords() []string {
// Process the word, starting at 'pos', and stop when we get to the
// end of the word or the 'stopChar' character
func (sw *shellWord) processStopOn(stopChar rune, rawEscapes bool) (string, []string, error) {
var result bytes.Buffer
// result buffer can't be currently shared for shellWord as it is called internally
// by processDollar
var result strings.Builder
sw.wordsBuffer.Reset()
var words wordsStruct
words.buf = &sw.wordsBuffer
// no need to initialize all the time
var charFuncMapping = map[rune]func() (string, error){
'$': sw.processDollar,
}
if !sw.skipProcessQuotes {
if !sw.SkipProcessQuotes {
charFuncMapping['\''] = sw.processSingleQuote
charFuncMapping['"'] = sw.processDoubleQuote
}
@ -261,7 +255,7 @@ func (sw *shellWord) processSingleQuote() (string, error) {
var result bytes.Buffer
ch := sw.scanner.Next()
if sw.rawQuotes {
if sw.RawQuotes {
result.WriteRune(ch)
}
@ -271,7 +265,7 @@ func (sw *shellWord) processSingleQuote() (string, error) {
case scanner.EOF:
return "", errors.New("unexpected end of statement while looking for matching single-quote")
case '\'':
if sw.rawQuotes {
if sw.RawQuotes {
result.WriteRune(ch)
}
return result.String(), nil
@ -296,7 +290,7 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
var result bytes.Buffer
ch := sw.scanner.Next()
if sw.rawQuotes {
if sw.RawQuotes {
result.WriteRune(ch)
}
@ -306,7 +300,7 @@ func (sw *shellWord) processDoubleQuote() (string, error) {
return "", errors.New("unexpected end of statement while looking for matching double-quote")
case '"':
ch := sw.scanner.Next()
if sw.rawQuotes {
if sw.RawQuotes {
result.WriteRune(ch)
}
return result.String(), nil
@ -350,7 +344,7 @@ func (sw *shellWord) processDollar() (string, error) {
return "$", nil
}
value, found := sw.getEnv(name)
if !found && sw.skipUnsetEnv {
if !found && sw.SkipUnsetEnv {
return "$" + name, nil
}
return value, nil
@ -373,7 +367,7 @@ func (sw *shellWord) processDollar() (string, error) {
case '}':
// Normal ${xx} case
value, set := sw.getEnv(name)
if !set && sw.skipUnsetEnv {
if !set && sw.SkipUnsetEnv {
return fmt.Sprintf("${%s}", name), nil
}
return value, nil
@ -395,7 +389,7 @@ func (sw *shellWord) processDollar() (string, error) {
// Grab the current value of the variable in question so we
// can use it to determine what to do based on the modifier
value, set := sw.getEnv(name)
if sw.skipUnsetEnv && !set {
if sw.SkipUnsetEnv && !set {
return fmt.Sprintf("${%s%s%s}", name, chs, word), nil
}
@ -465,7 +459,7 @@ func (sw *shellWord) processDollar() (string, error) {
}
value, set := sw.getEnv(name)
if sw.skipUnsetEnv && !set {
if sw.SkipUnsetEnv && !set {
return fmt.Sprintf("${%s/%s/%s}", name, pattern, replacement), nil
}
@ -528,34 +522,51 @@ func isSpecialParam(char rune) bool {
}
func (sw *shellWord) getEnv(name string) (string, bool) {
for key, value := range sw.envs {
if EqualEnvKeys(name, key) {
v, ok := sw.envs.Get(name)
if ok {
if sw.capture {
if sw.matches == nil {
sw.matches = make(map[string]struct{})
}
sw.matches[name] = struct{}{}
return value, true
}
return v, true
}
if sw.capture {
if sw.nonmatches == nil {
sw.nonmatches = make(map[string]struct{})
}
sw.nonmatches[name] = struct{}{}
}
sw.nonmatches[name] = struct{}{}
return "", false
}
func BuildEnvs(env []string) map[string]string {
func EnvsFromSlice(env []string) EnvGetter {
envs := map[string]string{}
keys := make([]string, 0, len(env))
for _, e := range env {
i := strings.Index(e, "=")
if i < 0 {
envs[e] = ""
} else {
k := e[:i]
v := e[i+1:]
// overwrite value if key already exists
envs[k] = v
}
k, v, _ := strings.Cut(e, "=")
keys = append(keys, k)
envs[NormalizeEnvKey(k)] = v
}
return &envGetter{env: envs, keys: keys}
}
return envs
type envGetter struct {
env map[string]string
keys []string
}
var _ EnvGetter = &envGetter{}
func (e *envGetter) Get(key string) (string, bool) {
key = NormalizeEnvKey(key)
v, ok := e.env[key]
return v, ok
}
func (e *envGetter) Keys() []string {
return e.keys
}
// convertShellPatternToRegex converts a shell-like wildcard pattern
@ -647,11 +658,7 @@ func reversePattern(pattern string) string {
func reverseString(str string) string {
out := []rune(str)
outIdx := len(out) - 1
for i := 0; i < outIdx; i++ {
out[i], out[outIdx] = out[outIdx], out[i]
outIdx--
}
slices.Reverse(out)
return string(out)
}