CrazyMax 7bea00f3dd
vendor: update buildkit to v0.16.0-rc1
Signed-off-by: CrazyMax <1951866+crazy-max@users.noreply.github.com>
2024-09-04 16:58:37 +02:00

680 lines
17 KiB
Go

package shell
import (
"bytes"
"fmt"
"regexp"
"slices"
"strings"
"text/scanner"
"unicode"
"github.com/pkg/errors"
)
// EnvGetter is the source of environment variables consulted while
// expanding a word.
type EnvGetter interface {
// Get looks up a variable by name; the boolean reports whether it was found.
Get(string) (string, bool)
// Keys returns the names of the variables known to the getter.
Keys() []string
}
// Lex performs shell word splitting and variable expansion.
//
// Lex takes a string and a set of env variables and processes all quotes
// (" and ') as well as $xxx and ${xxx} env variable tokens. It tries to
// mimic the behaviour of the bash shell.
// It doesn't support all flavors of ${xx:...} formats but new ones can
// be added by adding code to the "special ${} format processing" section.
//
// It is not safe to call methods on a Lex instance concurrently.
type Lex struct {
// escapeToken is the rune that escapes the character following it.
escapeToken rune
// RawQuotes keeps the surrounding quote characters in the output.
RawQuotes bool
// RawEscapes keeps the escape token itself in the output.
RawEscapes bool
// SkipProcessQuotes disables the special handling of ' and ".
SkipProcessQuotes bool
// SkipUnsetEnv leaves references to variables that are not set unexpanded.
SkipUnsetEnv bool
// shellWord is the per-call parsing state, re-initialised by initWord.
shellWord shellWord
}
// NewLex returns a Lex that treats escapeToken as its escape character.
// Every other option starts out at its zero value.
func NewLex(escapeToken rune) *Lex {
	lex := new(Lex)
	lex.escapeToken = escapeToken
	return lex
}
// ProcessWord expands every env var reference found in 'word' using the
// variables supplied by 'env'. Besides the expanded string it returns the
// set of referenced variable names that 'env' did not contain, which is
// useful for later linting.
// TODO: rename
func (s *Lex) ProcessWord(word string, env EnvGetter) (string, map[string]struct{}, error) {
	res, err := s.process(word, env, true)
	expanded, missing := res.Result, res.Unmatched
	return expanded, missing, err
}
// ProcessWords expands every env var reference found in 'word' using the
// variables supplied by 'env' and returns the expansion split into words.
// Splitting happens on whitespace, honours quotes, and is performed
// **after** the env var substitutions. Each word is trimmed of leading and
// trailing spaces (unless they are quoted), whereas ProcessWord retains the
// spaces between words.
func (s *Lex) ProcessWords(word string, env EnvGetter) ([]string, error) {
	res, err := s.process(word, env, false)
	split := res.Words
	return split, err
}
// ProcessWordResult bundles everything produced by expanding one word.
type ProcessWordResult struct {
// Result is the expanded word as a single string.
Result string
// Words is the expanded word split into individual words.
Words []string
// Matched holds the names of referenced variables that were found in the env.
Matched map[string]struct{}
// Unmatched holds the names of referenced variables that were not found.
Unmatched map[string]struct{}
}
// ProcessWordWithMatches expands every env var reference found in 'word'
// using 'env' and reports, alongside the expansion, which variable names
// were looked up successfully and which were not.
func (s *Lex) ProcessWordWithMatches(word string, env EnvGetter) (ProcessWordResult, error) {
	const capture = true
	return s.process(word, env, capture)
}
// initWord prepares the Lex's embedded shellWord for parsing 'word' and
// returns a pointer to it. When capture is requested, the match sets left
// over from a previous call are discarded first.
func (s *Lex) initWord(word string, env EnvGetter, capture bool) *shellWord {
	state := &s.shellWord
	if capture {
		state.matches, state.nonmatches = nil, nil
	}
	state.Lex = s
	state.envs = env
	state.capture = capture
	state.rawEscapes = s.RawEscapes
	state.scanner.Init(strings.NewReader(word))
	return state
}
// process runs a full expansion of 'word' and packages the expanded string,
// the split words and the matched/unmatched variable sets into a
// ProcessWordResult.
func (s *Lex) process(word string, env EnvGetter, capture bool) (ProcessWordResult, error) {
	sw := s.initWord(word, env, capture)
	expanded, split, err := sw.process(word)

	var res ProcessWordResult
	res.Result = expanded
	res.Words = split
	res.Matched = sw.matches
	res.Unmatched = sw.nonmatches
	return res, err
}
// shellWord is the parsing state for a single word. It embeds the owning
// Lex so the option fields and the escape token are directly reachable.
type shellWord struct {
*Lex
// wordsBuffer is a scratch builder for word splitting.
wordsBuffer strings.Builder
// scanner reads the word being processed one rune at a time.
scanner scanner.Scanner
// envs supplies the values of referenced variables.
envs EnvGetter
// rawEscapes mirrors Lex.RawEscapes and is toggled temporarily by processStopOn.
rawEscapes bool
capture bool // capture matches and nonmatches
// matches and nonmatches are filled in by getEnv when capture is set.
matches map[string]struct{}
nonmatches map[string]struct{}
}
// process expands the whole word loaded into the scanner. 'source' is only
// used to give context to a returned error.
func (sw *shellWord) process(source string) (string, []string, error) {
	word, words, err := sw.processStopOn(scanner.EOF, sw.rawEscapes)
	if err == nil {
		return word, words, nil
	}
	return word, words, errors.Wrapf(err, "failed to process %q", source)
}
// wordsStruct collects characters into whitespace-separated words.
type wordsStruct struct {
	buf    *strings.Builder // characters of the word currently being built
	words  []string         // completed words, in the order they were finished
	inWord bool             // set once something was added to the current word
}

// flush moves the word under construction into the list of finished words
// and starts a fresh one.
func (w *wordsStruct) flush() {
	w.words = append(w.words, w.buf.String())
	w.buf.Reset()
	w.inWord = false
}

// addChar adds ch to the current word. Whitespace is never stored: it ends
// the current word if one is in progress and non-empty, and is ignored
// otherwise.
func (w *wordsStruct) addChar(ch rune) {
	if !unicode.IsSpace(ch) {
		w.addRawChar(ch)
		return
	}
	if !w.inWord || w.buf.Len() == 0 {
		return
	}
	w.flush()
}

// addRawChar appends ch to the current word verbatim, even if it is
// whitespace.
func (w *wordsStruct) addRawChar(ch rune) {
	w.inWord = true
	w.buf.WriteRune(ch)
}

// addString feeds every rune of str through addChar, so whitespace inside
// str splits words.
func (w *wordsStruct) addString(str string) {
	for _, r := range []rune(str) {
		w.addChar(r)
	}
}

// addRawString appends str to the current word verbatim, without splitting
// on whitespace.
func (w *wordsStruct) addRawString(str string) {
	w.inWord = true
	w.buf.WriteString(str)
}

// getWords finishes the word in progress, if any, and returns all words
// collected so far. The buffer is cleared so that calling it again does not
// add the same word twice.
func (w *wordsStruct) getWords() []string {
	if w.buf.Len() == 0 {
		return w.words
	}
	w.flush()
	return w.words
}
// processStopOn expands the input from the scanner's current position until
// either the end of the input or the 'stopChar' character is reached. The
// stop character itself is consumed but not included in the output.
//
// rawEscapes overrides sw.rawEscapes for the duration of the call; the
// previous setting is restored on return.
//
// It returns the expanded text as one string and, separately, the same text
// split into words. When stopChar is not scanner.EOF and the input ends
// before stopChar is seen, an error is returned.
func (sw *shellWord) processStopOn(stopChar rune, rawEscapes bool) (string, []string, error) {
	// The result buffer can't be shared on shellWord because this function is
	// re-entered through processDollar.
	var result strings.Builder

	// The same is true for the word buffer: a nested invocation (for example
	// for the default in "a${x:-b}c") must not wipe the word the outer
	// invocation is still building, so each call gets its own builder.
	var wordsBuf strings.Builder
	words := wordsStruct{buf: &wordsBuf}

	// Characters that are handed to a dedicated handler.
	charFuncMapping := map[rune]func() (string, error){
		'$': sw.processDollar,
	}
	if !sw.SkipProcessQuotes {
		charFuncMapping['\''] = sw.processSingleQuote
		charFuncMapping['"'] = sw.processDoubleQuote
	}

	// Temporarily set sw.rawEscapes if needed.
	if rawEscapes != sw.rawEscapes {
		previous := sw.rawEscapes
		sw.rawEscapes = rawEscapes
		defer func() {
			sw.rawEscapes = previous
		}()
	}

	for sw.scanner.Peek() != scanner.EOF {
		ch := sw.scanner.Peek()

		if stopChar != scanner.EOF && ch == stopChar {
			sw.scanner.Next()
			return result.String(), words.getWords(), nil
		}
		if fn, ok := charFuncMapping[ch]; ok {
			// Call special processing func for certain chars
			tmp, err := fn()
			if err != nil {
				return "", []string{}, err
			}
			result.WriteString(tmp)

			// Unquoted expansions are subject to word splitting, quoted text
			// is not.
			if ch == rune('$') {
				words.addString(tmp)
			} else {
				words.addRawString(tmp)
			}
		} else {
			// Not special, just add it to the result
			ch = sw.scanner.Next()

			if ch == sw.escapeToken {
				if sw.rawEscapes {
					words.addRawChar(ch)
					result.WriteRune(ch)
				}

				// '\' (default escape token, but ` allowed) escapes, except end of line
				ch = sw.scanner.Next()

				if ch == scanner.EOF {
					break
				}

				words.addRawChar(ch)
			} else {
				words.addChar(ch)
			}

			result.WriteRune(ch)
		}
	}
	if stopChar != scanner.EOF {
		return "", []string{}, errors.Errorf("unexpected end of statement while looking for matching %s", string(stopChar))
	}
	return result.String(), words.getWords(), nil
}
// processSingleQuote consumes a single-quoted section, starting at the
// opening quote, and returns its content. Everything between the quotes is
// taken literally and there is no way to escape a single quote inside it.
// The quotes themselves are only part of the output when RawQuotes is set.
//
// From the "sh" man page:
// Single Quotes
// Enclosing characters in single quotes preserves the literal meaning of
// all the characters (except single quotes, making it impossible to put
// single-quotes in a single-quoted string).
func (sw *shellWord) processSingleQuote() (string, error) {
	var buf bytes.Buffer

	if opening := sw.scanner.Next(); sw.RawQuotes {
		buf.WriteRune(opening)
	}

	for ch := sw.scanner.Next(); ch != '\''; ch = sw.scanner.Next() {
		if ch == scanner.EOF {
			return "", errors.New("unexpected end of statement while looking for matching single-quote")
		}
		buf.WriteRune(ch)
	}

	// The loop only ends normally on the closing quote.
	if sw.RawQuotes {
		buf.WriteRune('\'')
	}
	return buf.String(), nil
}
// processDoubleQuote consumes a double-quoted section, starting at the
// opening quote, and returns its expanded content. Inside the quotes
// everything is literal (including ') except for $, which is expanded, and
// the escape token, which can escape ", $ and itself. The quotes themselves
// are only part of the output when RawQuotes is set.
//
// From the "sh" man page:
// Double Quotes
// Enclosing characters within double quotes preserves the literal meaning
// of all characters except dollarsign ($), backquote (`), and backslash
// (\). The backslash inside double quotes is historically weird, and
// serves to quote only the following characters:
// $ ` " \ <newline>.
// Otherwise it remains literal.
func (sw *shellWord) processDoubleQuote() (string, error) {
	var out bytes.Buffer

	opening := sw.scanner.Next()
	if sw.RawQuotes {
		out.WriteRune(opening)
	}

	for {
		next := sw.scanner.Peek()

		if next == scanner.EOF {
			return "", errors.New("unexpected end of statement while looking for matching double-quote")
		}

		if next == '"' {
			closing := sw.scanner.Next()
			if sw.RawQuotes {
				out.WriteRune(closing)
			}
			return out.String(), nil
		}

		if next == '$' {
			expanded, err := sw.processDollar()
			if err != nil {
				return "", err
			}
			out.WriteString(expanded)
			continue
		}

		ch := sw.scanner.Next()
		if ch != sw.escapeToken {
			out.WriteRune(ch)
			continue
		}

		if sw.rawEscapes {
			out.WriteRune(ch)
		}
		switch sw.scanner.Peek() {
		case scanner.EOF:
			// An escape token at the very end is dropped; the next
			// iteration reports the missing closing quote.
			continue
		case '"', '$', sw.escapeToken:
			// Only these characters can be escaped; in front of anything
			// else the escape token stays as it is.
			// Backquotes get no special treatment for now, since the text
			// between them is not going to be executed anyway.
			ch = sw.scanner.Next()
		}
		// NOTE(review): with rawEscapes set, an escape token in front of a
		// non-escapable character looks like it ends up in the output
		// twice (once above, once here) - confirm whether that is intended.
		out.WriteRune(ch)
	}
}
// processDollar expands one variable reference. It consumes the next rune
// from the scanner as the '$' and returns the text the reference expands to.
//
// Supported forms:
//
//	$name, ${name}            value of the variable
//	${name-word} ${name:-word} word if unset (or, with ':', empty)
//	${name+word} ${name:+word} word if set (and, with ':', non-empty)
//	${name?word} ${name:?word} error if unset (or, with ':', empty)
//	${name#pat}  ${name##pat}  strip shortest/longest matching prefix
//	${name%pat}  ${name%%pat}  strip shortest/longest matching suffix
//	${name/pat/repl}           replace the first match of pat with repl
//	${name//pat/repl}          replace every match of pat with repl
//
// A '$' that is not followed by a name is returned as a literal "$". When
// SkipUnsetEnv is set, a reference to a variable that is not set is returned
// in its (re-assembled) source form instead of being expanded.
//
// An error is returned for malformed or unsupported substitutions, for
// invalid patterns, and when a '?' modifier fires.
func (sw *shellWord) processDollar() (string, error) {
	sw.scanner.Next() // consume the '$'

	// $xxx case
	if sw.scanner.Peek() != '{' {
		name := sw.processName()
		if name == "" {
			return "$", nil
		}
		value, found := sw.getEnv(name)
		if !found && sw.SkipUnsetEnv {
			return "$" + name, nil
		}
		return value, nil
	}

	sw.scanner.Next() // consume the '{'
	switch sw.scanner.Peek() {
	case scanner.EOF:
		return "", errors.New("syntax error: missing '}'")
	case '{', '}', ':':
		// Invalid ${{xx}, ${:xx}, ${:}. ${} case
		return "", errors.New("syntax error: bad substitution")
	}
	name := sw.processName()
	ch := sw.scanner.Next()
	// chs collects the modifier characters for error messages and for
	// re-assembling the reference when SkipUnsetEnv applies.
	chs := string(ch)
	nullIsUnset := false

	switch ch {
	case '}':
		// Normal ${xx} case
		value, set := sw.getEnv(name)
		if !set && sw.SkipUnsetEnv {
			return fmt.Sprintf("${%s}", name), nil
		}
		return value, nil
	case ':':
		// The ':' variants treat an empty value like an unset one; the
		// actual modifier is the character that follows.
		nullIsUnset = true
		ch = sw.scanner.Next()
		chs += string(ch)
		fallthrough
	case '+', '-', '?', '#', '%':
		// Patterns keep their escapes so they can be interpreted when the
		// pattern is converted to a regular expression.
		rawEscapes := ch == '#' || ch == '%'
		if nullIsUnset && rawEscapes {
			return "", errors.Errorf("unsupported modifier (%s) in substitution", chs)
		}
		word, _, err := sw.processStopOn('}', rawEscapes)
		if err != nil {
			if sw.scanner.Peek() == scanner.EOF {
				return "", errors.New("syntax error: missing '}'")
			}
			return "", err
		}

		// Grab the current value of the variable in question so we
		// can use it to determine what to do based on the modifier
		value, set := sw.getEnv(name)
		if sw.SkipUnsetEnv && !set {
			return fmt.Sprintf("${%s%s%s}", name, chs, word), nil
		}

		switch ch {
		case '-':
			if !set || (nullIsUnset && value == "") {
				return word, nil
			}
			return value, nil
		case '+':
			if !set || (nullIsUnset && value == "") {
				return "", nil
			}
			return word, nil
		case '?':
			if !set {
				message := "is not allowed to be unset"
				if word != "" {
					message = word
				}
				return "", errors.Errorf("%s: %s", name, message)
			}
			if nullIsUnset && value == "" {
				message := "is not allowed to be empty"
				if word != "" {
					message = word
				}
				return "", errors.Errorf("%s: %s", name, message)
			}
			return value, nil
		case '%', '#':
			// %/# matches the shortest pattern expansion, %%/## the longest
			greedy := false
			if len(word) > 0 && word[0] == byte(ch) {
				greedy = true
				word = word[1:]
			}

			if ch == '%' {
				return trimSuffix(word, value, greedy)
			}
			return trimPrefix(word, value, greedy)
		default:
			return "", errors.Errorf("unsupported modifier (%s) in substitution", chs)
		}
	case '/':
		// A second '/' selects "replace all" instead of "replace first".
		replaceAll := sw.scanner.Peek() == '/'
		if replaceAll {
			sw.scanner.Next()
		}

		pattern, _, err := sw.processStopOn('/', true)
		if err != nil {
			if sw.scanner.Peek() == scanner.EOF {
				return "", errors.New("syntax error: missing '/' in ${}")
			}
			return "", err
		}

		replacement, _, err := sw.processStopOn('}', true)
		if err != nil {
			if sw.scanner.Peek() == scanner.EOF {
				return "", errors.New("syntax error: missing '}'")
			}
			return "", err
		}

		value, set := sw.getEnv(name)
		if sw.SkipUnsetEnv && !set {
			return fmt.Sprintf("${%s/%s/%s}", name, pattern, replacement), nil
		}

		re, err := convertShellPatternToRegex(pattern, true, false)
		if err != nil {
			return "", errors.Errorf("invalid pattern (%s) in substitution: %s", pattern, err)
		}
		if replaceAll {
			// The replacement is plain text. The literal variant is required
			// so that a '$' in it is not interpreted as a regexp submatch
			// reference, matching what the single-replacement branch does.
			value = re.ReplaceAllLiteralString(value, replacement)
		} else if idx := re.FindStringIndex(value); idx != nil {
			value = value[:idx[0]] + replacement + value[idx[1]:]
		}
		return value, nil
	default:
		return "", errors.Errorf("unsupported modifier (%s) in substitution", chs)
	}
}
// processName reads a variable name from the scanner and returns it.
//
// Three shapes are recognised, decided by the first character:
//   - a digit: the name is the whole run of digits that follows;
//   - a special parameter (see isSpecialParam): the name is that single
//     character;
//   - anything else: the name is the longest run of letters, digits and
//     underscores, which may be empty.
func (sw *shellWord) processName() string {
	var name bytes.Buffer

	first := sw.scanner.Peek()
	switch {
	case unicode.IsDigit(first):
		// Keep reading until the first non-digit character, or EOF
		for sw.scanner.Peek() != scanner.EOF && unicode.IsDigit(sw.scanner.Peek()) {
			name.WriteRune(sw.scanner.Next())
		}
		return name.String()
	case isSpecialParam(first):
		return string(sw.scanner.Next())
	}

	for {
		ch := sw.scanner.Peek()
		if ch == scanner.EOF {
			break
		}
		isNameChar := unicode.IsLetter(ch) || unicode.IsDigit(ch) || ch == '_'
		if !isNameChar {
			break
		}
		name.WriteRune(sw.scanner.Next())
	}
	return name.String()
}
// isSpecialParam reports whether char is one of the shell's special
// parameters (@ * # ? - $ ! 0), as defined in
// http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02
func isSpecialParam(char rune) bool {
	const specialParams = "@*#?-$!0"
	return strings.ContainsRune(specialParams, char)
}
// getEnv looks up 'name' in the environment and reports whether it was
// found. When capturing is enabled the name is also recorded in either the
// matches or the nonmatches set, which are created on first use.
func (sw *shellWord) getEnv(name string) (string, bool) {
	value, found := sw.envs.Get(name)

	if sw.capture {
		set := &sw.nonmatches
		if found {
			set = &sw.matches
		}
		if *set == nil {
			*set = make(map[string]struct{})
		}
		(*set)[name] = struct{}{}
	}

	if !found {
		return "", false
	}
	return value, true
}
// EnvsFromSlice builds an EnvGetter from a list of "KEY=value" entries.
// An entry without '=' yields a variable with an empty value. Values are
// stored under the normalized key, while the key list keeps the names as
// they were given.
func EnvsFromSlice(env []string) EnvGetter {
	getter := &envGetter{
		env:  make(map[string]string),
		keys: make([]string, 0, len(env)),
	}
	for i := range env {
		name, value, _ := strings.Cut(env[i], "=")
		getter.keys = append(getter.keys, name)
		getter.env[NormalizeEnvKey(name)] = value
	}
	return getter
}
// envGetter is the map-backed EnvGetter returned by EnvsFromSlice.
type envGetter struct {
	env  map[string]string // values, indexed by normalized key
	keys []string          // keys in the order and spelling they were supplied
}

// Compile-time check that *envGetter implements EnvGetter.
var _ EnvGetter = (*envGetter)(nil)

// Get returns the value stored for key (after normalizing it) and whether
// such a value exists.
func (e *envGetter) Get(key string) (string, bool) {
	value, found := e.env[NormalizeEnvKey(key)]
	return value, found
}

// Keys returns the variable names held by the getter. The returned slice is
// the getter's own, not a copy.
func (e *envGetter) Keys() []string {
	names := e.keys
	return names
}
// convertShellPatternToRegex converts a shell-like wildcard pattern
// (? is a single char, * either the shortest or longest (greedy) string)
// to an equivalent regular expression.
//
// When anchored is set the expression only matches at the beginning of the
// input. A backslash may escape '*', '?' and '\' to match them literally;
// a backslash in front of '}' or '/' is dropped, because those only need
// escaping to be usable inside ${}. Any other escape is an error.
//
// Based on
// https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13
// but without the bracket expressions (`[]`)
func convertShellPatternToRegex(pattern string, greedy bool, anchored bool) (*regexp.Regexp, error) {
	var s scanner.Scanner
	s.Init(strings.NewReader(pattern))
	var out strings.Builder
	out.Grow(len(pattern) + 8)

	// Shell wildcards match any character, newlines included, whereas '.'
	// in a Go regexp skips newlines unless the 's' flag is set.
	out.WriteString("(?s)")

	// match only at the beginning of the string
	if anchored {
		out.WriteByte('^')
	}

	// default: non-greedy wildcards
	starPattern := ".*?"
	if greedy {
		starPattern = ".*"
	}

	for tok := s.Next(); tok != scanner.EOF; tok = s.Next() {
		switch tok {
		case '*':
			out.WriteString(starPattern)
			continue
		case '?':
			out.WriteByte('.')
			continue
		case '\\':
			// } and / as part of ${} need to be escaped, but the escape isn't part
			// of the pattern
			if s.Peek() == '}' || s.Peek() == '/' {
				continue
			}
			out.WriteRune('\\')
			tok = s.Next()
			if tok != '*' && tok != '?' && tok != '\\' {
				return nil, errors.Errorf("invalid escape '\\%c'", tok)
			}
		// regex characters that need to be escaped
		// escaping closing is optional, but done for consistency
		case '[', ']', '{', '}', '.', '+', '(', ')', '|', '^', '$':
			out.WriteByte('\\')
		}
		out.WriteRune(tok)
	}
	return regexp.Compile(out.String())
}
// trimPrefix removes from the start of 'value' the part matched by the
// shell pattern 'word' - the longest match when greedy is set, the shortest
// otherwise. If the pattern does not match, value is returned unchanged.
func trimPrefix(word, value string, greedy bool) (string, error) {
	re, err := convertShellPatternToRegex(word, greedy, true)
	if err != nil {
		return "", errors.Errorf("invalid pattern (%s) in substitution: %s", word, err)
	}

	loc := re.FindStringIndex(value)
	if loc == nil {
		return value, nil
	}
	return value[loc[1]:], nil
}
// reversePattern reverses a shell pattern rune by rune while keeping every
// escape sequence (a backslash plus the rune after it) in its original
// order, i.e. a\*c -> c\*a. A backslash that is the very last rune has
// nothing to escape and is treated like any other rune.
func reversePattern(pattern string) string {
	src := []rune(pattern)
	dst := make([]rune, len(src))

	// Walk the source front to back and fill the destination back to front,
	// one unit (a single rune or an escape pair) at a time.
	end := len(src)
	for pos := 0; pos < len(src); {
		width := 1
		if src[pos] == '\\' && pos+1 < len(src) {
			width = 2
		}
		end -= width
		copy(dst[end:], src[pos:pos+width])
		pos += width
	}
	return string(dst)
}
// reverseString returns str with the order of its runes reversed.
func reverseString(str string) string {
	runes := make([]rune, 0, len(str))
	for _, r := range str {
		runes = append(runes, r)
	}
	slices.Reverse(runes)
	return string(runes)
}
func trimSuffix(pattern, word string, greedy bool) (string, error) {
// regular expressions can't handle finding the shortest rightmost
// string so we reverse both search space and pattern to convert it
// to a leftmost search in both cases
pattern = reversePattern(pattern)
word = reverseString(word)
str, err := trimPrefix(pattern, word, greedy)
if err != nil {
return "", err
}
return reverseString(str), nil
}