package shell import ( "bytes" "fmt" "regexp" "slices" "strings" "text/scanner" "unicode" "github.com/pkg/errors" ) type EnvGetter interface { Get(string) (string, bool) Keys() []string } // Lex performs shell word splitting and variable expansion. // // Lex takes a string and an array of env variables and // process all quotes (" and ') as well as $xxx and ${xxx} env variable // tokens. Tries to mimic bash shell process. // It doesn't support all flavors of ${xx:...} formats but new ones can // be added by adding code to the "special ${} format processing" section // // It is not safe to call methods on a Lex instance concurrently. type Lex struct { escapeToken rune RawQuotes bool RawEscapes bool SkipProcessQuotes bool SkipUnsetEnv bool shellWord shellWord } // NewLex creates a new Lex which uses escapeToken to escape quotes. func NewLex(escapeToken rune) *Lex { return &Lex{escapeToken: escapeToken} } // ProcessWord will use the 'env' list of environment variables, // and replace any env var references in 'word'. It will also // return variables in word which were not found in the 'env' list, // which is useful in later linting. // TODO: rename func (s *Lex) ProcessWord(word string, env EnvGetter) (string, map[string]struct{}, error) { result, err := s.process(word, env, true) return result.Result, result.Unmatched, err } // ProcessWords will use the 'env' list of environment variables, // and replace any env var references in 'word' then it will also // return a slice of strings which represents the 'word' // split up based on spaces - taking into account quotes. Note that // this splitting is done **after** the env var substitutions are done. // Note, each one is trimmed to remove leading and trailing spaces (unless // they are quoted", but ProcessWord retains spaces between words. func (s *Lex) ProcessWords(word string, env EnvGetter) ([]string, error) { result, err := s.process(word, env, false) return result.Words, err } type ProcessWordResult struct { Result string Words []string Matched map[string]struct{} Unmatched map[string]struct{} } // ProcessWordWithMatches will use the 'env' list of environment variables, // replace any env var references in 'word' and return the env that were used. func (s *Lex) ProcessWordWithMatches(word string, env EnvGetter) (ProcessWordResult, error) { return s.process(word, env, true) } func (s *Lex) initWord(word string, env EnvGetter, capture bool) *shellWord { sw := &s.shellWord sw.Lex = s sw.envs = env sw.capture = capture sw.rawEscapes = s.RawEscapes if capture { sw.matches = nil sw.nonmatches = nil } sw.scanner.Init(strings.NewReader(word)) return sw } func (s *Lex) process(word string, env EnvGetter, capture bool) (ProcessWordResult, error) { sw := s.initWord(word, env, capture) word, words, err := sw.process(word) return ProcessWordResult{ Result: word, Words: words, Matched: sw.matches, Unmatched: sw.nonmatches, }, err } type shellWord struct { *Lex wordsBuffer strings.Builder scanner scanner.Scanner envs EnvGetter rawEscapes bool capture bool // capture matches and nonmatches matches map[string]struct{} nonmatches map[string]struct{} } func (sw *shellWord) process(source string) (string, []string, error) { word, words, err := sw.processStopOn(scanner.EOF, sw.rawEscapes) if err != nil { err = errors.Wrapf(err, "failed to process %q", source) } return word, words, err } type wordsStruct struct { buf *strings.Builder words []string inWord bool } func (w *wordsStruct) addChar(ch rune) { if unicode.IsSpace(ch) && w.inWord { if w.buf.Len() != 0 { w.words = append(w.words, w.buf.String()) w.buf.Reset() w.inWord = false } } else if !unicode.IsSpace(ch) { w.addRawChar(ch) } } func (w *wordsStruct) addRawChar(ch rune) { w.buf.WriteRune(ch) w.inWord = true } func (w *wordsStruct) addString(str string) { for _, ch := range str { w.addChar(ch) } } func (w *wordsStruct) addRawString(str string) { w.buf.WriteString(str) w.inWord = true } func (w *wordsStruct) getWords() []string { if w.buf.Len() > 0 { w.words = append(w.words, w.buf.String()) // Just in case we're called again by mistake w.buf.Reset() w.inWord = false } return w.words } // Process the word, starting at 'pos', and stop when we get to the // end of the word or the 'stopChar' character func (sw *shellWord) processStopOn(stopChar rune, rawEscapes bool) (string, []string, error) { // result buffer can't be currently shared for shellWord as it is called internally // by processDollar var result strings.Builder sw.wordsBuffer.Reset() var words wordsStruct words.buf = &sw.wordsBuffer // no need to initialize all the time var charFuncMapping = map[rune]func() (string, error){ '$': sw.processDollar, } if !sw.SkipProcessQuotes { charFuncMapping['\''] = sw.processSingleQuote charFuncMapping['"'] = sw.processDoubleQuote } // temporarily set sw.rawEscapes if needed if rawEscapes != sw.rawEscapes { sw.rawEscapes = rawEscapes defer func() { sw.rawEscapes = !rawEscapes }() } for sw.scanner.Peek() != scanner.EOF { ch := sw.scanner.Peek() if stopChar != scanner.EOF && ch == stopChar { sw.scanner.Next() return result.String(), words.getWords(), nil } if fn, ok := charFuncMapping[ch]; ok { // Call special processing func for certain chars tmp, err := fn() if err != nil { return "", []string{}, err } result.WriteString(tmp) if ch == rune('$') { words.addString(tmp) } else { words.addRawString(tmp) } } else { // Not special, just add it to the result ch = sw.scanner.Next() if ch == sw.escapeToken { if sw.rawEscapes { words.addRawChar(ch) result.WriteRune(ch) } // '\' (default escape token, but ` allowed) escapes, except end of line ch = sw.scanner.Next() if ch == scanner.EOF { break } words.addRawChar(ch) } else { words.addChar(ch) } result.WriteRune(ch) } } if stopChar != scanner.EOF { return "", []string{}, errors.Errorf("unexpected end of statement while looking for matching %s", string(stopChar)) } return result.String(), words.getWords(), nil } func (sw *shellWord) processSingleQuote() (string, error) { // All chars between single quotes are taken as-is // Note, you can't escape ' // // From the "sh" man page: // Single Quotes // Enclosing characters in single quotes preserves the literal meaning of // all the characters (except single quotes, making it impossible to put // single-quotes in a single-quoted string). var result bytes.Buffer ch := sw.scanner.Next() if sw.RawQuotes { result.WriteRune(ch) } for { ch = sw.scanner.Next() switch ch { case scanner.EOF: return "", errors.New("unexpected end of statement while looking for matching single-quote") case '\'': if sw.RawQuotes { result.WriteRune(ch) } return result.String(), nil } result.WriteRune(ch) } } func (sw *shellWord) processDoubleQuote() (string, error) { // All chars up to the next " are taken as-is, even ', except any $ chars // But you can escape " with a \ (or ` if escape token set accordingly) // // From the "sh" man page: // Double Quotes // Enclosing characters within double quotes preserves the literal meaning // of all characters except dollarsign ($), backquote (`), and backslash // (\). The backslash inside double quotes is historically weird, and // serves to quote only the following characters: // $ ` " \ . // Otherwise it remains literal. var result bytes.Buffer ch := sw.scanner.Next() if sw.RawQuotes { result.WriteRune(ch) } for { switch sw.scanner.Peek() { case scanner.EOF: return "", errors.New("unexpected end of statement while looking for matching double-quote") case '"': ch := sw.scanner.Next() if sw.RawQuotes { result.WriteRune(ch) } return result.String(), nil case '$': value, err := sw.processDollar() if err != nil { return "", err } result.WriteString(value) default: ch := sw.scanner.Next() if ch == sw.escapeToken { if sw.rawEscapes { result.WriteRune(ch) } switch sw.scanner.Peek() { case scanner.EOF: // Ignore \ at end of word continue case '"', '$', sw.escapeToken: // These chars can be escaped, all other \'s are left as-is // Note: for now don't do anything special with ` chars. // Not sure what to do with them anyway since we're not going // to execute the text in there (not now anyway). ch = sw.scanner.Next() } } result.WriteRune(ch) } } } func (sw *shellWord) processDollar() (string, error) { sw.scanner.Next() // $xxx case if sw.scanner.Peek() != '{' { name := sw.processName() if name == "" { return "$", nil } value, found := sw.getEnv(name) if !found && sw.SkipUnsetEnv { return "$" + name, nil } return value, nil } sw.scanner.Next() switch sw.scanner.Peek() { case scanner.EOF: return "", errors.New("syntax error: missing '}'") case '{', '}', ':': // Invalid ${{xx}, ${:xx}, ${:}. ${} case return "", errors.New("syntax error: bad substitution") } name := sw.processName() ch := sw.scanner.Next() chs := string(ch) nullIsUnset := false switch ch { case '}': // Normal ${xx} case value, set := sw.getEnv(name) if !set && sw.SkipUnsetEnv { return fmt.Sprintf("${%s}", name), nil } return value, nil case ':': nullIsUnset = true ch = sw.scanner.Next() chs += string(ch) fallthrough case '+', '-', '?', '#', '%': rawEscapes := ch == '#' || ch == '%' word, _, err := sw.processStopOn('}', rawEscapes) if err != nil { if sw.scanner.Peek() == scanner.EOF { return "", errors.New("syntax error: missing '}'") } return "", err } // Grab the current value of the variable in question so we // can use it to determine what to do based on the modifier value, set := sw.getEnv(name) if sw.SkipUnsetEnv && !set { return fmt.Sprintf("${%s%s%s}", name, chs, word), nil } switch ch { case '-': if !set || (nullIsUnset && value == "") { return word, nil } return value, nil case '+': if !set || (nullIsUnset && value == "") { return "", nil } return word, nil case '?': if !set { message := "is not allowed to be unset" if word != "" { message = word } return "", errors.Errorf("%s: %s", name, message) } if nullIsUnset && value == "" { message := "is not allowed to be empty" if word != "" { message = word } return "", errors.Errorf("%s: %s", name, message) } return value, nil case '%', '#': // %/# matches the shortest pattern expansion, %%/## the longest greedy := false if len(word) > 0 && word[0] == byte(ch) { greedy = true word = word[1:] } if ch == '%' { return trimSuffix(word, value, greedy) } return trimPrefix(word, value, greedy) default: return "", errors.Errorf("unsupported modifier (%s) in substitution", chs) } case '/': replaceAll := sw.scanner.Peek() == '/' if replaceAll { sw.scanner.Next() } pattern, _, err := sw.processStopOn('/', true) if err != nil { if sw.scanner.Peek() == scanner.EOF { return "", errors.New("syntax error: missing '/' in ${}") } return "", err } replacement, _, err := sw.processStopOn('}', true) if err != nil { if sw.scanner.Peek() == scanner.EOF { return "", errors.New("syntax error: missing '}'") } return "", err } value, set := sw.getEnv(name) if sw.SkipUnsetEnv && !set { return fmt.Sprintf("${%s/%s/%s}", name, pattern, replacement), nil } re, err := convertShellPatternToRegex(pattern, true, false) if err != nil { return "", errors.Errorf("invalid pattern (%s) in substitution: %s", pattern, err) } if replaceAll { value = re.ReplaceAllString(value, replacement) } else { if idx := re.FindStringIndex(value); idx != nil { value = value[0:idx[0]] + replacement + value[idx[1]:] } } return value, nil default: return "", errors.Errorf("unsupported modifier (%s) in substitution", chs) } } func (sw *shellWord) processName() string { // Read in a name (alphanumeric or _) // If it starts with a numeric then just return $# var name bytes.Buffer for sw.scanner.Peek() != scanner.EOF { ch := sw.scanner.Peek() if name.Len() == 0 && unicode.IsDigit(ch) { for sw.scanner.Peek() != scanner.EOF && unicode.IsDigit(sw.scanner.Peek()) { // Keep reading until the first non-digit character, or EOF ch = sw.scanner.Next() name.WriteRune(ch) } return name.String() } if name.Len() == 0 && isSpecialParam(ch) { ch = sw.scanner.Next() return string(ch) } if !unicode.IsLetter(ch) && !unicode.IsDigit(ch) && ch != '_' { break } ch = sw.scanner.Next() name.WriteRune(ch) } return name.String() } // isSpecialParam checks if the provided character is a special parameters, // as defined in http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02 func isSpecialParam(char rune) bool { switch char { case '@', '*', '#', '?', '-', '$', '!', '0': // Special parameters // http://pubs.opengroup.org/onlinepubs/009695399/utilities/xcu_chap02.html#tag_02_05_02 return true } return false } func (sw *shellWord) getEnv(name string) (string, bool) { v, ok := sw.envs.Get(name) if ok { if sw.capture { if sw.matches == nil { sw.matches = make(map[string]struct{}) } sw.matches[name] = struct{}{} } return v, true } if sw.capture { if sw.nonmatches == nil { sw.nonmatches = make(map[string]struct{}) } sw.nonmatches[name] = struct{}{} } return "", false } func EnvsFromSlice(env []string) EnvGetter { envs := map[string]string{} keys := make([]string, 0, len(env)) for _, e := range env { k, v, _ := strings.Cut(e, "=") keys = append(keys, k) envs[NormalizeEnvKey(k)] = v } return &envGetter{env: envs, keys: keys} } type envGetter struct { env map[string]string keys []string } var _ EnvGetter = &envGetter{} func (e *envGetter) Get(key string) (string, bool) { key = NormalizeEnvKey(key) v, ok := e.env[key] return v, ok } func (e *envGetter) Keys() []string { return e.keys } // convertShellPatternToRegex converts a shell-like wildcard pattern // (? is a single char, * either the shortest or longest (greedy) string) // to an equivalent regular expression. // // Based on // https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_13 // but without the bracket expressions (`[]`) func convertShellPatternToRegex(pattern string, greedy bool, anchored bool) (*regexp.Regexp, error) { var s scanner.Scanner s.Init(strings.NewReader(pattern)) var out strings.Builder out.Grow(len(pattern) + 4) // match only at the beginning of the string if anchored { out.WriteByte('^') } // default: non-greedy wildcards starPattern := ".*?" if greedy { starPattern = ".*" } for tok := s.Next(); tok != scanner.EOF; tok = s.Next() { switch tok { case '*': out.WriteString(starPattern) continue case '?': out.WriteByte('.') continue case '\\': // } and / as part of ${} need to be escaped, but the escape isn't part // of the pattern if s.Peek() == '}' || s.Peek() == '/' { continue } out.WriteRune('\\') tok = s.Next() if tok != '*' && tok != '?' && tok != '\\' { return nil, errors.Errorf("invalid escape '\\%c'", tok) } // regex characters that need to be escaped // escaping closing is optional, but done for consistency case '[', ']', '{', '}', '.', '+', '(', ')', '|', '^', '$': out.WriteByte('\\') } out.WriteRune(tok) } return regexp.Compile(out.String()) } func trimPrefix(word, value string, greedy bool) (string, error) { re, err := convertShellPatternToRegex(word, greedy, true) if err != nil { return "", errors.Errorf("invalid pattern (%s) in substitution: %s", word, err) } if idx := re.FindStringIndex(value); idx != nil { value = value[idx[1]:] } return value, nil } // reverse without avoid reversing escapes, i.e. a\*c -> c\*a func reversePattern(pattern string) string { patternRunes := []rune(pattern) out := make([]rune, len(patternRunes)) lastIdx := len(patternRunes) - 1 for i := 0; i <= lastIdx; { tok := patternRunes[i] outIdx := lastIdx - i if tok == '\\' && i != lastIdx { out[outIdx-1] = tok // the pattern is taken from a ${var#pattern}, so the last // character can't be an escape character out[outIdx] = patternRunes[i+1] i += 2 } else { out[outIdx] = tok i++ } } return string(out) } func reverseString(str string) string { out := []rune(str) slices.Reverse(out) return string(out) } func trimSuffix(pattern, word string, greedy bool) (string, error) { // regular expressions can't handle finding the shortest rightmost // string so we reverse both search space and pattern to convert it // to a leftmost search in both cases pattern = reversePattern(pattern) word = reverseString(word) str, err := trimPrefix(pattern, word, greedy) if err != nil { return "", err } return reverseString(str), nil }