mirror of
				https://gitea.com/Lydanne/buildx.git
				synced 2025-11-04 01:53:42 +08:00 
			
		
		
		
	
		
			
				
	
	
		
			418 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
			
		
		
	
	
			418 lines
		
	
	
		
			9.8 KiB
		
	
	
	
		
			Go
		
	
	
	
	
	
/*
 | 
						|
Copyright 2012 Google Inc. All Rights Reserved.
 | 
						|
 | 
						|
Licensed under the Apache License, Version 2.0 (the "License");
 | 
						|
you may not use this file except in compliance with the License.
 | 
						|
You may obtain a copy of the License at
 | 
						|
 | 
						|
    http://www.apache.org/licenses/LICENSE-2.0
 | 
						|
 | 
						|
Unless required by applicable law or agreed to in writing, software
 | 
						|
distributed under the License is distributed on an "AS IS" BASIS,
 | 
						|
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | 
						|
See the License for the specific language governing permissions and
 | 
						|
limitations under the License.
 | 
						|
*/
 | 
						|
 | 
						|
/*
 | 
						|
Package shlex implements a simple lexer which splits input into tokens using
 | 
						|
shell-style rules for quoting and commenting.
 | 
						|
 | 
						|
The basic use case uses the default ASCII lexer to split a string into sub-strings:
 | 
						|
 | 
						|
  shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}
 | 
						|
 | 
						|
To process a stream of strings:
 | 
						|
 | 
						|
  l := NewLexer(os.Stdin)
 | 
						|
  for token, err := l.Next(); err == nil; token, err = l.Next() {
 | 
						|
  	// process token
 | 
						|
  }
 | 
						|
 | 
						|
To access the raw token stream (which includes tokens for comments):
 | 
						|
 | 
						|
  t := NewTokenizer(os.Stdin)
 | 
						|
  for token, err := t.Next(); err == nil; token, err = t.Next() {
 | 
						|
	// process token
 | 
						|
  }
 | 
						|
 | 
						|
*/
 | 
						|
package shlex
 | 
						|
 | 
						|
import (
 | 
						|
	"bufio"
 | 
						|
	"fmt"
 | 
						|
	"io"
 | 
						|
	"strings"
 | 
						|
)
 | 
						|
 | 
						|
// TokenType is the top-level classification of a token: a word, a space,
// a comment, or unknown.
type TokenType int
 | 
						|
 | 
						|
// runeTokenClass is the classification assigned to a single rune, for
// example a quote, a space, or an escape.
type runeTokenClass int
 | 
						|
 | 
						|
// lexerState is the internal state used by the lexer state machine.
type lexerState int
 | 
						|
 | 
						|
// Token is a (type, value) pair representing a lexographical token.
 | 
						|
type Token struct {
 | 
						|
	tokenType TokenType
 | 
						|
	value     string
 | 
						|
}
 | 
						|
 | 
						|
// Equal reports whether tokens a, and b, are equal.
 | 
						|
// Two tokens are equal if both their types and values are equal. A nil token can
 | 
						|
// never be equal to another token.
 | 
						|
func (a *Token) Equal(b *Token) bool {
 | 
						|
	if a == nil || b == nil {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	if a.tokenType != b.tokenType {
 | 
						|
		return false
 | 
						|
	}
 | 
						|
	return a.value == b.value
 | 
						|
}
 | 
						|
 | 
						|
// Named classes of UTF-8 runes. Each constant lists the runes that belong to
// one class.
const (
	// spaceRunes are the whitespace runes: space, tab, carriage return, newline.
	spaceRunes = " \t\r\n"
	// escapingQuoteRunes open and close a quoted string that supports escaping.
	escapingQuoteRunes = `"`
	// nonEscapingQuoteRunes open and close a quoted string without escaping.
	nonEscapingQuoteRunes = "'"
	// escapeRunes make the following rune literal.
	escapeRunes = `\`
	// commentRunes start a comment.
	commentRunes = "#"
)
 | 
						|
 | 
						|
// Classes of rune token
 | 
						|
const (
 | 
						|
	unknownRuneClass runeTokenClass = iota
 | 
						|
	spaceRuneClass
 | 
						|
	escapingQuoteRuneClass
 | 
						|
	nonEscapingQuoteRuneClass
 | 
						|
	escapeRuneClass
 | 
						|
	commentRuneClass
 | 
						|
	eofRuneClass
 | 
						|
)
 | 
						|
 | 
						|
// Classes of lexographic token
 | 
						|
const (
 | 
						|
	UnknownToken TokenType = iota
 | 
						|
	WordToken
 | 
						|
	SpaceToken
 | 
						|
	CommentToken
 | 
						|
)
 | 
						|
 | 
						|
// Lexer state machine states
 | 
						|
const (
 | 
						|
	startState           lexerState = iota // no runes have been seen
 | 
						|
	inWordState                            // processing regular runes in a word
 | 
						|
	escapingState                          // we have just consumed an escape rune; the next rune is literal
 | 
						|
	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
 | 
						|
	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
 | 
						|
	quotingState                           // we are within a string that does not support escaping ('...')
 | 
						|
	commentState                           // we are within a comment (everything following an unquoted or unescaped #
 | 
						|
)
 | 
						|
 | 
						|
// tokenClassifier is used for classifying rune characters.
 | 
						|
type tokenClassifier map[rune]runeTokenClass
 | 
						|
 | 
						|
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
 | 
						|
	for _, runeChar := range runes {
 | 
						|
		typeMap[runeChar] = tokenType
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// newDefaultClassifier creates a new classifier for ASCII characters.
 | 
						|
func newDefaultClassifier() tokenClassifier {
 | 
						|
	t := tokenClassifier{}
 | 
						|
	t.addRuneClass(spaceRunes, spaceRuneClass)
 | 
						|
	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
 | 
						|
	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
 | 
						|
	t.addRuneClass(escapeRunes, escapeRuneClass)
 | 
						|
	t.addRuneClass(commentRunes, commentRuneClass)
 | 
						|
	return t
 | 
						|
}
 | 
						|
 | 
						|
// ClassifyRune classifiees a rune
 | 
						|
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
 | 
						|
	return t[runeVal]
 | 
						|
}
 | 
						|
 | 
						|
// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
 | 
						|
type Lexer Tokenizer
 | 
						|
 | 
						|
// NewLexer creates a new lexer from an input stream.
 | 
						|
func NewLexer(r io.Reader) *Lexer {
 | 
						|
 | 
						|
	return (*Lexer)(NewTokenizer(r))
 | 
						|
}
 | 
						|
 | 
						|
// Next returns the next word, or an error. If there are no more words,
 | 
						|
// the error will be io.EOF.
 | 
						|
func (l *Lexer) Next() (string, error) {
 | 
						|
	for {
 | 
						|
		token, err := (*Tokenizer)(l).Next()
 | 
						|
		if err != nil {
 | 
						|
			return "", err
 | 
						|
		}
 | 
						|
		switch token.tokenType {
 | 
						|
		case WordToken:
 | 
						|
			return token.value, nil
 | 
						|
		case CommentToken:
 | 
						|
			// skip comments
 | 
						|
		default:
 | 
						|
			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Tokenizer turns an input stream into a sequence of typed tokens
 | 
						|
type Tokenizer struct {
 | 
						|
	input      bufio.Reader
 | 
						|
	classifier tokenClassifier
 | 
						|
}
 | 
						|
 | 
						|
// NewTokenizer creates a new tokenizer from an input stream.
 | 
						|
func NewTokenizer(r io.Reader) *Tokenizer {
 | 
						|
	input := bufio.NewReader(r)
 | 
						|
	classifier := newDefaultClassifier()
 | 
						|
	return &Tokenizer{
 | 
						|
		input:      *input,
 | 
						|
		classifier: classifier}
 | 
						|
}
 | 
						|
 | 
						|
// scanStream scans the stream for the next token using the internal state machine.
 | 
						|
// It will panic if it encounters a rune which it does not know how to handle.
 | 
						|
func (t *Tokenizer) scanStream() (*Token, error) {
 | 
						|
	state := startState
 | 
						|
	var tokenType TokenType
 | 
						|
	var value []rune
 | 
						|
	var nextRune rune
 | 
						|
	var nextRuneType runeTokenClass
 | 
						|
	var err error
 | 
						|
 | 
						|
	for {
 | 
						|
		nextRune, _, err = t.input.ReadRune()
 | 
						|
		nextRuneType = t.classifier.ClassifyRune(nextRune)
 | 
						|
 | 
						|
		if err == io.EOF {
 | 
						|
			nextRuneType = eofRuneClass
 | 
						|
			err = nil
 | 
						|
		} else if err != nil {
 | 
						|
			return nil, err
 | 
						|
		}
 | 
						|
 | 
						|
		switch state {
 | 
						|
		case startState: // no runes read yet
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						return nil, io.EOF
 | 
						|
					}
 | 
						|
				case spaceRuneClass:
 | 
						|
					{
 | 
						|
					}
 | 
						|
				case escapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						tokenType = WordToken
 | 
						|
						state = quotingEscapingState
 | 
						|
					}
 | 
						|
				case nonEscapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						tokenType = WordToken
 | 
						|
						state = quotingState
 | 
						|
					}
 | 
						|
				case escapeRuneClass:
 | 
						|
					{
 | 
						|
						tokenType = WordToken
 | 
						|
						state = escapingState
 | 
						|
					}
 | 
						|
				case commentRuneClass:
 | 
						|
					{
 | 
						|
						tokenType = CommentToken
 | 
						|
						state = commentState
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						tokenType = WordToken
 | 
						|
						value = append(value, nextRune)
 | 
						|
						state = inWordState
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case inWordState: // in a regular word
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				case spaceRuneClass:
 | 
						|
					{
 | 
						|
						t.input.UnreadRune()
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				case escapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						state = quotingEscapingState
 | 
						|
					}
 | 
						|
				case nonEscapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						state = quotingState
 | 
						|
					}
 | 
						|
				case escapeRuneClass:
 | 
						|
					{
 | 
						|
						state = escapingState
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case escapingState: // the rune after an escape character
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						err = fmt.Errorf("EOF found after escape character")
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						state = inWordState
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case escapingQuotedState: // the next rune after an escape character, in double quotes
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						err = fmt.Errorf("EOF found after escape character")
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						state = quotingEscapingState
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case quotingEscapingState: // in escaping double quotes
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						err = fmt.Errorf("EOF found when expecting closing quote")
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				case escapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						state = inWordState
 | 
						|
					}
 | 
						|
				case escapeRuneClass:
 | 
						|
					{
 | 
						|
						state = escapingQuotedState
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case quotingState: // in non-escaping single quotes
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						err = fmt.Errorf("EOF found when expecting closing quote")
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				case nonEscapingQuoteRuneClass:
 | 
						|
					{
 | 
						|
						state = inWordState
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		case commentState: // in a comment
 | 
						|
			{
 | 
						|
				switch nextRuneType {
 | 
						|
				case eofRuneClass:
 | 
						|
					{
 | 
						|
						token := &Token{
 | 
						|
							tokenType: tokenType,
 | 
						|
							value:     string(value)}
 | 
						|
						return token, err
 | 
						|
					}
 | 
						|
				case spaceRuneClass:
 | 
						|
					{
 | 
						|
						if nextRune == '\n' {
 | 
						|
							state = startState
 | 
						|
							token := &Token{
 | 
						|
								tokenType: tokenType,
 | 
						|
								value:     string(value)}
 | 
						|
							return token, err
 | 
						|
						} else {
 | 
						|
							value = append(value, nextRune)
 | 
						|
						}
 | 
						|
					}
 | 
						|
				default:
 | 
						|
					{
 | 
						|
						value = append(value, nextRune)
 | 
						|
					}
 | 
						|
				}
 | 
						|
			}
 | 
						|
		default:
 | 
						|
			{
 | 
						|
				return nil, fmt.Errorf("Unexpected state: %v", state)
 | 
						|
			}
 | 
						|
		}
 | 
						|
	}
 | 
						|
}
 | 
						|
 | 
						|
// Next returns the next token in the stream.
 | 
						|
func (t *Tokenizer) Next() (*Token, error) {
 | 
						|
	return t.scanStream()
 | 
						|
}
 | 
						|
 | 
						|
// Split partitions a string into a slice of strings.
 | 
						|
func Split(s string) ([]string, error) {
 | 
						|
	l := NewLexer(strings.NewReader(s))
 | 
						|
	subStrings := make([]string, 0)
 | 
						|
	for {
 | 
						|
		word, err := l.Next()
 | 
						|
		if err != nil {
 | 
						|
			if err == io.EOF {
 | 
						|
				return subStrings, nil
 | 
						|
			}
 | 
						|
			return subStrings, err
 | 
						|
		}
 | 
						|
		subStrings = append(subStrings, word)
 | 
						|
	}
 | 
						|
}
 |