/*
Copyright 2012 Google Inc. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

/*
Package shlex implements a simple lexer which splits input into tokens using
shell-style rules for quoting and commenting.

The basic use case uses the default ASCII lexer to split a string into sub-strings:

  shlex.Split("one \"two three\" four") -> []string{"one", "two three", "four"}

To process a stream of strings:

  l := NewLexer(os.Stdin)
  for {
  	token, err := l.Next()
  	if err != nil {
  		break
  	}
  	// process token
  }

To access the raw token stream (which includes tokens for comments):

  t := NewTokenizer(os.Stdin)
  for {
  	token, err := t.Next()
  	if err != nil {
  		break
  	}
  	// process token
  }
*/
package shlex

import (
	"bufio"
	"fmt"
	"io"
	"strings"
)

// TokenType is a top-level token classification: a word, space, comment, or unknown.
type TokenType int

// runeTokenClass is the type of a UTF-8 character classification: a quote, space, or escape.
type runeTokenClass int

// lexerState is the internal state used by the lexer state machine.
type lexerState int

// Token is a (type, value) pair representing a lexical token.
type Token struct {
	tokenType TokenType
	value     string
}

// Equal reports whether tokens a and b are equal.
// Two tokens are equal if both their types and values are equal. A nil token
// can never be equal to another token.
func (a *Token) Equal(b *Token) bool {
	if a == nil || b == nil {
		return false
	}
	if a.tokenType != b.tokenType {
		return false
	}
	return a.value == b.value
}
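
// A minimal illustration of Equal's semantics (not part of the original docs):
//
//	a := &Token{tokenType: WordToken, value: "x"}
//	b := &Token{tokenType: WordToken, value: "x"}
//	a.Equal(b)   // true: same type and value
//	a.Equal(nil) // false: nil is never equal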

// Named classes of UTF-8 runes
const (
	spaceRunes            = " \t\r\n"
	escapingQuoteRunes    = `"`
	nonEscapingQuoteRunes = "'"
	escapeRunes           = `\`
	commentRunes          = "#"
)

// Classes of rune token
const (
	unknownRuneClass runeTokenClass = iota
	spaceRuneClass
	escapingQuoteRuneClass
	nonEscapingQuoteRuneClass
	escapeRuneClass
	commentRuneClass
	eofRuneClass
)

// Classes of lexical token
const (
	UnknownToken TokenType = iota
	WordToken
	SpaceToken
	CommentToken
)

// Lexer state machine states
const (
	startState           lexerState = iota // no runes have been seen
	inWordState                            // processing regular runes in a word
	escapingState                          // we have just consumed an escape rune; the next rune is literal
	escapingQuotedState                    // we have just consumed an escape rune within a quoted string
	quotingEscapingState                   // we are within a quoted string that supports escaping ("...")
	quotingState                           // we are within a string that does not support escaping ('...')
	commentState                           // we are within a comment (everything following an unquoted or unescaped #)
)

// tokenClassifier is used for classifying rune characters.
type tokenClassifier map[rune]runeTokenClass

// addRuneClass registers every rune in runes under the given class.
func (typeMap tokenClassifier) addRuneClass(runes string, tokenType runeTokenClass) {
	for _, runeChar := range runes {
		typeMap[runeChar] = tokenType
	}
}

// newDefaultClassifier creates a new classifier for ASCII characters.
func newDefaultClassifier() tokenClassifier {
	t := tokenClassifier{}
	t.addRuneClass(spaceRunes, spaceRuneClass)
	t.addRuneClass(escapingQuoteRunes, escapingQuoteRuneClass)
	t.addRuneClass(nonEscapingQuoteRunes, nonEscapingQuoteRuneClass)
	t.addRuneClass(escapeRunes, escapeRuneClass)
	t.addRuneClass(commentRunes, commentRuneClass)
	return t
}
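
// A hypothetical extension (not part of this package): because a classifier
// is a plain map, a variant that also treats ';' as starting a comment could
// be built the same way:
//
//	t := newDefaultClassifier()
//	t.addRuneClass(";", commentRuneClass)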

// ClassifyRune classifies a rune.
func (t tokenClassifier) ClassifyRune(runeVal rune) runeTokenClass {
	return t[runeVal]
}

// Lexer turns an input stream into a sequence of tokens. Whitespace and comments are skipped.
type Lexer Tokenizer

// NewLexer creates a new lexer from an input stream.
func NewLexer(r io.Reader) *Lexer {
	return (*Lexer)(NewTokenizer(r))
}

// Next returns the next word, or an error. If there are no more words,
// the error will be io.EOF.
func (l *Lexer) Next() (string, error) {
	for {
		token, err := (*Tokenizer)(l).Next()
		if err != nil {
			return "", err
		}
		switch token.tokenType {
		case WordToken:
			return token.value, nil
		case CommentToken:
			// skip comments
		default:
			return "", fmt.Errorf("Unknown token type: %v", token.tokenType)
		}
	}
}
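
// An illustrative run (assuming the default classifier; not part of the
// original docs): quoting is honored and the comment token is skipped.
//
//	l := NewLexer(strings.NewReader(`go "build tags" # ignored`))
//	l.Next() // "go", nil
//	l.Next() // "build tags", nil
//	l.Next() // "", io.EOF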

// Tokenizer turns an input stream into a sequence of typed tokens.
type Tokenizer struct {
	input      bufio.Reader
	classifier tokenClassifier
}

// NewTokenizer creates a new tokenizer from an input stream.
func NewTokenizer(r io.Reader) *Tokenizer {
	input := bufio.NewReader(r)
	classifier := newDefaultClassifier()
	return &Tokenizer{
		input:      *input,
		classifier: classifier}
}

// scanStream scans the stream for the next token using the internal state machine.
// It returns an error if it reaches a state it does not know how to handle.
func (t *Tokenizer) scanStream() (*Token, error) {
	state := startState
	var tokenType TokenType
	var value []rune
	var nextRune rune
	var nextRuneType runeTokenClass
	var err error

	for {
		nextRune, _, err = t.input.ReadRune()
		nextRuneType = t.classifier.ClassifyRune(nextRune)

		if err == io.EOF {
			nextRuneType = eofRuneClass
			err = nil
		} else if err != nil {
			return nil, err
		}

		switch state {
		case startState: // no runes read yet
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						return nil, io.EOF
					}
				case spaceRuneClass:
					{
					}
				case escapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						tokenType = WordToken
						state = quotingState
					}
				case escapeRuneClass:
					{
						tokenType = WordToken
						state = escapingState
					}
				case commentRuneClass:
					{
						tokenType = CommentToken
						state = commentState
					}
				default:
					{
						tokenType = WordToken
						value = append(value, nextRune)
						state = inWordState
					}
				}
			}
		case inWordState: // in a regular word
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						t.input.UnreadRune()
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						state = quotingEscapingState
					}
				case nonEscapingQuoteRuneClass:
					{
						state = quotingState
					}
				case escapeRuneClass:
					{
						state = escapingState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case escapingState: // the rune after an escape character
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						state = inWordState
						value = append(value, nextRune)
					}
				}
			}
		case escapingQuotedState: // the next rune after an escape character, in double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found after escape character")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				default:
					{
						state = quotingEscapingState
						value = append(value, nextRune)
					}
				}
			}
		case quotingEscapingState: // in escaping double quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case escapingQuoteRuneClass:
					{
						state = inWordState
					}
				case escapeRuneClass:
					{
						state = escapingQuotedState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case quotingState: // in non-escaping single quotes
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						err = fmt.Errorf("EOF found when expecting closing quote")
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case nonEscapingQuoteRuneClass:
					{
						state = inWordState
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		case commentState: // in a comment
			{
				switch nextRuneType {
				case eofRuneClass:
					{
						token := &Token{
							tokenType: tokenType,
							value:     string(value)}
						return token, err
					}
				case spaceRuneClass:
					{
						if nextRune == '\n' {
							state = startState
							token := &Token{
								tokenType: tokenType,
								value:     string(value)}
							return token, err
						} else {
							value = append(value, nextRune)
						}
					}
				default:
					{
						value = append(value, nextRune)
					}
				}
			}
		default:
			{
				return nil, fmt.Errorf("Unexpected state: %v", state)
			}
		}
	}
}
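
// An illustrative trace (assuming the default classifier): scanning the input
// `a\ b` moves startState -> inWordState -> escapingState -> inWordState and
// yields a single WordToken whose value is "a b".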

// Next returns the next token in the stream.
func (t *Tokenizer) Next() (*Token, error) {
	return t.scanStream()
}
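
// A sketch of the raw token stream (illustrative; unlike Lexer, comment
// tokens are kept):
//
//	t := NewTokenizer(strings.NewReader("ls # files"))
//	t.Next() // &Token{tokenType: WordToken, value: "ls"}, nil
//	t.Next() // &Token{tokenType: CommentToken, value: " files"}, nil
//	t.Next() // nil, io.EOF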

// Split partitions a string into a slice of words using shell-style rules
// for quoting and commenting.
func Split(s string) ([]string, error) {
	l := NewLexer(strings.NewReader(s))
	subStrings := make([]string, 0)
	for {
		word, err := l.Next()
		if err != nil {
			if err == io.EOF {
				return subStrings, nil
			}
			return subStrings, err
		}
		subStrings = append(subStrings, word)
	}
}
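
// For example (illustrative):
//
//	words, _ := Split(`cp "src file" dst`)
//	// words == []string{"cp", "src file", "dst"}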