vendor: update buildkit v0.14-dev version 549891b

Brings in formatter for lint requests.

Signed-off-by: Tonis Tiigi <tonistiigi@gmail.com>
This commit is contained in:
Tonis Tiigi
2024-04-11 07:49:31 -07:00
parent 3e90cc4b84
commit da3435ed3a
107 changed files with 6830 additions and 2446 deletions

View File

@ -0,0 +1,171 @@
package parser
import (
"bufio"
"bytes"
"encoding/json"
"fmt"
"regexp"
"strings"
"github.com/pkg/errors"
)
// Names of the parser directives recognized by DirectiveParser.
const (
keySyntax = "syntax"
keyEscape = "escape"
)
// validDirectives is the set of directive names ParseLine accepts. A line
// that looks like a directive but uses any other name ends directive parsing.
var validDirectives = map[string]struct{}{
keySyntax: {},
keyEscape: {},
}
// Directive is a single parser directive as returned by
// DirectiveParser.ParseLine.
type Directive struct {
// Name is the directive key, lower-cased by ParseLine.
Name string
// Value is the text captured after '=' by the directive regexp.
Value string
// Location is the line range the directive was read from.
Location []Range
}
// DirectiveParser is a parser for Dockerfile directives that enforces the
// quirks of the directive parser.
//
// The zero value is ready to use and recognizes '#' comments.
type DirectiveParser struct {
	line   int                 // line number of the last line given to ParseLine (callers may pre-seed it)
	regexp *regexp.Regexp      // matches "<comment> key = value"; compiled lazily by ParseLine
	seen   map[string]struct{} // directive names already returned, used to reject duplicates
	done   bool                // set once a line that is not a valid directive has been seen
}

// setComment selects the comment prefix that introduces a directive line and
// compiles the matching regexp. The prefix is matched literally: it is escaped
// with regexp.QuoteMeta so that metacharacters in it cannot alter the pattern.
func (d *DirectiveParser) setComment(comment string) {
	d.regexp = regexp.MustCompile(fmt.Sprintf(`^%s\s*([a-zA-Z][a-zA-Z0-9]*)\s*=\s*(.+?)\s*$`, regexp.QuoteMeta(comment)))
}
// ParseLine inspects one line and returns the directive it declares, if any.
//
// Every call advances the parser's line counter. Once a line has been seen
// that is not a recognized directive, the parser is finished and all later
// calls return (nil, nil). Repeating a directive name is reported as an error.
func (d *DirectiveParser) ParseLine(line []byte) (*Directive, error) {
	d.line++
	if d.done {
		return nil, nil
	}
	if d.regexp == nil {
		d.setComment("#")
	}

	groups := d.regexp.FindSubmatch(line)
	if len(groups) == 0 {
		d.done = true
		return nil, nil
	}

	name := strings.ToLower(string(groups[1]))
	if _, known := validDirectives[name]; !known {
		d.done = true
		return nil, nil
	}

	// Reading from a nil map is fine; the map is only created on first insert.
	if _, repeated := d.seen[name]; repeated {
		return nil, errors.Errorf("only one %s parser directive can be used", name)
	}
	if d.seen == nil {
		d.seen = map[string]struct{}{}
	}
	d.seen[name] = struct{}{}

	here := Position{Line: d.line}
	return &Directive{
		Name:     name,
		Value:    string(groups[2]),
		Location: []Range{{Start: here, End: here}},
	}, nil
}
// ParseAll feeds data to ParseLine line by line and collects the directives
// found. It stops as soon as the parser is done or ParseLine fails; in the
// failure case the directives gathered so far are returned with the error.
func (d *DirectiveParser) ParseAll(data []byte) ([]*Directive, error) {
	var found []*Directive
	lines := bufio.NewScanner(bytes.NewReader(data))
	for !d.done && lines.Scan() {
		directive, err := d.ParseLine(lines.Bytes())
		if err != nil {
			return found, err
		}
		if directive != nil {
			found = append(found, directive)
		}
	}
	return found, nil
}
// DetectSyntax returns the syntax of provided input.
//
// The traditional dockerfile directives '# syntax = ...' are used by default,
// however, the function will also fallback to c-style directives '// syntax = ...'
// and json-encoded directives '{ "syntax": "..." }'. Finally, starting lines
// with '#!' are treated as shebangs and ignored.
//
// The results are, in order: the first space-delimited word of the directive
// value, the full directive value, the location of the directive, and whether
// a syntax directive was found at all.
func DetectSyntax(dt []byte) (string, string, []Range, bool) {
	dt, hadShebang, err := discardShebang(dt)
	if err != nil {
		return "", "", nil, false
	}
	startLine := 0
	if hadShebang {
		startLine = 1
	}

	// Try '#syntax=' first, then '//syntax='. Each attempt gets a fresh
	// parser because a parser is finished after its first non-directive line.
	for _, prefix := range []string{"#", "//"} {
		p := DirectiveParser{line: startLine}
		p.setComment(prefix)
		if syntax, cmdline, loc, ok := detectSyntaxFromParser(dt, p); ok {
			return syntax, cmdline, loc, true
		}
	}

	// Finally, treat the whole input as a JSON document with a "syntax" key.
	var payload struct {
		Syntax string `json:"syntax"`
	}
	if err := json.Unmarshal(dt, &payload); err != nil || payload.Syntax == "" {
		return "", "", nil, false
	}
	at := Position{Line: startLine}
	return payload.Syntax, payload.Syntax, []Range{{Start: at, End: at}}, true
}
// detectSyntaxFromParser runs parser over dt and reports the first syntax
// directive it produced, using the same result layout as DetectSyntax.
func detectSyntaxFromParser(dt []byte, parser DirectiveParser) (string, string, []Range, bool) {
	// The parse error is dropped on purpose: it may have been raised after
	// the syntax directive had already been collected.
	directives, _ := parser.ParseAll(dt)
	for i := range directives {
		directive := directives[i]
		if directive.Name != keySyntax {
			continue
		}
		image, _, _ := strings.Cut(directive.Value, " ")
		return image, directive.Value, directive.Location, true
	}
	return "", "", nil, false
}
// discardShebang drops a leading '#!' line from dt. It returns the remaining
// bytes and whether a shebang line was removed; the error is always nil.
func discardShebang(dt []byte) ([]byte, bool, error) {
	if !bytes.HasPrefix(dt, []byte("#!")) {
		return dt, false, nil
	}
	_, afterFirstLine, _ := bytes.Cut(dt, []byte("\n"))
	return afterFirstLine, true, nil
}

View File

@ -0,0 +1,59 @@
package parser
import (
"github.com/moby/buildkit/util/stack"
"github.com/pkg/errors"
)
// ErrorLocation gives a location in source code that caused the error
type ErrorLocation struct {
// Locations holds one []Range for each WithLocation call applied to this error.
Locations [][]Range
// error is the wrapped cause; embedding it provides the Error method.
error
}
// Unwrap unwraps to the next error
func (e *ErrorLocation) Unwrap() error {
return e.error
}
// Range is a code section between two positions
type Range struct {
// Start is the first position of the section.
Start Position
// End is the last position of the section.
End Position
}
// Position is a point in source code
type Position struct {
// Line is the line number of the position.
Line int
// Character is the offset within the line.
// NOTE(review): none of the code in this package section sets Character;
// confirm its base (0 or 1) against the consumers.
Character int
}
func withLocation(err error, start, end int) error {
return WithLocation(err, toRanges(start, end))
}
// WithLocation extends an error with a source code location.
//
// A nil err yields nil. When err already carries an *ErrorLocation in its
// chain, location is appended to that value and err itself is returned;
// otherwise err is wrapped in a new ErrorLocation with stack info enabled.
func WithLocation(err error, location []Range) error {
	if err == nil {
		return nil
	}
	var existing *ErrorLocation
	if !errors.As(err, &existing) {
		wrapped := &ErrorLocation{
			error:     err,
			Locations: [][]Range{location},
		}
		return stack.Enable(wrapped)
	}
	existing.Locations = append(existing.Locations, location)
	return err
}
// toRanges expands the inclusive line span start..end into one single-line
// Range per line. An end that is not greater than start yields exactly one
// Range for start.
func toRanges(start, end int) (r []Range) {
	last := end
	if last < start {
		last = start
	}
	for line := start; line <= last; line++ {
		at := Position{Line: line}
		r = append(r, Range{Start: at, End: at})
	}
	return r
}

View File

@ -0,0 +1,367 @@
package parser
// line parsers are dispatch calls that parse a single unit of text into a
// Node object which contains the whole statement. Dockerfiles have varied
// (but not usually unique, see ONBUILD for a unique example) parsing rules
// per-command, and these unify the processing in a way that makes it
// manageable.
import (
"encoding/json"
"strings"
"unicode"
"unicode/utf8"
"github.com/pkg/errors"
)
var (
// errDockerfileNotStringArray is returned by parseJSON when a JSON array
// contains an element that is not a string. parseMaybeJSON and
// parseMaybeJSONToList compare against it to decide whether to fail or to
// fall back to plain-text parsing.
errDockerfileNotStringArray = errors.New("when using JSON array syntax, arrays must be comprised of strings only")
)
const (
// commandLabel is the instruction name parseLabel hands to parseNameVal,
// where it is used in error messages.
commandLabel = "LABEL"
)
// parseIgnore discards the arguments of an instruction. The instruction is
// still parsed, but an empty node is returned so that none of its arguments
// end up in the AST.
func parseIgnore(rest string, d *directives) (*Node, map[string]bool, error) {
	empty := new(Node)
	return empty, nil, nil
}
// parseSubCommand parses an instruction whose argument is itself an
// instruction. It is used for ONBUILD and could serve any statement that
// carries sub-statements.
//
// ONBUILD RUN foo bar -> (onbuild (run foo bar))
func parseSubCommand(rest string, d *directives) (*Node, map[string]bool, error) {
	if len(rest) == 0 {
		return nil, nil, nil
	}

	sub, err := newNodeFromLine(rest, d, nil)
	if err != nil {
		return nil, nil, err
	}

	wrapper := &Node{}
	wrapper.Children = append(wrapper.Children, sub)
	return wrapper, nil, nil
}
// helper to parse words (i.e space delimited or quoted strings) in a statement.
// The quotes are preserved as part of this function and they are stripped later
// as part of processWords().
//
// rest is scanned one rune at a time by a three-state machine (between words,
// inside a word, inside a quoted section). d supplies the escape token. The
// result is never nil; it is an empty slice when rest holds no words.
func parseWords(rest string, d *directives) []string {
const (
inSpaces = iota // looking for start of a word
inWord
inQuote
)
words := []string{}
phase := inSpaces
word := ""
quote := '\000'
// blankOK is set once a quote has been seen in the current word, so that
// the word is emitted even if nothing else was accumulated.
blankOK := false
var ch rune
var chWidth int
// The loop deliberately runs one step past the last rune (pos == len(rest))
// so that a word still being built at end of input gets flushed.
for pos := 0; pos <= len(rest); pos += chWidth {
if pos != len(rest) {
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}
if phase == inSpaces { // Looking for start of word
if pos == len(rest) { // end of input
break
}
if unicode.IsSpace(ch) { // skip spaces
continue
}
phase = inWord // found it, fall through
}
// End of input while inside a word or an unterminated quote: flush and stop.
if (phase == inWord || phase == inQuote) && (pos == len(rest)) {
if blankOK || len(word) > 0 {
words = append(words, word)
}
break
}
if phase == inWord {
if unicode.IsSpace(ch) {
phase = inSpaces
if blankOK || len(word) > 0 {
words = append(words, word)
}
word = ""
blankOK = false
continue
}
// An opening quote switches state; the quote rune itself is still
// appended to the word below.
if ch == '\'' || ch == '"' {
quote = ch
blankOK = true
phase = inQuote
}
if ch == d.escapeToken {
if pos+chWidth == len(rest) {
continue // just skip an escape token at end of line
}
// If we're not quoted and we see an escape token, then always just
// add the escape token plus the char to the word, even if the char
// is a quote.
word += string(ch)
pos += chWidth
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}
word += string(ch)
continue
}
if phase == inQuote {
// The closing quote returns to the in-word state and is appended below.
if ch == quote {
phase = inWord
}
// The escape token is special except for ' quotes - can't escape anything for '
if ch == d.escapeToken && quote != '\'' {
if pos+chWidth == len(rest) {
phase = inWord
continue // just skip the escape token at end
}
pos += chWidth
word += string(ch)
ch, chWidth = utf8.DecodeRuneInString(rest[pos:])
}
word += string(ch)
}
}
return words
}
// parseNameVal parses environment-like statements into a chain of
// key node -> value node pairs. Variable interpolation is *not* handled here;
// that is left to the evaluator.
//
// Two spellings are supported:
//
//	KEY name value      (legacy: exactly one pair, value is the rest of the line)
//	KEY name=value ...  (current: any number of pairs)
//
// Which one applies is decided by whether the first word contains "=".
// key is only used in error messages.
func parseNameVal(rest string, key string, d *directives) (*Node, error) {
	words := parseWords(rest, d)
	if len(words) == 0 {
		return nil, nil
	}

	// Legacy form (KEY name value)
	if !strings.Contains(words[0], "=") {
		legacy := reWhitespace.Split(rest, 2)
		if len(legacy) < 2 {
			return nil, errors.Errorf("%s must have two arguments", key)
		}
		return newKeyValueNode(legacy[0], legacy[1]), nil
	}

	var head, tail *Node
	for _, word := range words {
		name, value, hasEquals := strings.Cut(word, "=")
		if !hasEquals {
			return nil, errors.Errorf("Syntax error - can't find = in %q. Must be of the form: name=value", word)
		}
		head, tail = appendKeyValueNode(newKeyValueNode(name, value), head, tail)
	}
	return head, nil
}
// newKeyValueNode builds a two-node chain: a node holding key whose Next is a
// node holding value.
func newKeyValueNode(key, value string) *Node {
	valueNode := &Node{Value: value}
	keyNode := &Node{Value: key}
	keyNode.Next = valueNode
	return keyNode
}
// appendKeyValueNode links the key/value pair starting at node onto a chain.
// rootNode is the head of the chain (nil for an empty chain) and prevNode is
// the value node of the previous pair (nil if none). It returns the head of
// the chain and the value node of the pair just added.
func appendKeyValueNode(node, rootNode, prevNode *Node) (*Node, *Node) {
	if prevNode != nil {
		prevNode.Next = node
	}
	if rootNode == nil {
		return node, node.Next
	}
	return rootNode, node.Next
}
// parseEnv parses the arguments of an ENV instruction via parseNameVal.
// It never sets node attributes.
func parseEnv(rest string, d *directives) (*Node, map[string]bool, error) {
	pairs, err := parseNameVal(rest, "ENV", d)
	return pairs, nil, err
}
// parseLabel parses the arguments of a LABEL instruction via parseNameVal.
// It never sets node attributes.
func parseLabel(rest string, d *directives) (*Node, map[string]bool, error) {
	pairs, err := parseNameVal(rest, commandLabel, d)
	return pairs, nil, err
}
// parseNameOrNameVal parses a statement made of keyword definitions and/or
// value assignments, such as `name1 name2= name3="" name4=value`.
//
// This is stricter than the legacy assignment form accepted by
// parseNameVal(): an assignment must be written `keyword=[<value>]` (like
// `name2=`, `name3=""` and `name4=value` above), while a bare `keyword` (like
// `name1`) is a definition on its own. `name2=` and `name3=""` are equivalent
// and both assign an empty value.
//
// Each word becomes one node, unsplit, in the returned chain.
func parseNameOrNameVal(rest string, d *directives) (*Node, map[string]bool, error) {
	words := parseWords(rest, d)
	if len(words) == 0 {
		return nil, nil, nil
	}

	head := &Node{Value: words[0]}
	tail := head
	for _, word := range words[1:] {
		tail.Next = &Node{Value: word}
		tail = tail.Next
	}
	return head, nil, nil
}
// parseStringsWhitespaceDelimited parses a whitespace-delimited set of
// arguments. The result is effectively a linked list of string arguments, one
// node per field produced by splitting rest on reWhitespace. An empty rest
// yields a nil node.
func parseStringsWhitespaceDelimited(rest string, d *directives) (*Node, map[string]bool, error) {
	if rest == "" {
		return nil, nil, nil
	}

	var head, tail *Node
	for _, field := range reWhitespace.Split(rest, -1) {
		item := &Node{Value: field}
		if head == nil {
			head = item
		} else {
			tail.Next = item
		}
		tail = item
	}
	return head, nil, nil
}
// parseString stores the whole argument string, unmodified, in a single node.
// An empty argument yields a nil node.
func parseString(rest string, d *directives) (*Node, map[string]bool, error) {
	if len(rest) == 0 {
		return nil, nil, nil
	}
	return &Node{Value: rest}, nil, nil
}
// parseJSON converts JSON arrays to an AST.
//
// rest must start (after leading whitespace) with '[' and decode as a JSON
// array whose elements are all strings. On success the elements are returned
// as a node chain together with the attribute {"json": true}. A non-string
// element is reported as errDockerfileNotStringArray.
func parseJSON(rest string, d *directives) (*Node, map[string]bool, error) {
	rest = strings.TrimLeftFunc(rest, unicode.IsSpace)
	if !strings.HasPrefix(rest, "[") {
		return nil, nil, errors.Errorf("Error parsing %q as a JSON array", rest)
	}

	var elems []interface{}
	decoder := json.NewDecoder(strings.NewReader(rest))
	if err := decoder.Decode(&elems); err != nil {
		return nil, nil, err
	}

	// link always points at the *Node slot the next element goes into.
	var head *Node
	link := &head
	for _, elem := range elems {
		text, isString := elem.(string)
		if !isString {
			return nil, nil, errDockerfileNotStringArray
		}
		*link = &Node{Value: text}
		link = &(*link).Next
	}
	return head, map[string]bool{"json": true}, nil
}
// parseMaybeJSON determines if the argument appears to be a JSON array. If
// so, the result of parseJSON is returned; if not, the raw argument string is
// returned unchanged in a single node. A JSON array containing non-string
// elements is an error rather than a fallback. An empty argument yields nil.
func parseMaybeJSON(rest string, d *directives) (*Node, map[string]bool, error) {
	if rest == "" {
		return nil, nil, nil
	}

	node, attrs, err := parseJSON(rest, d)
	switch err {
	case nil:
		return node, attrs, nil
	case errDockerfileNotStringArray:
		return nil, nil, err
	default:
		return &Node{Value: rest}, nil, nil
	}
}
// parseMaybeJSONToList determines if the argument appears to be a JSON array.
// If so, the result of parseJSON is returned; if not, the argument is parsed
// as a whitespace delimited string. A JSON array containing non-string
// elements is an error rather than a fallback.
func parseMaybeJSONToList(rest string, d *directives) (*Node, map[string]bool, error) {
	node, attrs, err := parseJSON(rest, d)
	switch err {
	case nil:
		return node, attrs, nil
	case errDockerfileNotStringArray:
		return nil, nil, err
	default:
		return parseStringsWhitespaceDelimited(rest, d)
	}
}
// parseHealthConfig parses the arguments of HEALTHCHECK. It is like
// parseMaybeJSON, but has an extra leading type argument.
//
// The first whitespace-delimited word of rest becomes the value of the
// returned node; whatever follows the separating whitespace is handed to
// parseMaybeJSON and linked as its Next. The attributes are those reported by
// parseMaybeJSON. A nil node is returned when rest is empty or starts with
// whitespace.
//
// Whitespace is detected on decoded runes, not on raw bytes, so a multi-byte
// UTF-8 character whose encoding contains 0x85 or 0xA0 (both white space as
// Latin-1 code points) is not mistaken for a separator and cut in half.
func parseHealthConfig(rest string, d *directives) (*Node, map[string]bool, error) {
	// Find end of first argument
	sep := strings.IndexFunc(rest, unicode.IsSpace)
	if sep < 0 {
		sep = len(rest)
	}
	if sep == 0 {
		return nil, nil, nil
	}

	typ := rest[:sep]
	args := strings.TrimLeftFunc(rest[sep:], unicode.IsSpace)
	cmd, attrs, err := parseMaybeJSON(args, d)
	if err != nil {
		return nil, nil, err
	}

	return &Node{Value: typ, Next: cmd}, attrs, nil
}

View File

@ -0,0 +1,552 @@
// Package parser implements a parser that transforms a raw byte-stream
// into a low-level Abstract Syntax Tree.
package parser
import (
"bufio"
"bytes"
"fmt"
"io"
"regexp"
"strconv"
"strings"
"unicode"
"github.com/moby/buildkit/frontend/dockerfile/command"
"github.com/moby/buildkit/frontend/dockerfile/shell"
"github.com/pkg/errors"
)
// Node is a structure used to represent a parse tree.
//
// In the node there are three fields, Value, Next, and Children. Value is the
// current token's string value. Next is always the next non-child token, and
// children contains all the children. Here's an example:
//
// (value next (child child-next child-next-next) next-next)
//
// This data structure is frankly pretty lousy for handling complex languages,
// but lucky for us the Dockerfile isn't very complicated. This structure
// works a little more effectively than a "proper" parse tree for our needs.
type Node struct {
Value string // actual content
Next *Node // the next item in the current sexp
Children []*Node // the children of this sexp
Heredocs []Heredoc // extra heredoc content attachments
Attributes map[string]bool // special attributes for this node
Original string // original line used before parsing
Flags []string // only top Node should have this set
StartLine int // the line in the original dockerfile where the node begins
EndLine int // the line in the original dockerfile where the node ends
PrevComment []string // comment lines collected by Parse ahead of this instruction
}
// Location returns the location of the node in source code, as one
// single-line Range per line from StartLine to EndLine.
func (node *Node) Location() []Range {
	first, last := node.StartLine, node.EndLine
	return toRanges(first, last)
}
// Dump dumps the AST defined by `node` as a list of sexps.
// Returns a string suitable for printing.
//
// The node's own value is lower-cased, followed by its flags, its children
// (each in parentheses on its own line) and then the Next chain, where plain
// values are quoted and nodes with children are dumped recursively.
func (node *Node) Dump() string {
	var out strings.Builder
	out.WriteString(strings.ToLower(node.Value))

	if len(node.Flags) > 0 {
		fmt.Fprintf(&out, " %q", node.Flags)
	}

	for _, child := range node.Children {
		out.WriteString("(")
		out.WriteString(child.Dump())
		out.WriteString(")\n")
	}

	for sibling := node.Next; sibling != nil; sibling = sibling.Next {
		out.WriteString(" ")
		if len(sibling.Children) > 0 {
			out.WriteString(sibling.Dump())
		} else {
			out.WriteString(strconv.Quote(sibling.Value))
		}
	}

	return strings.TrimSpace(out.String())
}
// lines records the first and last source line covered by the node.
func (node *Node) lines(start, end int) {
	node.StartLine, node.EndLine = start, end
}
// canContainHeredoc reports whether the instruction may carry heredocs: its
// command must be listed in heredocDirectives and it must not be written in
// JSON form. For compound commands like ONBUILD the wrapped instruction is
// examined instead, when there is one.
func (node *Node) canContainHeredoc() bool {
	target := node
	if heredocCompoundDirectives[strings.ToLower(target.Value)] {
		if next := target.Next; next != nil && len(next.Children) > 0 {
			target = next.Children[0]
		}
	}

	if !heredocDirectives[strings.ToLower(target.Value)] {
		return false
	}
	return !target.Attributes["json"]
}
// AddChild adds a new child node, and updates line information: the child is
// given the span startLine..endLine, the parent's EndLine moves to endLine,
// and the parent's StartLine is set only if it is still negative (unset).
func (node *Node) AddChild(child *Node, startLine, endLine int) {
	child.lines(startLine, endLine)
	node.Children = append(node.Children, child)
	node.EndLine = endLine
	if node.StartLine < 0 {
		node.StartLine = startLine
	}
}
// Heredoc describes one heredoc attached to an instruction.
type Heredoc struct {
// Name is the heredoc word; Parse uses it as the terminator line.
Name string
// FileDescriptor is the number written before "<<" (0 when absent).
FileDescriptor uint
// Expand is false when any part of the heredoc word was quoted.
Expand bool
// Chomp is true for the "<<-" form; Parse then ignores leading tabs when
// looking for the terminator.
Chomp bool
// Content is the heredoc body, accumulated by Parse with line endings kept.
Content string
}
var (
// dispatch maps a lower-cased instruction name to its argument parser; it
// is populated in init.
dispatch map[string]func(string, *directives) (*Node, map[string]bool, error)
// reWhitespace matches a run of blanks; newlines are not included.
reWhitespace = regexp.MustCompile(`[\t\v\f\r ]+`)
// reComment matches a line that starts with '#'.
reComment = regexp.MustCompile(`^#.*$`)
// reHeredoc matches a heredoc word: optional fd digits, "<<", optional
// "-", then the name.
reHeredoc = regexp.MustCompile(`^(\d*)<<(-?)([^<]*)$`)
// reLeadingTabs matches the tabs at the start of every line.
reLeadingTabs = regexp.MustCompile(`(?m)^\t+`)
)
// DefaultEscapeToken is the default escape token
const DefaultEscapeToken = '\\'
var (
// Directives allowed to contain heredocs
heredocDirectives = map[string]bool{
command.Add: true,
command.Copy: true,
command.Run: true,
}
// Directives allowed to contain directives containing heredocs
heredocCompoundDirectives = map[string]bool{
command.Onbuild: true,
}
)
// directives is the structure used during a build run to hold the state of
// parsing directives.
type directives struct {
// parser recognizes directive lines; possibleParserDirective feeds it.
parser DirectiveParser
escapeToken rune // Current escape token
lineContinuationRegex *regexp.Regexp // Current line continuation regex
}
// setEscapeToken sets the default token for escaping characters and as line-
// continuation token in a Dockerfile. Only ` (backtick) and \ (backslash) are
// allowed as token; anything else is rejected with an error and leaves the
// current settings untouched.
func (d *directives) setEscapeToken(s string) error {
	switch s {
	case "`", `\`:
		// accepted
	default:
		return errors.Errorf("invalid escape token '%s' does not match ` or \\", s)
	}
	d.escapeToken = rune(s[0])

	// The escape token is used both to escape characters in a line and as line
	// continuation token. If it's the last non-whitespace token, it is used as
	// line-continuation token, *unless* preceded by an escape-token.
	//
	// The second branch in the regular expression handles line-continuation
	// tokens on their own line, which don't have any character preceding them.
	//
	// Due to Go lacking negative look-ahead matching, this regular expression
	// does not currently handle a line-continuation token preceded by an *escaped*
	// escape-token ("foo \\\").
	pattern := `([^\` + s + `])\` + s + `[ \t]*$|^\` + s + `[ \t]*$`
	d.lineContinuationRegex = regexp.MustCompile(pattern)
	return nil
}
// possibleParserDirective looks for parser directives, eg '# escape=<char>'.
// Parser directives must precede any builder instruction or other comments,
// and cannot be repeated. Only the escape directive has an effect here: it
// replaces the current escape token.
func (d *directives) possibleParserDirective(line string) error {
	directive, err := d.parser.ParseLine([]byte(line))
	if err != nil {
		return err
	}
	if directive == nil || directive.Name != keyEscape {
		return nil
	}
	return d.setEscapeToken(directive.Value)
}
// newDefaultDirectives returns a new directives structure configured with
// DefaultEscapeToken.
func newDefaultDirectives() *directives {
	d := new(directives)
	// DefaultEscapeToken is one of the two accepted tokens, so this call
	// cannot fail and its result is intentionally discarded.
	_ = d.setEscapeToken(string(DefaultEscapeToken))
	return d
}
// init fills the dispatch table that newNodeFromLine uses to pick the
// argument parser for each instruction.
func init() {
// Dispatch Table. see line_parsers.go for the parse functions.
// The command is parsed and mapped to the line parser. The line parser
// receives the arguments but not the command, and returns an AST after
// reformulating the arguments according to the rules in the parser
// functions. Errors are propagated up by Parse() and the resulting AST can
// be incorporated directly into the existing AST as a next.
dispatch = map[string]func(string, *directives) (*Node, map[string]bool, error){
command.Add: parseMaybeJSONToList,
command.Arg: parseNameOrNameVal,
command.Cmd: parseMaybeJSON,
command.Copy: parseMaybeJSONToList,
command.Entrypoint: parseMaybeJSON,
command.Env: parseEnv,
command.Expose: parseStringsWhitespaceDelimited,
command.From: parseStringsWhitespaceDelimited,
command.Healthcheck: parseHealthConfig,
command.Label: parseLabel,
command.Maintainer: parseString,
command.Onbuild: parseSubCommand,
command.Run: parseMaybeJSON,
command.Shell: parseMaybeJSON,
command.StopSignal: parseString,
command.User: parseString,
command.Volume: parseMaybeJSONToList,
command.Workdir: parseString,
}
}
// newNodeFromLine splits the line into parts, and dispatches to a function
// based on the command and command arguments. A Node is created from the
// result of the dispatch. Commands without an entry in the dispatch table are
// kept, but their arguments are ignored. comments is stored on the node as
// PrevComment.
func newNodeFromLine(line string, d *directives, comments []string) (*Node, error) {
	cmd, flags, args, err := splitCommand(line)
	if err != nil {
		return nil, err
	}

	parse := dispatch[strings.ToLower(cmd)]
	if parse == nil {
		// Ignore invalid Dockerfile instructions
		parse = parseIgnore
	}

	next, attrs, err := parse(args, d)
	if err != nil {
		return nil, err
	}

	node := &Node{Value: cmd, Original: line, Flags: flags}
	node.Next = next
	node.Attributes = attrs
	node.PrevComment = comments
	return node, nil
}
// Result contains the bundled outputs from parsing a Dockerfile.
type Result struct {
// AST is the root node; each parsed instruction is one of its children.
AST *Node
// EscapeToken is the escape token in effect when parsing finished.
EscapeToken rune
// Warnings lists the non-fatal problems found while parsing.
Warnings []Warning
}
// Warning contains information to identify and locate a warning generated
// during parsing.
type Warning struct {
// Short is the one-line message printed by PrintWarnings.
Short string
// Detail holds additional explanatory text.
Detail [][]byte
// URL points at further information about the warning.
URL string
// Location is where in the source the warning applies.
Location *Range
}
// PrintWarnings writes the short form of every warning to out, followed by a
// single notice about empty continuation lines. Nothing is written when there
// are no warnings.
func (r *Result) PrintWarnings(out io.Writer) {
	if len(r.Warnings) == 0 {
		return
	}
	for i := range r.Warnings {
		fmt.Fprintf(out, "[WARNING]: %s\n", r.Warnings[i].Short)
	}
	fmt.Fprintf(out, "[WARNING]: Empty continuation lines will become errors in a future release.\n")
}
// Parse consumes lines from a provided Reader, parses each line into an AST
// and returns the results of doing so.
//
// Every logical instruction (continuation lines joined, heredoc bodies
// attached) becomes one child of the root node in Result.AST. Errors are
// wrapped with a source location. Parsing fails when a line cannot be parsed,
// when a heredoc is never terminated, or when the input has no instructions.
// A scanner failure is returned together with the Result built so far.
func Parse(rwc io.Reader) (*Result, error) {
d := newDefaultDirectives()
currentLine := 0
// StartLine -1 marks the root as "no child yet"; AddChild fills it in.
root := &Node{StartLine: -1}
scanner := bufio.NewScanner(rwc)
// scanLines keeps the line endings, which heredoc bodies rely on.
scanner.Split(scanLines)
warnings := []Warning{}
var comments []string
var err error
for scanner.Scan() {
bytesRead := scanner.Bytes()
if currentLine == 0 {
// First line, strip the byte-order-marker if present
bytesRead = bytes.TrimPrefix(bytesRead, utf8bom)
}
// Collect comment lines so they can be attached to the next instruction;
// an empty comment discards what was collected so far.
if isComment(bytesRead) {
// NOTE(review): [1:] drops the first byte, which is '#' only when the
// comment is not indented — confirm indented comments are intended.
comment := strings.TrimSpace(string(bytesRead[1:]))
if comment == "" {
comments = nil
} else {
comments = append(comments, comment)
}
}
bytesRead, err = processLine(d, bytesRead, true)
if err != nil {
// NOTE(review): currentLine has not been incremented for this line yet,
// so the reported location looks one line early — confirm.
return nil, withLocation(err, currentLine, 0)
}
currentLine++
startLine := currentLine
line, isEndOfLine := trimContinuationCharacter(string(bytesRead), d)
// Blank lines and comment-only lines produce no instruction.
if isEndOfLine && line == "" {
continue
}
var hasEmptyContinuationLine bool
// Keep reading while the previous line ended with a continuation token.
for !isEndOfLine && scanner.Scan() {
bytesRead, err := processLine(d, scanner.Bytes(), false)
if err != nil {
return nil, withLocation(err, currentLine, 0)
}
currentLine++
if isComment(scanner.Bytes()) {
// original line was a comment (processLine strips comments)
continue
}
if isEmptyContinuationLine(bytesRead) {
hasEmptyContinuationLine = true
continue
}
continuationLine := string(bytesRead)
continuationLine, isEndOfLine = trimContinuationCharacter(continuationLine, d)
line += continuationLine
}
if hasEmptyContinuationLine {
warnings = append(warnings, Warning{
Short: "Empty continuation line found in: " + line,
Detail: [][]byte{[]byte("Empty continuation lines will become errors in a future release")},
URL: "https://github.com/moby/moby/pull/33719",
Location: &Range{Start: Position{Line: currentLine}, End: Position{Line: currentLine}},
})
}
child, err := newNodeFromLine(line, d, comments)
if err != nil {
return nil, withLocation(err, startLine, currentLine)
}
if child.canContainHeredoc() {
heredocs, err := heredocsFromLine(line)
if err != nil {
return nil, withLocation(err, startLine, currentLine)
}
// Heredoc bodies follow the instruction in the order they were declared;
// each one runs until a line equal to its name.
for _, heredoc := range heredocs {
terminator := []byte(heredoc.Name)
terminated := false
for scanner.Scan() {
bytesRead := scanner.Bytes()
currentLine++
possibleTerminator := trimNewline(bytesRead)
if heredoc.Chomp {
possibleTerminator = trimLeadingTabs(possibleTerminator)
}
if bytes.Equal(possibleTerminator, terminator) {
terminated = true
break
}
heredoc.Content += string(bytesRead)
}
if !terminated {
return nil, withLocation(errors.New("unterminated heredoc"), startLine, currentLine)
}
child.Heredocs = append(child.Heredocs, heredoc)
}
}
root.AddChild(child, startLine, currentLine)
comments = nil
}
// StartLine is still negative only if AddChild was never called.
if root.StartLine < 0 {
return nil, withLocation(errors.New("file with no instructions"), currentLine, 0)
}
return &Result{
AST: root,
Warnings: warnings,
EscapeToken: d.escapeToken,
}, withLocation(handleScannerError(scanner.Err()), currentLine, 0)
}
// heredocFromMatch extracts a heredoc from a possible heredoc regex match.
//
// match is a reHeredoc submatch: the whole text, the file descriptor digits,
// the optional "-" and the heredoc word. The result is (nil, nil) when match
// is empty, when the word is empty, or when the word does not lex to exactly
// one shell word.
func heredocFromMatch(match []string) (*Heredoc, error) {
if len(match) == 0 {
return nil, nil
}
// The parse error is ignored: an empty digit group leaves fd at 0.
fd, _ := strconv.ParseUint(match[1], 10, 0)
chomp := match[2] == "-"
rest := match[3]
if len(rest) == 0 {
return nil, nil
}
shlex := shell.NewLex('\\')
shlex.SkipUnsetEnv = true
// Attempt to parse both the heredoc both with *and* without quotes.
// If there are quotes in one but not the other, then we know that some
// part of the heredoc word is quoted, so we shouldn't expand the content.
shlex.RawQuotes = false
words, err := shlex.ProcessWords(rest, []string{})
if err != nil {
return nil, err
}
// quick sanity check that rest is a single word
if len(words) != 1 {
return nil, nil
}
shlex.RawQuotes = true
wordsRaw, err := shlex.ProcessWords(rest, []string{})
if err != nil {
return nil, err
}
if len(wordsRaw) != len(words) {
return nil, errors.Errorf("internal lexing of heredoc produced inconsistent results: %s", rest)
}
// Equal quote counts in both lexings mean no quoting was applied to the word.
word := words[0]
wordQuoteCount := strings.Count(word, `'`) + strings.Count(word, `"`)
wordRaw := wordsRaw[0]
wordRawQuoteCount := strings.Count(wordRaw, `'`) + strings.Count(wordRaw, `"`)
expand := wordQuoteCount == wordRawQuoteCount
return &Heredoc{
Name: word,
Expand: expand,
Chomp: chomp,
FileDescriptor: uint(fd),
}, nil
}
// ParseHeredoc parses a heredoc word from a target string, returning the
// components from the doc. It returns (nil, nil) when src is not a heredoc
// word.
func ParseHeredoc(src string) (*Heredoc, error) {
	match := reHeredoc.FindStringSubmatch(src)
	return heredocFromMatch(match)
}
// MustParseHeredoc is a variant of ParseHeredoc that discards the error, if
// there was one present. It does not panic.
func MustParseHeredoc(src string) *Heredoc {
	doc, _ := ParseHeredoc(src)
	return doc
}
// heredocsFromLine returns the heredocs declared on an instruction line, in
// the order they appear. The line is split into shell words and each word is
// tested with ParseHeredoc; an error from the word splitting itself is
// ignored.
func heredocsFromLine(line string) ([]Heredoc, error) {
	lexer := shell.NewLex('\\')
	lexer.RawQuotes = true
	lexer.RawEscapes = true
	lexer.SkipUnsetEnv = true
	words, _ := lexer.ProcessWords(line, []string{})

	var found []Heredoc
	for i := range words {
		doc, err := ParseHeredoc(words[i])
		if err != nil {
			return nil, err
		}
		if doc == nil {
			continue
		}
		found = append(found, *doc)
	}
	return found, nil
}
// ChompHeredocContent chomps leading tabs from every line of the heredoc.
func ChompHeredocContent(src string) string {
	const replacement = ""
	return reLeadingTabs.ReplaceAllString(src, replacement)
}
// trimComments empties src if it matches reComment, i.e. starts with '#'.
func trimComments(src []byte) []byte {
	nothing := []byte{}
	return reComment.ReplaceAll(src, nothing)
}
// trimLeadingWhitespace removes all leading Unicode white space from src.
func trimLeadingWhitespace(src []byte) []byte {
	trimmed := bytes.TrimLeftFunc(src, unicode.IsSpace)
	return trimmed
}
// trimLeadingTabs removes all leading tab characters from src.
func trimLeadingTabs(src []byte) []byte {
	const tab = "\t"
	return bytes.TrimLeft(src, tab)
}
// trimNewline removes all trailing carriage returns and line feeds from src.
func trimNewline(src []byte) []byte {
	const lineEndings = "\r\n"
	return bytes.TrimRight(src, lineEndings)
}
func isComment(line []byte) bool {
return reComment.Match(trimLeadingWhitespace(trimNewline(line)))
}
// isEmptyContinuationLine reports whether line holds nothing but white space.
func isEmptyContinuationLine(line []byte) bool {
	content := trimLeadingWhitespace(trimNewline(line))
	return len(content) == 0
}
// utf8bom is the UTF-8 byte-order mark that Parse strips from the first line.
var utf8bom = []byte{0xEF, 0xBB, 0xBF}
// trimContinuationCharacter removes a trailing line-continuation token from
// line. The boolean result is true when the line is complete (no continuation
// token was found) and false when the statement continues on the next line.
func trimContinuationCharacter(line string, d *directives) (string, bool) {
	re := d.lineContinuationRegex
	if !re.MatchString(line) {
		return line, true
	}
	// "$1" keeps the character that preceded the continuation token.
	return re.ReplaceAllString(line, "$1"), false
}
// processLine prepares one raw line for parsing: the line ending is removed,
// leading white space is removed when stripLeftWhitespace is set, the result
// is offered to the directive parser, and comment lines are blanked out.
//
// TODO: remove stripLeftWhitespace after deprecation period. It seems silly
// to preserve whitespace on continuation lines. Why is that done?
func processLine(d *directives, token []byte, stripLeftWhitespace bool) ([]byte, error) {
	token = trimNewline(token)
	if stripLeftWhitespace {
		token = trimLeadingWhitespace(token)
	}
	err := d.possibleParserDirective(string(token))
	return trimComments(token), err
}
// scanLines is a variation of bufio.ScanLines that preserves the line
// endings: each token includes its trailing '\n'. A final line without a
// newline is returned as-is once the input is exhausted.
func scanLines(data []byte, atEOF bool) (advance int, token []byte, err error) {
	if nl := bytes.IndexByte(data, '\n'); nl >= 0 {
		end := nl + 1
		return end, data[:end], nil
	}
	if atEOF && len(data) > 0 {
		return len(data), data, nil
	}
	// Either more data is needed or there is nothing left to return.
	return 0, nil, nil
}
// handleScannerError replaces bufio.ErrTooLong with a Dockerfile-specific
// message; any other value, including nil, is returned unchanged.
func handleScannerError(err error) error {
	if err == bufio.ErrTooLong {
		return errors.Errorf("dockerfile line greater than max allowed size of %d", bufio.MaxScanTokenSize-1)
	}
	return err
}

View File

@ -0,0 +1,117 @@
package parser
import (
	"strings"
	"unicode"
	"unicode/utf8"
)
// splitCommand takes a single line of text and parses out the cmd and args,
// which are used for dispatching to more exact parsing functions. It returns
// the command word, the builder flags, and the remaining arguments with
// surrounding white space removed.
func splitCommand(line string) (string, []string, string, error) {
	// Make sure we get the same results irrespective of leading/trailing spaces
	parts := reWhitespace.Split(strings.TrimSpace(line), 2)
	cmd := parts[0]
	if len(parts) < 2 {
		return cmd, nil, "", nil
	}

	args, flags, err := extractBuilderFlags(parts[1])
	if err != nil {
		return "", nil, "", err
	}
	return cmd, flags, strings.TrimSpace(args), nil
}
// extractBuilderFlags parses the BuilderFlags at the start of line and
// returns the remaining part of the line, the flags, and an error (currently
// always nil).
//
// Flags are white-space separated words that start with "--". Single or
// double quotes group characters into one word and are removed; a backslash
// makes the following character literal. Parsing stops at the first word that
// does not start with "--", or right after a bare "--" word; everything from
// that point on is returned untouched as the remainder.
//
// The line is decoded rune by rune and the original bytes are copied into the
// flag words, so multi-byte UTF-8 characters in flag values are preserved and
// their continuation bytes are never mistaken for white space.
func extractBuilderFlags(line string) (string, []string, error) {
	const (
		inSpaces = iota // looking for start of a word
		inWord
		inQuote
	)

	words := []string{}
	phase := inSpaces
	word := ""
	quote := '\000'
	blankOK := false // a quote was seen, so even an empty word is kept
	var (
		ch      rune
		chWidth int
	)

	// The loop runs one step past the last rune so that a word still being
	// built at end of input is flushed. Every path taken at that step breaks.
	for pos := 0; pos <= len(line); pos += chWidth {
		atEnd := pos == len(line)
		if !atEnd {
			ch, chWidth = utf8.DecodeRuneInString(line[pos:])
		}

		if phase == inSpaces { // Looking for start of word
			if atEnd {
				break
			}
			if unicode.IsSpace(ch) { // skip spaces
				continue
			}
			// Only keep going if the next word starts with --
			if !strings.HasPrefix(line[pos:], "--") {
				return line[pos:], words, nil
			}
			phase = inWord // found something with "--", fall through
		}

		if atEnd {
			// End of input inside a word or an unterminated quote.
			if word != "--" && (blankOK || len(word) > 0) {
				words = append(words, word)
			}
			break
		}

		switch phase {
		case inWord:
			if unicode.IsSpace(ch) {
				phase = inSpaces
				if word == "--" {
					return line[pos:], words, nil
				}
				if blankOK || len(word) > 0 {
					words = append(words, word)
				}
				word = ""
				blankOK = false
				continue
			}
			if ch == '\'' || ch == '"' {
				quote = ch
				blankOK = true
				phase = inQuote
				continue
			}
		case inQuote:
			if ch == quote {
				phase = inWord
				continue
			}
		}

		if ch == '\\' {
			if pos+chWidth == len(line) {
				// A trailing backslash is dropped; the next step flushes the word.
				phase = inWord
				continue
			}
			// Take the escaped character literally.
			pos += chWidth
			_, chWidth = utf8.DecodeRuneInString(line[pos:])
		}
		// Copy the source bytes rather than re-encoding the rune so that the
		// input is reproduced exactly.
		word += line[pos : pos+chWidth]
	}

	return "", words, nil
}