Sebastiaan van Stijn d75c650792
vendor: github.com/moby/go-archive v0.1.0
full diff: https://github.com/moby/go-archive/compare/21f3f3385ab7...v0.1.0

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2025-04-16 13:49:57 +02:00

1170 lines
36 KiB
Go

// Package archive provides helper functions for dealing with archive files.
package archive
import (
"archive/tar"
"context"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"runtime"
"strings"
"syscall"
"time"
"github.com/containerd/log"
"github.com/moby/patternmatcher"
"github.com/moby/sys/sequential"
"github.com/moby/sys/user"
"github.com/moby/go-archive/compression"
"github.com/moby/go-archive/tarheader"
)
// ImpliedDirectoryMode represents the mode (Unix permissions) applied to directories that are implied by files in a
// tar, but that do not have their own header entry.
//
// The permissions mask is stored in a constant instead of locally to ensure that magic numbers do not
// proliferate in the codebase. The default value 0755 has been selected based on the default umask of 0022, and
// a convention of mkdir(1) calling mkdir(2) with permissions of 0777, resulting in a final value of 0755.
//
// This value is currently implementation-defined, and not captured in any cross-runtime specification. Thus, it is
// subject to change in Moby at any time -- image authors who require consistent or known directory permissions
// should explicitly control them by ensuring that header entries exist for any applicable path.
const ImpliedDirectoryMode = 0o755
type (
// Compression is the state represents if compressed or not.
//
// Deprecated: use [compression.Compression].
Compression = compression.Compression
// WhiteoutFormat is the format of whiteouts unpacked
WhiteoutFormat int
ChownOpts struct {
UID int
GID int
}
// TarOptions wraps the tar options.
TarOptions struct {
IncludeFiles []string
ExcludePatterns []string
Compression compression.Compression
NoLchown bool
IDMap user.IdentityMapping
ChownOpts *ChownOpts
IncludeSourceDir bool
// WhiteoutFormat is the expected on disk format for whiteout files.
// This format will be converted to the standard format on pack
// and from the standard format on unpack.
WhiteoutFormat WhiteoutFormat
// When unpacking, specifies whether overwriting a directory with a
// non-directory is allowed and vice versa.
NoOverwriteDirNonDir bool
// For each include when creating an archive, the included name will be
// replaced with the matching name from this map.
RebaseNames map[string]string
InUserNS bool
// Allow unpacking to succeed in spite of failures to set extended
// attributes on the unpacked files due to the destination filesystem
// not supporting them or a lack of permissions. Extended attributes
// were probably in the archive for a reason, so set this option at
// your own peril.
BestEffortXattrs bool
}
)
// Archiver implements the Archiver interface and allows the reuse of most utility functions of
// this package with a pluggable Untar function. Also, to facilitate the passing of specific id
// mappings for untar, an Archiver can be created with maps which will then be passed to Untar operations.
type Archiver struct {
Untar func(io.Reader, string, *TarOptions) error
IDMapping user.IdentityMapping
}
// NewDefaultArchiver returns a new Archiver without any IdentityMapping
func NewDefaultArchiver() *Archiver {
return &Archiver{Untar: Untar}
}
// breakoutError is used to differentiate errors related to breaking out
// When testing archive breakout in the unit tests, this error is expected
// in order for the test to pass.
type breakoutError error
const (
Uncompressed = compression.None // Deprecated: use [compression.None].
Bzip2 = compression.Bzip2 // Deprecated: use [compression.Bzip2].
Gzip = compression.Gzip // Deprecated: use [compression.Gzip].
Xz = compression.Xz // Deprecated: use [compression.Xz].
Zstd = compression.Zstd // Deprecated: use [compression.Zstd].
)
const (
AUFSWhiteoutFormat WhiteoutFormat = 0 // AUFSWhiteoutFormat is the default format for whiteouts
OverlayWhiteoutFormat WhiteoutFormat = 1 // OverlayWhiteoutFormat formats whiteout according to the overlay standard.
)
// IsArchivePath checks if the (possibly compressed) file at the given path
// starts with a tar file header.
func IsArchivePath(path string) bool {
file, err := os.Open(path)
if err != nil {
return false
}
defer file.Close()
rdr, err := compression.DecompressStream(file)
if err != nil {
return false
}
defer rdr.Close()
r := tar.NewReader(rdr)
_, err = r.Next()
return err == nil
}
// DetectCompression detects the compression algorithm of the source.
//
// Deprecated: use [compression.Detect].
func DetectCompression(source []byte) compression.Compression {
return compression.Detect(source)
}
// DecompressStream decompresses the archive and returns a ReaderCloser with the decompressed archive.
//
// Deprecated: use [compression.DecompressStream].
func DecompressStream(archive io.Reader) (io.ReadCloser, error) {
return compression.DecompressStream(archive)
}
// CompressStream compresses the dest with specified compression algorithm.
//
// Deprecated: use [compression.CompressStream].
func CompressStream(dest io.Writer, comp compression.Compression) (io.WriteCloser, error) {
return compression.CompressStream(dest, comp)
}
// TarModifierFunc is a function that can be passed to ReplaceFileTarWrapper to
// modify the contents or header of an entry in the archive. If the file already
// exists in the archive the TarModifierFunc will be called with the Header and
// a reader which will return the files content. If the file does not exist both
// header and content will be nil.
type TarModifierFunc func(path string, header *tar.Header, content io.Reader) (*tar.Header, []byte, error)
// ReplaceFileTarWrapper converts inputTarStream to a new tar stream. Files in the
// tar stream are modified if they match any of the keys in mods.
func ReplaceFileTarWrapper(inputTarStream io.ReadCloser, mods map[string]TarModifierFunc) io.ReadCloser {
pipeReader, pipeWriter := io.Pipe()
go func() {
tarReader := tar.NewReader(inputTarStream)
tarWriter := tar.NewWriter(pipeWriter)
defer inputTarStream.Close()
defer tarWriter.Close()
modify := func(name string, original *tar.Header, modifier TarModifierFunc, tarReader io.Reader) error {
header, data, err := modifier(name, original, tarReader)
switch {
case err != nil:
return err
case header == nil:
return nil
}
if header.Name == "" {
header.Name = name
}
header.Size = int64(len(data))
if err := tarWriter.WriteHeader(header); err != nil {
return err
}
if len(data) != 0 {
if _, err := tarWriter.Write(data); err != nil {
return err
}
}
return nil
}
var err error
var originalHeader *tar.Header
for {
originalHeader, err = tarReader.Next()
if errors.Is(err, io.EOF) {
break
}
if err != nil {
pipeWriter.CloseWithError(err)
return
}
modifier, ok := mods[originalHeader.Name]
if !ok {
// No modifiers for this file, copy the header and data
if err := tarWriter.WriteHeader(originalHeader); err != nil {
pipeWriter.CloseWithError(err)
return
}
if err := copyWithBuffer(tarWriter, tarReader); err != nil {
pipeWriter.CloseWithError(err)
return
}
continue
}
delete(mods, originalHeader.Name)
if err := modify(originalHeader.Name, originalHeader, modifier, tarReader); err != nil {
pipeWriter.CloseWithError(err)
return
}
}
// Apply the modifiers that haven't matched any files in the archive
for name, modifier := range mods {
if err := modify(name, nil, modifier, nil); err != nil {
pipeWriter.CloseWithError(err)
return
}
}
pipeWriter.Close()
}()
return pipeReader
}
// FileInfoHeaderNoLookups creates a partially-populated tar.Header from fi.
//
// Deprecated: use [tarheader.FileInfoHeaderNoLookups].
func FileInfoHeaderNoLookups(fi os.FileInfo, link string) (*tar.Header, error) {
return tarheader.FileInfoHeaderNoLookups(fi, link)
}
// FileInfoHeader creates a populated Header from fi.
//
// Compared to the archive/tar package, this function fills in less information
// but is safe to call from a chrooted process. The AccessTime and ChangeTime
// fields are not set in the returned header, ModTime is truncated to one-second
// precision, and the Uname and Gname fields are only set when fi is a FileInfo
// value returned from tar.Header.FileInfo().
func FileInfoHeader(name string, fi os.FileInfo, link string) (*tar.Header, error) {
hdr, err := tarheader.FileInfoHeaderNoLookups(fi, link)
if err != nil {
return nil, err
}
hdr.Format = tar.FormatPAX
hdr.ModTime = hdr.ModTime.Truncate(time.Second)
hdr.AccessTime = time.Time{}
hdr.ChangeTime = time.Time{}
hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode)))
hdr.Name = canonicalTarName(name, fi.IsDir())
return hdr, nil
}
const paxSchilyXattr = "SCHILY.xattr."
// ReadSecurityXattrToTarHeader reads security.capability xattr from filesystem
// to a tar header
func ReadSecurityXattrToTarHeader(path string, hdr *tar.Header) error {
const (
// Values based on linux/include/uapi/linux/capability.h
xattrCapsSz2 = 20
versionOffset = 3
vfsCapRevision2 = 2
vfsCapRevision3 = 3
)
capability, _ := lgetxattr(path, "security.capability")
if capability != nil {
if capability[versionOffset] == vfsCapRevision3 {
// Convert VFS_CAP_REVISION_3 to VFS_CAP_REVISION_2 as root UID makes no
// sense outside the user namespace the archive is built in.
capability[versionOffset] = vfsCapRevision2
capability = capability[:xattrCapsSz2]
}
if hdr.PAXRecords == nil {
hdr.PAXRecords = make(map[string]string)
}
hdr.PAXRecords[paxSchilyXattr+"security.capability"] = string(capability)
}
return nil
}
type tarWhiteoutConverter interface {
ConvertWrite(*tar.Header, string, os.FileInfo) (*tar.Header, error)
ConvertRead(*tar.Header, string) (bool, error)
}
type tarAppender struct {
TarWriter *tar.Writer
// for hardlink mapping
SeenFiles map[uint64]string
IdentityMapping user.IdentityMapping
ChownOpts *ChownOpts
// For packing and unpacking whiteout files in the
// non standard format. The whiteout files defined
// by the AUFS standard are used as the tar whiteout
// standard.
WhiteoutConverter tarWhiteoutConverter
}
func newTarAppender(idMapping user.IdentityMapping, writer io.Writer, chownOpts *ChownOpts) *tarAppender {
return &tarAppender{
SeenFiles: make(map[uint64]string),
TarWriter: tar.NewWriter(writer),
IdentityMapping: idMapping,
ChownOpts: chownOpts,
}
}
// canonicalTarName provides a platform-independent and consistent POSIX-style
// path for files and directories to be archived regardless of the platform.
func canonicalTarName(name string, isDir bool) string {
name = filepath.ToSlash(name)
// suffix with '/' for directories
if isDir && !strings.HasSuffix(name, "/") {
name += "/"
}
return name
}
// addTarFile adds to the tar archive a file from `path` as `name`
func (ta *tarAppender) addTarFile(path, name string) error {
fi, err := os.Lstat(path)
if err != nil {
return err
}
var link string
if fi.Mode()&os.ModeSymlink != 0 {
var err error
link, err = os.Readlink(path)
if err != nil {
return err
}
}
hdr, err := FileInfoHeader(name, fi, link)
if err != nil {
return err
}
if err := ReadSecurityXattrToTarHeader(path, hdr); err != nil {
return err
}
// if it's not a directory and has more than 1 link,
// it's hard linked, so set the type flag accordingly
if !fi.IsDir() && hasHardlinks(fi) {
inode, err := getInodeFromStat(fi.Sys())
if err != nil {
return err
}
// a link should have a name that it links too
// and that linked name should be first in the tar archive
if oldpath, ok := ta.SeenFiles[inode]; ok {
hdr.Typeflag = tar.TypeLink
hdr.Linkname = oldpath
hdr.Size = 0 // This Must be here for the writer math to add up!
} else {
ta.SeenFiles[inode] = name
}
}
// check whether the file is overlayfs whiteout
// if yes, skip re-mapping container ID mappings.
isOverlayWhiteout := fi.Mode()&os.ModeCharDevice != 0 && hdr.Devmajor == 0 && hdr.Devminor == 0
// handle re-mapping container ID mappings back to host ID mappings before
// writing tar headers/files. We skip whiteout files because they were written
// by the kernel and already have proper ownership relative to the host
if !isOverlayWhiteout && !strings.HasPrefix(filepath.Base(hdr.Name), WhiteoutPrefix) && !ta.IdentityMapping.Empty() {
uid, gid, err := getFileUIDGID(fi.Sys())
if err != nil {
return err
}
hdr.Uid, hdr.Gid, err = ta.IdentityMapping.ToContainer(uid, gid)
if err != nil {
return err
}
}
// explicitly override with ChownOpts
if ta.ChownOpts != nil {
hdr.Uid = ta.ChownOpts.UID
hdr.Gid = ta.ChownOpts.GID
}
if ta.WhiteoutConverter != nil {
wo, err := ta.WhiteoutConverter.ConvertWrite(hdr, path, fi)
if err != nil {
return err
}
// If a new whiteout file exists, write original hdr, then
// replace hdr with wo to be written after. Whiteouts should
// always be written after the original. Note the original
// hdr may have been updated to be a whiteout with returning
// a whiteout header
if wo != nil {
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
return err
}
if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 {
return fmt.Errorf("tar: cannot use whiteout for non-empty file")
}
hdr = wo
}
}
if err := ta.TarWriter.WriteHeader(hdr); err != nil {
return err
}
if hdr.Typeflag == tar.TypeReg && hdr.Size > 0 {
// We use sequential file access to avoid depleting the standby list on
// Windows. On Linux, this equates to a regular os.Open.
file, err := sequential.Open(path)
if err != nil {
return err
}
err = copyWithBuffer(ta.TarWriter, file)
file.Close()
if err != nil {
return err
}
}
return nil
}
func createTarFile(path, extractDir string, hdr *tar.Header, reader io.Reader, opts *TarOptions) error {
var (
Lchown = true
inUserns, bestEffortXattrs bool
chownOpts *ChownOpts
)
// TODO(thaJeztah): make opts a required argument.
if opts != nil {
Lchown = !opts.NoLchown
inUserns = opts.InUserNS // TODO(thaJeztah): consider deprecating opts.InUserNS and detect locally.
chownOpts = opts.ChownOpts
bestEffortXattrs = opts.BestEffortXattrs
}
// hdr.Mode is in linux format, which we can use for sycalls,
// but for os.Foo() calls we need the mode converted to os.FileMode,
// so use hdrInfo.Mode() (they differ for e.g. setuid bits)
hdrInfo := hdr.FileInfo()
switch hdr.Typeflag {
case tar.TypeDir:
// Create directory unless it exists as a directory already.
// In that case we just want to merge the two
if fi, err := os.Lstat(path); err != nil || !fi.IsDir() {
if err := os.Mkdir(path, hdrInfo.Mode()); err != nil {
return err
}
}
case tar.TypeReg:
// Source is regular file. We use sequential file access to avoid depleting
// the standby list on Windows. On Linux, this equates to a regular os.OpenFile.
file, err := sequential.OpenFile(path, os.O_CREATE|os.O_WRONLY, hdrInfo.Mode())
if err != nil {
return err
}
if err := copyWithBuffer(file, reader); err != nil {
_ = file.Close()
return err
}
_ = file.Close()
case tar.TypeBlock, tar.TypeChar:
if inUserns { // cannot create devices in a userns
log.G(context.TODO()).WithFields(log.Fields{"path": path, "type": hdr.Typeflag}).Debug("skipping device nodes in a userns")
return nil
}
// Handle this is an OS-specific way
if err := handleTarTypeBlockCharFifo(hdr, path); err != nil {
return err
}
case tar.TypeFifo:
// Handle this is an OS-specific way
if err := handleTarTypeBlockCharFifo(hdr, path); err != nil {
if inUserns && errors.Is(err, syscall.EPERM) {
// In most cases, cannot create a fifo if running in user namespace
log.G(context.TODO()).WithFields(log.Fields{"error": err, "path": path, "type": hdr.Typeflag}).Debug("creating fifo node in a userns")
return nil
}
return err
}
case tar.TypeLink:
// #nosec G305 -- The target path is checked for path traversal.
targetPath := filepath.Join(extractDir, hdr.Linkname)
// check for hardlink breakout
if !strings.HasPrefix(targetPath, extractDir) {
return breakoutError(fmt.Errorf("invalid hardlink %q -> %q", targetPath, hdr.Linkname))
}
if err := os.Link(targetPath, path); err != nil {
return err
}
case tar.TypeSymlink:
// path -> hdr.Linkname = targetPath
// e.g. /extractDir/path/to/symlink -> ../2/file = /extractDir/path/2/file
targetPath := filepath.Join(filepath.Dir(path), hdr.Linkname) // #nosec G305 -- The target path is checked for path traversal.
// the reason we don't need to check symlinks in the path (with FollowSymlinkInScope) is because
// that symlink would first have to be created, which would be caught earlier, at this very check:
if !strings.HasPrefix(targetPath, extractDir) {
return breakoutError(fmt.Errorf("invalid symlink %q -> %q", path, hdr.Linkname))
}
if err := os.Symlink(hdr.Linkname, path); err != nil {
return err
}
case tar.TypeXGlobalHeader:
log.G(context.TODO()).Debug("PAX Global Extended Headers found and ignored")
return nil
default:
return fmt.Errorf("unhandled tar header type %d", hdr.Typeflag)
}
// Lchown is not supported on Windows.
if Lchown && runtime.GOOS != "windows" {
if chownOpts == nil {
chownOpts = &ChownOpts{UID: hdr.Uid, GID: hdr.Gid}
}
if err := os.Lchown(path, chownOpts.UID, chownOpts.GID); err != nil {
var msg string
if inUserns && errors.Is(err, syscall.EINVAL) {
msg = " (try increasing the number of subordinate IDs in /etc/subuid and /etc/subgid)"
}
return fmt.Errorf("failed to Lchown %q for UID %d, GID %d%s: %w", path, hdr.Uid, hdr.Gid, msg, err)
}
}
var xattrErrs []string
for key, value := range hdr.PAXRecords {
xattr, ok := strings.CutPrefix(key, paxSchilyXattr)
if !ok {
continue
}
if err := lsetxattr(path, xattr, []byte(value), 0); err != nil {
if bestEffortXattrs && errors.Is(err, syscall.ENOTSUP) || errors.Is(err, syscall.EPERM) {
// EPERM occurs if modifying xattrs is not allowed. This can
// happen when running in userns with restrictions (ChromeOS).
xattrErrs = append(xattrErrs, err.Error())
continue
}
return err
}
}
if len(xattrErrs) > 0 {
log.G(context.TODO()).WithFields(log.Fields{
"errors": xattrErrs,
}).Warn("ignored xattrs in archive: underlying filesystem doesn't support them")
}
// There is no LChmod, so ignore mode for symlink. Also, this
// must happen after chown, as that can modify the file mode
if err := handleLChmod(hdr, path, hdrInfo); err != nil {
return err
}
aTime := boundTime(latestTime(hdr.AccessTime, hdr.ModTime))
mTime := boundTime(hdr.ModTime)
// chtimes doesn't support a NOFOLLOW flag atm
if hdr.Typeflag == tar.TypeLink {
if fi, err := os.Lstat(hdr.Linkname); err == nil && (fi.Mode()&os.ModeSymlink == 0) {
if err := chtimes(path, aTime, mTime); err != nil {
return err
}
}
} else if hdr.Typeflag != tar.TypeSymlink {
if err := chtimes(path, aTime, mTime); err != nil {
return err
}
} else {
if err := lchtimes(path, aTime, mTime); err != nil {
return err
}
}
return nil
}
// Tar creates an archive from the directory at `path`, and returns it as a
// stream of bytes.
func Tar(path string, comp compression.Compression) (io.ReadCloser, error) {
return TarWithOptions(path, &TarOptions{Compression: comp})
}
// TarWithOptions creates an archive from the directory at `path`, only including files whose relative
// paths are included in `options.IncludeFiles` (if non-nil) or not in `options.ExcludePatterns`.
func TarWithOptions(srcPath string, options *TarOptions) (io.ReadCloser, error) {
tb, err := NewTarballer(srcPath, options)
if err != nil {
return nil, err
}
go tb.Do()
return tb.Reader(), nil
}
// Tarballer is a lower-level interface to TarWithOptions which gives the caller
// control over which goroutine the archiving operation executes on.
type Tarballer struct {
srcPath string
options *TarOptions
pm *patternmatcher.PatternMatcher
pipeReader *io.PipeReader
pipeWriter *io.PipeWriter
compressWriter io.WriteCloser
whiteoutConverter tarWhiteoutConverter
}
// NewTarballer constructs a new tarballer. The arguments are the same as for
// TarWithOptions.
func NewTarballer(srcPath string, options *TarOptions) (*Tarballer, error) {
pm, err := patternmatcher.New(options.ExcludePatterns)
if err != nil {
return nil, err
}
pipeReader, pipeWriter := io.Pipe()
compressWriter, err := compression.CompressStream(pipeWriter, options.Compression)
if err != nil {
return nil, err
}
return &Tarballer{
// Fix the source path to work with long path names. This is a no-op
// on platforms other than Windows.
srcPath: addLongPathPrefix(srcPath),
options: options,
pm: pm,
pipeReader: pipeReader,
pipeWriter: pipeWriter,
compressWriter: compressWriter,
whiteoutConverter: getWhiteoutConverter(options.WhiteoutFormat),
}, nil
}
// Reader returns the reader for the created archive.
func (t *Tarballer) Reader() io.ReadCloser {
return t.pipeReader
}
// Do performs the archiving operation in the background. The resulting archive
// can be read from t.Reader(). Do should only be called once on each Tarballer
// instance.
func (t *Tarballer) Do() {
ta := newTarAppender(
t.options.IDMap,
t.compressWriter,
t.options.ChownOpts,
)
ta.WhiteoutConverter = t.whiteoutConverter
defer func() {
// Make sure to check the error on Close.
if err := ta.TarWriter.Close(); err != nil {
log.G(context.TODO()).Errorf("Can't close tar writer: %s", err)
}
if err := t.compressWriter.Close(); err != nil {
log.G(context.TODO()).Errorf("Can't close compress writer: %s", err)
}
if err := t.pipeWriter.Close(); err != nil {
log.G(context.TODO()).Errorf("Can't close pipe writer: %s", err)
}
}()
// In general we log errors here but ignore them because
// during e.g. a diff operation the container can continue
// mutating the filesystem and we can see transient errors
// from this
stat, err := os.Lstat(t.srcPath)
if err != nil {
return
}
if !stat.IsDir() {
// We can't later join a non-dir with any includes because the
// 'walk' will error if "file/." is stat-ed and "file" is not a
// directory. So, we must split the source path and use the
// basename as the include.
if len(t.options.IncludeFiles) > 0 {
log.G(context.TODO()).Warn("Tar: Can't archive a file with includes")
}
dir, base := SplitPathDirEntry(t.srcPath)
t.srcPath = dir
t.options.IncludeFiles = []string{base}
}
if len(t.options.IncludeFiles) == 0 {
t.options.IncludeFiles = []string{"."}
}
seen := make(map[string]bool)
for _, include := range t.options.IncludeFiles {
rebaseName := t.options.RebaseNames[include]
var (
parentMatchInfo []patternmatcher.MatchInfo
parentDirs []string
)
walkRoot := getWalkRoot(t.srcPath, include)
// TODO(thaJeztah): should this error be handled?
_ = filepath.WalkDir(walkRoot, func(filePath string, f os.DirEntry, err error) error {
if err != nil {
log.G(context.TODO()).Errorf("Tar: Can't stat file %s to tar: %s", t.srcPath, err)
return nil
}
relFilePath, err := filepath.Rel(t.srcPath, filePath)
if err != nil || (!t.options.IncludeSourceDir && relFilePath == "." && f.IsDir()) {
// Error getting relative path OR we are looking
// at the source directory path. Skip in both situations.
return nil
}
if t.options.IncludeSourceDir && include == "." && relFilePath != "." {
relFilePath = strings.Join([]string{".", relFilePath}, string(filepath.Separator))
}
skip := false
// If "include" is an exact match for the current file
// then even if there's an "excludePatterns" pattern that
// matches it, don't skip it. IOW, assume an explicit 'include'
// is asking for that file no matter what - which is true
// for some files, like .dockerignore and Dockerfile (sometimes)
if include != relFilePath {
for len(parentDirs) != 0 {
lastParentDir := parentDirs[len(parentDirs)-1]
if strings.HasPrefix(relFilePath, lastParentDir+string(os.PathSeparator)) {
break
}
parentDirs = parentDirs[:len(parentDirs)-1]
parentMatchInfo = parentMatchInfo[:len(parentMatchInfo)-1]
}
var matchInfo patternmatcher.MatchInfo
if len(parentMatchInfo) != 0 {
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, parentMatchInfo[len(parentMatchInfo)-1])
} else {
skip, matchInfo, err = t.pm.MatchesUsingParentResults(relFilePath, patternmatcher.MatchInfo{})
}
if err != nil {
log.G(context.TODO()).Errorf("Error matching %s: %v", relFilePath, err)
return err
}
if f.IsDir() {
parentDirs = append(parentDirs, relFilePath)
parentMatchInfo = append(parentMatchInfo, matchInfo)
}
}
if skip {
// If we want to skip this file and its a directory
// then we should first check to see if there's an
// excludes pattern (e.g. !dir/file) that starts with this
// dir. If so then we can't skip this dir.
// Its not a dir then so we can just return/skip.
if !f.IsDir() {
return nil
}
// No exceptions (!...) in patterns so just skip dir
if !t.pm.Exclusions() {
return filepath.SkipDir
}
dirSlash := relFilePath + string(filepath.Separator)
for _, pat := range t.pm.Patterns() {
if !pat.Exclusion() {
continue
}
if strings.HasPrefix(pat.String()+string(filepath.Separator), dirSlash) {
// found a match - so can't skip this dir
return nil
}
}
// No matching exclusion dir so just skip dir
return filepath.SkipDir
}
if seen[relFilePath] {
return nil
}
seen[relFilePath] = true
// Rename the base resource.
if rebaseName != "" {
var replacement string
if rebaseName != string(filepath.Separator) {
// Special case the root directory to replace with an
// empty string instead so that we don't end up with
// double slashes in the paths.
replacement = rebaseName
}
relFilePath = strings.Replace(relFilePath, include, replacement, 1)
}
if err := ta.addTarFile(filePath, relFilePath); err != nil {
log.G(context.TODO()).Errorf("Can't add file %s to tar: %s", filePath, err)
// if pipe is broken, stop writing tar stream to it
if errors.Is(err, io.ErrClosedPipe) {
return err
}
}
return nil
})
}
}
// Unpack unpacks the decompressedArchive to dest with options.
func Unpack(decompressedArchive io.Reader, dest string, options *TarOptions) error {
tr := tar.NewReader(decompressedArchive)
var dirs []*tar.Header
whiteoutConverter := getWhiteoutConverter(options.WhiteoutFormat)
// Iterate through the files in the archive.
loop:
for {
hdr, err := tr.Next()
if errors.Is(err, io.EOF) {
// end of tar archive
break
}
if err != nil {
return err
}
// ignore XGlobalHeader early to avoid creating parent directories for them
if hdr.Typeflag == tar.TypeXGlobalHeader {
log.G(context.TODO()).Debugf("PAX Global Extended Headers found for %s and ignored", hdr.Name)
continue
}
// Normalize name, for safety and for a simple is-root check
// This keeps "../" as-is, but normalizes "/../" to "/". Or Windows:
// This keeps "..\" as-is, but normalizes "\..\" to "\".
hdr.Name = filepath.Clean(hdr.Name)
for _, exclude := range options.ExcludePatterns {
if strings.HasPrefix(hdr.Name, exclude) {
continue loop
}
}
// Ensure that the parent directory exists.
err = createImpliedDirectories(dest, hdr, options)
if err != nil {
return err
}
// #nosec G305 -- The joined path is checked for path traversal.
path := filepath.Join(dest, hdr.Name)
rel, err := filepath.Rel(dest, path)
if err != nil {
return err
}
if strings.HasPrefix(rel, ".."+string(os.PathSeparator)) {
return breakoutError(fmt.Errorf("%q is outside of %q", hdr.Name, dest))
}
// If path exits we almost always just want to remove and replace it
// The only exception is when it is a directory *and* the file from
// the layer is also a directory. Then we want to merge them (i.e.
// just apply the metadata from the layer).
if fi, err := os.Lstat(path); err == nil {
if options.NoOverwriteDirNonDir && fi.IsDir() && hdr.Typeflag != tar.TypeDir {
// If NoOverwriteDirNonDir is true then we cannot replace
// an existing directory with a non-directory from the archive.
return fmt.Errorf("cannot overwrite directory %q with non-directory %q", path, dest)
}
if options.NoOverwriteDirNonDir && !fi.IsDir() && hdr.Typeflag == tar.TypeDir {
// If NoOverwriteDirNonDir is true then we cannot replace
// an existing non-directory with a directory from the archive.
return fmt.Errorf("cannot overwrite non-directory %q with directory %q", path, dest)
}
if fi.IsDir() && hdr.Name == "." {
continue
}
if !fi.IsDir() || hdr.Typeflag != tar.TypeDir {
if err := os.RemoveAll(path); err != nil {
return err
}
}
}
if err := remapIDs(options.IDMap, hdr); err != nil {
return err
}
if whiteoutConverter != nil {
writeFile, err := whiteoutConverter.ConvertRead(hdr, path)
if err != nil {
return err
}
if !writeFile {
continue
}
}
if err := createTarFile(path, dest, hdr, tr, options); err != nil {
return err
}
// Directory mtimes must be handled at the end to avoid further
// file creation in them to modify the directory mtime
if hdr.Typeflag == tar.TypeDir {
dirs = append(dirs, hdr)
}
}
for _, hdr := range dirs {
// #nosec G305 -- The header was checked for path traversal before it was appended to the dirs slice.
path := filepath.Join(dest, hdr.Name)
if err := chtimes(path, boundTime(latestTime(hdr.AccessTime, hdr.ModTime)), boundTime(hdr.ModTime)); err != nil {
return err
}
}
return nil
}
// createImpliedDirectories will create all parent directories of the current path with default permissions, if they do
// not already exist. This is possible as the tar format supports 'implicit' directories, where their existence is
// defined by the paths of files in the tar, but there are no header entries for the directories themselves, and thus
// we most both create them and choose metadata like permissions.
//
// The caller should have performed filepath.Clean(hdr.Name), so hdr.Name will now be in the filepath format for the OS
// on which the daemon is running. This precondition is required because this function assumes a OS-specific path
// separator when checking that a path is not the root.
func createImpliedDirectories(dest string, hdr *tar.Header, options *TarOptions) error {
// Not the root directory, ensure that the parent directory exists
if !strings.HasSuffix(hdr.Name, string(os.PathSeparator)) {
parent := filepath.Dir(hdr.Name)
parentPath := filepath.Join(dest, parent)
if _, err := os.Lstat(parentPath); err != nil && os.IsNotExist(err) {
// RootPair() is confined inside this loop as most cases will not require a call, so we can spend some
// unneeded function calls in the uncommon case to encapsulate logic -- implied directories are a niche
// usage that reduces the portability of an image.
uid, gid := options.IDMap.RootPair()
err = user.MkdirAllAndChown(parentPath, ImpliedDirectoryMode, uid, gid, user.WithOnlyNew)
if err != nil {
return err
}
}
}
return nil
}
// Untar reads a stream of bytes from `archive`, parses it as a tar archive,
// and unpacks it into the directory at `dest`.
// The archive may be compressed with one of the following algorithms:
// identity (uncompressed), gzip, bzip2, xz.
//
// FIXME: specify behavior when target path exists vs. doesn't exist.
func Untar(tarArchive io.Reader, dest string, options *TarOptions) error {
return untarHandler(tarArchive, dest, options, true)
}
// UntarUncompressed reads a stream of bytes from `archive`, parses it as a tar archive,
// and unpacks it into the directory at `dest`.
// The archive must be an uncompressed stream.
func UntarUncompressed(tarArchive io.Reader, dest string, options *TarOptions) error {
return untarHandler(tarArchive, dest, options, false)
}
// Handler for teasing out the automatic decompression
func untarHandler(tarArchive io.Reader, dest string, options *TarOptions, decompress bool) error {
if tarArchive == nil {
return errors.New("empty archive")
}
dest = filepath.Clean(dest)
if options == nil {
options = &TarOptions{}
}
if options.ExcludePatterns == nil {
options.ExcludePatterns = []string{}
}
r := tarArchive
if decompress {
decompressedArchive, err := compression.DecompressStream(tarArchive)
if err != nil {
return err
}
defer decompressedArchive.Close()
r = decompressedArchive
}
return Unpack(r, dest, options)
}
// TarUntar is a convenience function which calls Tar and Untar, with the output of one piped into the other.
// If either Tar or Untar fails, TarUntar aborts and returns the error.
func (archiver *Archiver) TarUntar(src, dst string) error {
archive, err := Tar(src, compression.None)
if err != nil {
return err
}
defer archive.Close()
return archiver.Untar(archive, dst, &TarOptions{
IDMap: archiver.IDMapping,
})
}
// UntarPath untar a file from path to a destination, src is the source tar file path.
func (archiver *Archiver) UntarPath(src, dst string) error {
archive, err := os.Open(src)
if err != nil {
return err
}
defer archive.Close()
return archiver.Untar(archive, dst, &TarOptions{
IDMap: archiver.IDMapping,
})
}
// CopyWithTar creates a tar archive of filesystem path `src`, and
// unpacks it at filesystem path `dst`.
// The archive is streamed directly with fixed buffering and no
// intermediary disk IO.
func (archiver *Archiver) CopyWithTar(src, dst string) error {
srcSt, err := os.Stat(src)
if err != nil {
return err
}
if !srcSt.IsDir() {
return archiver.CopyFileWithTar(src, dst)
}
// if this Archiver is set up with ID mapping we need to create
// the new destination directory with the remapped root UID/GID pair
// as owner
uid, gid := archiver.IDMapping.RootPair()
// Create dst, copy src's content into it
if err := user.MkdirAllAndChown(dst, 0o755, uid, gid, user.WithOnlyNew); err != nil {
return err
}
return archiver.TarUntar(src, dst)
}
// CopyFileWithTar emulates the behavior of the 'cp' command-line
// for a single file. It copies a regular file from path `src` to
// path `dst`, and preserves all its metadata.
func (archiver *Archiver) CopyFileWithTar(src, dst string) (err error) {
srcSt, err := os.Stat(src)
if err != nil {
return err
}
if srcSt.IsDir() {
return errors.New("can't copy a directory")
}
// Clean up the trailing slash. This must be done in an operating
// system specific manner.
if dst[len(dst)-1] == os.PathSeparator {
dst = filepath.Join(dst, filepath.Base(src))
}
// Create the holding directory if necessary
if err := os.MkdirAll(filepath.Dir(dst), 0o700); err != nil {
return err
}
r, w := io.Pipe()
errC := make(chan error, 1)
go func() {
defer close(errC)
errC <- func() error {
defer w.Close()
srcF, err := os.Open(src)
if err != nil {
return err
}
defer srcF.Close()
hdr, err := tarheader.FileInfoHeaderNoLookups(srcSt, "")
if err != nil {
return err
}
hdr.Format = tar.FormatPAX
hdr.ModTime = hdr.ModTime.Truncate(time.Second)
hdr.AccessTime = time.Time{}
hdr.ChangeTime = time.Time{}
hdr.Name = filepath.Base(dst)
hdr.Mode = int64(chmodTarEntry(os.FileMode(hdr.Mode)))
if err := remapIDs(archiver.IDMapping, hdr); err != nil {
return err
}
tw := tar.NewWriter(w)
defer tw.Close()
if err := tw.WriteHeader(hdr); err != nil {
return err
}
if err := copyWithBuffer(tw, srcF); err != nil {
return err
}
return nil
}()
}()
defer func() {
if er := <-errC; err == nil && er != nil {
err = er
}
}()
err = archiver.Untar(r, filepath.Dir(dst), nil)
if err != nil {
r.CloseWithError(err)
}
return err
}
// IdentityMapping returns the IdentityMapping of the archiver.
func (archiver *Archiver) IdentityMapping() user.IdentityMapping {
return archiver.IDMapping
}
func remapIDs(idMapping user.IdentityMapping, hdr *tar.Header) error {
uid, gid, err := idMapping.ToHost(hdr.Uid, hdr.Gid)
hdr.Uid, hdr.Gid = uid, gid
return err
}