Source file src/cmd/vendor/golang.org/x/mod/zip/zip.go

     1  // Copyright 2019 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // Package zip provides functions for creating and extracting module zip files.
     6  //
     7  // Module zip files have several restrictions listed below. These are necessary
     8  // to ensure that module zip files can be extracted consistently on supported
     9  // platforms and file systems.
    10  //
    11  // • All file paths within a zip file must start with "<module>@<version>/",
    12  // where "<module>" is the module path and "<version>" is the version.
    13  // The module path must be valid (see [golang.org/x/mod/module.CheckPath]).
    14  // The version must be valid and canonical (see
    15  // [golang.org/x/mod/module.CanonicalVersion]). The path must have a major
    16  // version suffix consistent with the version (see
    17  // [golang.org/x/mod/module.Check]). The part of the file path after the
    18  // "<module>@<version>/" prefix must be valid (see
    19  // [golang.org/x/mod/module.CheckFilePath]).
    20  //
    21  // • No two file paths may be equal under Unicode case-folding (see
    22  // [strings.EqualFold]).
    23  //
    24  // • A go.mod file may or may not appear in the top-level directory. If present,
    25  // it must be named "go.mod", not any other case. Files named "go.mod"
    26  // are not allowed in any other directory.
    27  //
    28  // • The total size in bytes of a module zip file may be at most [MaxZipFile]
    29  // bytes (500 MiB). The total uncompressed size of the files within the
    30  // zip may also be at most [MaxZipFile] bytes.
    31  //
    32  // • Each file's uncompressed size must match its declared 64-bit uncompressed
    33  // size in the zip file header.
    34  //
    35  // • If the zip contains files named "<module>@<version>/go.mod" or
    36  // "<module>@<version>/LICENSE", their sizes in bytes may be at most
    37  // [MaxGoMod] or [MaxLICENSE], respectively (both are 16 MiB).
    38  //
    39  // • Empty directories are ignored. File permissions and timestamps are also
    40  // ignored.
    41  //
    42  // • Symbolic links and other irregular files are not allowed.
    43  //
    44  // Note that this package does not provide hashing functionality. See
    45  // [golang.org/x/mod/sumdb/dirhash].
    46  package zip
    47  
    48  import (
    49  	"archive/zip"
    50  	"bytes"
    51  	"errors"
    52  	"fmt"
    53  	"io"
    54  	"os"
    55  	"os/exec"
    56  	"path"
    57  	"path/filepath"
    58  	"strings"
    59  	"time"
    60  	"unicode"
    61  	"unicode/utf8"
    62  
    63  	"golang.org/x/mod/module"
    64  )
    65  
const (
	// MaxZipFile is the maximum size in bytes of a module zip file
	// (500 << 20 bytes, i.e. 500 MiB). The go command will report an error
	// if either the zip file or its extracted content is larger than this.
	MaxZipFile = 500 << 20

	// MaxGoMod is the maximum size in bytes of a go.mod file within a
	// module zip file (16 << 20 bytes, i.e. 16 MiB).
	MaxGoMod = 16 << 20

	// MaxLICENSE is the maximum size in bytes of a LICENSE file within a
	// module zip file (16 << 20 bytes, i.e. 16 MiB).
	MaxLICENSE = 16 << 20
)
    80  
// File provides an abstraction for a file in a directory, zip, or anything
// else that looks like a file.
//
// Implementations are handed to CheckFiles and Create, which identify each
// file by Path, classify it using Lstat, and (in Create only) read its
// contents through Open.
type File interface {
	// Path returns a clean slash-separated relative path from the module root
	// directory to the file. Paths that are not clean or that are absolute
	// are reported as invalid by checkFiles.
	Path() string

	// Lstat returns information about the file. If the file is a symbolic link,
	// Lstat returns information about the link itself, not the file it points to.
	// checkFiles consults the Mode, IsDir, and Size of the returned value.
	Lstat() (os.FileInfo, error)

	// Open provides access to the data within a regular file. Open may return
	// an error if called on a directory or symbolic link.
	Open() (io.ReadCloser, error)
}
    96  
// CheckedFiles reports whether a set of files satisfy the name and size
// constraints required by module zip files. The constraints are listed in the
// package documentation.
//
// Functions that produce this report may include slightly different sets of
// files. See documentation for [CheckFiles], [CheckDir], and [CheckZip] for
// details.
//
// Use [CheckedFiles.Err] to turn the report into a single error value.
type CheckedFiles struct {
	// Valid is a list of file paths that should be included in a zip file.
	Valid []string

	// Omitted is a list of files that are ignored when creating a module zip
	// file, along with the reason each file is ignored. Omitted files do not
	// make the report invalid: Err does not look at this field.
	Omitted []FileError

	// Invalid is a list of files that should not be included in a module zip
	// file, along with the reason each file is invalid. Err returns a
	// FileErrorList built from this field when it is non-empty and SizeError
	// is nil.
	Invalid []FileError

	// SizeError is non-nil if the total uncompressed size of the valid files
	// exceeds the module zip size limit or if the zip file itself exceeds the
	// limit. When set, it takes precedence over Invalid in Err.
	SizeError error
}
   120  
   121  // Err returns an error if [CheckedFiles] does not describe a valid module zip
   122  // file. [CheckedFiles.SizeError] is returned if that field is set.
   123  // A [FileErrorList] is returned
   124  // if there are one or more invalid files. Other errors may be returned in the
   125  // future.
   126  func (cf CheckedFiles) Err() error {
   127  	if cf.SizeError != nil {
   128  		return cf.SizeError
   129  	}
   130  	if len(cf.Invalid) > 0 {
   131  		return FileErrorList(cf.Invalid)
   132  	}
   133  	return nil
   134  }
   135  
   136  type FileErrorList []FileError
   137  
   138  func (el FileErrorList) Error() string {
   139  	buf := &strings.Builder{}
   140  	sep := ""
   141  	for _, e := range el {
   142  		buf.WriteString(sep)
   143  		buf.WriteString(e.Error())
   144  		sep = "\n"
   145  	}
   146  	return buf.String()
   147  }
   148  
   149  type FileError struct {
   150  	Path string
   151  	Err  error
   152  }
   153  
   154  func (e FileError) Error() string {
   155  	return fmt.Sprintf("%s: %s", e.Path, e.Err)
   156  }
   157  
   158  func (e FileError) Unwrap() error {
   159  	return e.Err
   160  }
   161  
// Sentinel errors attached to FileError values. The first group describes
// files that make a module zip invalid; the second group describes files
// that are silently left out of a module zip.
var (
	// Predefined error messages for invalid files. Not exhaustive.
	errPathNotClean    = errors.New("file path is not clean")
	errPathNotRelative = errors.New("file path is not relative")
	errGoModCase       = errors.New("go.mod files must have lowercase names")
	// The two size errors embed the corresponding limit in their message.
	errGoModSize       = fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod)
	errLICENSESize     = fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE)

	// Predefined error messages for omitted files. Not exhaustive.
	errVCS           = errors.New("directory is a version control repository")
	errVendored      = errors.New("file is in vendor directory")
	errSubmoduleFile = errors.New("file is in another module")
	errSubmoduleDir  = errors.New("directory is in another module")
	errHgArchivalTxt = errors.New("file is inserted by 'hg archive' and is always omitted")
	errSymlink       = errors.New("file is a symbolic link")
	errNotRegular    = errors.New("not a regular file")
)
   179  
   180  // CheckFiles reports whether a list of files satisfy the name and size
   181  // constraints listed in the package documentation. The returned CheckedFiles
   182  // record contains lists of valid, invalid, and omitted files. Every file in
   183  // the given list will be included in exactly one of those lists.
   184  //
   185  // CheckFiles returns an error if the returned CheckedFiles does not describe
   186  // a valid module zip file (according to CheckedFiles.Err). The returned
   187  // CheckedFiles is still populated when an error is returned.
   188  //
   189  // Note that CheckFiles will not open any files, so Create may still fail when
   190  // CheckFiles is successful due to I/O errors and reported size differences.
   191  func CheckFiles(files []File) (CheckedFiles, error) {
   192  	cf, _, _ := checkFiles(files)
   193  	return cf, cf.Err()
   194  }
   195  
// checkFiles implements CheckFiles and also returns lists of valid files and
// their sizes, corresponding to cf.Valid. It omits files in submodules, files
// in vendored packages, symlinked files, and various other unwanted files.
//
// The lists returned are used in Create to avoid repeated calls to File.Lstat.
//
// The checks in the main loop run in a fixed order and each file is reported
// under the first check it fails, so reordering them changes which list (and
// which error) a file ends up with.
func checkFiles(files []File) (cf CheckedFiles, validFiles []File, validSizes []int64) {
	// errPaths remembers every path already reported so that a file is
	// recorded at most once; the first error reported for a path wins.
	errPaths := make(map[string]struct{})
	// addError records err for path in cf.Omitted (omitted == true) or
	// cf.Invalid (omitted == false), unless path was already reported.
	addError := func(path string, omitted bool, err error) {
		if _, ok := errPaths[path]; ok {
			return
		}
		errPaths[path] = struct{}{}
		fe := FileError{Path: path, Err: err}
		if omitted {
			cf.Omitted = append(cf.Omitted, fe)
		} else {
			cf.Invalid = append(cf.Invalid, fe)
		}
	}

	// Find directories containing go.mod files (other than the root).
	// Files in these directories will be omitted.
	// These directories will not be included in the output zip.
	//
	// Keys are the directory part returned by path.Split, which keeps the
	// trailing slash; a go.mod in the module root is stored under "".
	// The match is case-insensitive, and only regular files count.
	haveGoMod := make(map[string]bool)
	for _, f := range files {
		p := f.Path()
		dir, base := path.Split(p)
		if strings.EqualFold(base, "go.mod") {
			info, err := f.Lstat()
			if err != nil {
				addError(p, false, err)
				continue
			}
			if info.Mode().IsRegular() {
				haveGoMod[dir] = true
			}
		}
	}

	// inSubmodule reports whether any directory above p (excluding the
	// module root, whose directory part is "") contains a go.mod file.
	inSubmodule := func(p string) bool {
		for {
			dir, _ := path.Split(p)
			if dir == "" {
				// Reached the module root without finding a nested go.mod.
				return false
			}
			if haveGoMod[dir] {
				return true
			}
			// Drop the trailing slash and continue with the parent directory.
			p = dir[:len(dir)-1]
		}
	}

	collisions := make(collisionChecker)
	// maxSize is the remaining size budget; it shrinks as files are accepted.
	maxSize := int64(MaxZipFile)
	for _, f := range files {
		p := f.Path()
		if p != path.Clean(p) {
			addError(p, false, errPathNotClean)
			continue
		}
		if path.IsAbs(p) {
			addError(p, false, errPathNotRelative)
			continue
		}
		if isVendoredPackage(p) {
			// Skip files in vendored packages.
			addError(p, true, errVendored)
			continue
		}
		if inSubmodule(p) {
			// Skip submodule files.
			addError(p, true, errSubmoduleFile)
			continue
		}
		if p == ".hg_archival.txt" {
			// Inserted by hg archive.
			// The go command drops this regardless of the VCS being used.
			addError(p, true, errHgArchivalTxt)
			continue
		}
		if err := module.CheckFilePath(p); err != nil {
			addError(p, false, err)
			continue
		}
		// p is the whole path here, so this only matches a mis-cased go.mod
		// in the module root.
		if strings.ToLower(p) == "go.mod" && p != "go.mod" {
			addError(p, false, errGoModCase)
			continue
		}
		info, err := f.Lstat()
		if err != nil {
			addError(p, false, err)
			continue
		}
		if err := collisions.check(p, info.IsDir()); err != nil {
			addError(p, false, err)
			continue
		}
		if info.Mode()&os.ModeType == os.ModeSymlink {
			// Skip symbolic links (golang.org/issue/27093).
			addError(p, true, errSymlink)
			continue
		}
		if !info.Mode().IsRegular() {
			addError(p, true, errNotRegular)
			continue
		}
		// Charge the file against the remaining budget. A negative size or
		// one that exceeds the budget sets SizeError (only the first time).
		// This happens before the go.mod/LICENSE limits below, so a file
		// rejected by those limits has still been counted.
		size := info.Size()
		if size >= 0 && size <= maxSize {
			maxSize -= size
		} else if cf.SizeError == nil {
			cf.SizeError = fmt.Errorf("module source tree too large (max size is %d bytes)", MaxZipFile)
		}
		if p == "go.mod" && size > MaxGoMod {
			addError(p, false, errGoModSize)
			continue
		}
		if p == "LICENSE" && size > MaxLICENSE {
			addError(p, false, errLICENSESize)
			continue
		}

		// The three slices are appended in lockstep so that index i refers
		// to the same file in each of them.
		cf.Valid = append(cf.Valid, p)
		validFiles = append(validFiles, f)
		validSizes = append(validSizes, info.Size())
	}

	return cf, validFiles, validSizes
}
   324  
   325  // CheckDir reports whether the files in dir satisfy the name and size
   326  // constraints listed in the package documentation. The returned [CheckedFiles]
   327  // record contains lists of valid, invalid, and omitted files. If a directory is
   328  // omitted (for example, a nested module or vendor directory), it will appear in
   329  // the omitted list, but its files won't be listed.
   330  //
   331  // CheckDir returns an error if it encounters an I/O error or if the returned
   332  // [CheckedFiles] does not describe a valid module zip file (according to
   333  // [CheckedFiles.Err]). The returned [CheckedFiles] is still populated when such
   334  // an error is returned.
   335  //
   336  // Note that CheckDir will not open any files, so [CreateFromDir] may still fail
   337  // when CheckDir is successful due to I/O errors.
   338  func CheckDir(dir string) (CheckedFiles, error) {
   339  	// List files (as CreateFromDir would) and check which ones are omitted
   340  	// or invalid.
   341  	files, omitted, err := listFilesInDir(dir)
   342  	if err != nil {
   343  		return CheckedFiles{}, err
   344  	}
   345  	cf, cfErr := CheckFiles(files)
   346  	_ = cfErr // ignore this error; we'll generate our own after rewriting paths.
   347  
   348  	// Replace all paths with file system paths.
   349  	// Paths returned by CheckFiles will be slash-separated paths relative to dir.
   350  	// That's probably not appropriate for error messages.
   351  	for i := range cf.Valid {
   352  		cf.Valid[i] = filepath.Join(dir, cf.Valid[i])
   353  	}
   354  	cf.Omitted = append(cf.Omitted, omitted...)
   355  	for i := range cf.Omitted {
   356  		cf.Omitted[i].Path = filepath.Join(dir, cf.Omitted[i].Path)
   357  	}
   358  	for i := range cf.Invalid {
   359  		cf.Invalid[i].Path = filepath.Join(dir, cf.Invalid[i].Path)
   360  	}
   361  	return cf, cf.Err()
   362  }
   363  
   364  // CheckZip reports whether the files contained in a zip file satisfy the name
   365  // and size constraints listed in the package documentation.
   366  //
   367  // CheckZip returns an error if the returned [CheckedFiles] does not describe
   368  // a valid module zip file (according to [CheckedFiles.Err]). The returned
   369  // CheckedFiles is still populated when an error is returned. CheckZip will
   370  // also return an error if the module path or version is malformed or if it
   371  // encounters an error reading the zip file.
   372  //
   373  // Note that CheckZip does not read individual files, so [Unzip] may still fail
   374  // when CheckZip is successful due to I/O errors.
   375  func CheckZip(m module.Version, zipFile string) (CheckedFiles, error) {
   376  	f, err := os.Open(zipFile)
   377  	if err != nil {
   378  		return CheckedFiles{}, err
   379  	}
   380  	defer f.Close()
   381  	_, cf, err := checkZip(m, f)
   382  	return cf, err
   383  }
   384  
   385  // checkZip implements checkZip and also returns the *zip.Reader. This is
   386  // used in Unzip to avoid redundant I/O.
   387  func checkZip(m module.Version, f *os.File) (*zip.Reader, CheckedFiles, error) {
   388  	// Make sure the module path and version are valid.
   389  	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
   390  		return nil, CheckedFiles{}, fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
   391  	}
   392  	if err := module.Check(m.Path, m.Version); err != nil {
   393  		return nil, CheckedFiles{}, err
   394  	}
   395  
   396  	// Check the total file size.
   397  	info, err := f.Stat()
   398  	if err != nil {
   399  		return nil, CheckedFiles{}, err
   400  	}
   401  	zipSize := info.Size()
   402  	if zipSize > MaxZipFile {
   403  		cf := CheckedFiles{SizeError: fmt.Errorf("module zip file is too large (%d bytes; limit is %d bytes)", zipSize, MaxZipFile)}
   404  		return nil, cf, cf.Err()
   405  	}
   406  
   407  	// Check for valid file names, collisions.
   408  	var cf CheckedFiles
   409  	addError := func(zf *zip.File, err error) {
   410  		cf.Invalid = append(cf.Invalid, FileError{Path: zf.Name, Err: err})
   411  	}
   412  	z, err := zip.NewReader(f, zipSize)
   413  	if err != nil {
   414  		return nil, CheckedFiles{}, err
   415  	}
   416  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   417  	collisions := make(collisionChecker)
   418  	var size int64
   419  	for _, zf := range z.File {
   420  		if !strings.HasPrefix(zf.Name, prefix) {
   421  			addError(zf, fmt.Errorf("path does not have prefix %q", prefix))
   422  			continue
   423  		}
   424  		name := zf.Name[len(prefix):]
   425  		if name == "" {
   426  			continue
   427  		}
   428  		isDir := strings.HasSuffix(name, "/")
   429  		if isDir {
   430  			name = name[:len(name)-1]
   431  		}
   432  		if path.Clean(name) != name {
   433  			addError(zf, errPathNotClean)
   434  			continue
   435  		}
   436  		if err := module.CheckFilePath(name); err != nil {
   437  			addError(zf, err)
   438  			continue
   439  		}
   440  		if err := collisions.check(name, isDir); err != nil {
   441  			addError(zf, err)
   442  			continue
   443  		}
   444  		if isDir {
   445  			continue
   446  		}
   447  		if base := path.Base(name); strings.EqualFold(base, "go.mod") {
   448  			if base != name {
   449  				addError(zf, fmt.Errorf("go.mod file not in module root directory"))
   450  				continue
   451  			}
   452  			if name != "go.mod" {
   453  				addError(zf, errGoModCase)
   454  				continue
   455  			}
   456  		}
   457  		sz := int64(zf.UncompressedSize64)
   458  		if sz >= 0 && MaxZipFile-size >= sz {
   459  			size += sz
   460  		} else if cf.SizeError == nil {
   461  			cf.SizeError = fmt.Errorf("total uncompressed size of module contents too large (max size is %d bytes)", MaxZipFile)
   462  		}
   463  		if name == "go.mod" && sz > MaxGoMod {
   464  			addError(zf, fmt.Errorf("go.mod file too large (max size is %d bytes)", MaxGoMod))
   465  			continue
   466  		}
   467  		if name == "LICENSE" && sz > MaxLICENSE {
   468  			addError(zf, fmt.Errorf("LICENSE file too large (max size is %d bytes)", MaxLICENSE))
   469  			continue
   470  		}
   471  		cf.Valid = append(cf.Valid, zf.Name)
   472  	}
   473  
   474  	return z, cf, cf.Err()
   475  }
   476  
   477  // Create builds a zip archive for module m from an abstract list of files
   478  // and writes it to w.
   479  //
   480  // Create verifies the restrictions described in the package documentation
   481  // and should not produce an archive that [Unzip] cannot extract. Create does not
   482  // include files in the output archive if they don't belong in the module zip.
   483  // In particular, Create will not include files in modules found in
   484  // subdirectories, most files in vendor directories, or irregular files (such
   485  // as symbolic links) in the output archive.
   486  func Create(w io.Writer, m module.Version, files []File) (err error) {
   487  	defer func() {
   488  		if err != nil {
   489  			err = &zipError{verb: "create zip", err: err}
   490  		}
   491  	}()
   492  
   493  	// Check that the version is canonical, the module path is well-formed, and
   494  	// the major version suffix matches the major version.
   495  	if vers := module.CanonicalVersion(m.Version); vers != m.Version {
   496  		return fmt.Errorf("version %q is not canonical (should be %q)", m.Version, vers)
   497  	}
   498  	if err := module.Check(m.Path, m.Version); err != nil {
   499  		return err
   500  	}
   501  
   502  	// Check whether files are valid, not valid, or should be omitted.
   503  	// Also check that the valid files don't exceed the maximum size.
   504  	cf, validFiles, validSizes := checkFiles(files)
   505  	if err := cf.Err(); err != nil {
   506  		return err
   507  	}
   508  
   509  	// Create the module zip file.
   510  	zw := zip.NewWriter(w)
   511  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   512  
   513  	addFile := func(f File, path string, size int64) error {
   514  		rc, err := f.Open()
   515  		if err != nil {
   516  			return err
   517  		}
   518  		defer rc.Close()
   519  		w, err := zw.Create(prefix + path)
   520  		if err != nil {
   521  			return err
   522  		}
   523  		lr := &io.LimitedReader{R: rc, N: size + 1}
   524  		if _, err := io.Copy(w, lr); err != nil {
   525  			return err
   526  		}
   527  		if lr.N <= 0 {
   528  			return fmt.Errorf("file %q is larger than declared size", path)
   529  		}
   530  		return nil
   531  	}
   532  
   533  	for i, f := range validFiles {
   534  		p := f.Path()
   535  		size := validSizes[i]
   536  		if err := addFile(f, p, size); err != nil {
   537  			return err
   538  		}
   539  	}
   540  
   541  	return zw.Close()
   542  }
   543  
   544  // CreateFromDir creates a module zip file for module m from the contents of
   545  // a directory, dir. The zip content is written to w.
   546  //
   547  // CreateFromDir verifies the restrictions described in the package
   548  // documentation and should not produce an archive that [Unzip] cannot extract.
   549  // CreateFromDir does not include files in the output archive if they don't
   550  // belong in the module zip. In particular, CreateFromDir will not include
   551  // files in modules found in subdirectories, most files in vendor directories,
   552  // or irregular files (such as symbolic links) in the output archive.
   553  // Additionally, unlike [Create], CreateFromDir will not include directories
   554  // named ".bzr", ".git", ".hg", or ".svn".
   555  func CreateFromDir(w io.Writer, m module.Version, dir string) (err error) {
   556  	defer func() {
   557  		if zerr, ok := err.(*zipError); ok {
   558  			zerr.path = dir
   559  		} else if err != nil {
   560  			err = &zipError{verb: "create zip from directory", path: dir, err: err}
   561  		}
   562  	}()
   563  
   564  	files, _, err := listFilesInDir(dir)
   565  	if err != nil {
   566  		return err
   567  	}
   568  
   569  	return Create(w, m, files)
   570  }
   571  
   572  // CreateFromVCS creates a module zip file for module m from the contents of a
   573  // VCS repository stored locally. The zip content is written to w.
   574  //
   575  // repoRoot must be an absolute path to the base of the repository, such as
   576  // "/Users/some-user/some-repo".
   577  //
   578  // revision is the revision of the repository to create the zip from. Examples
   579  // include HEAD or SHA sums for git repositories.
   580  //
   581  // subdir must be the relative path from the base of the repository, such as
   582  // "sub/dir". To create a zip from the base of the repository, pass an empty
   583  // string.
   584  //
   585  // If CreateFromVCS returns [UnrecognizedVCSError], consider falling back to
   586  // [CreateFromDir].
   587  func CreateFromVCS(w io.Writer, m module.Version, repoRoot, revision, subdir string) (err error) {
   588  	defer func() {
   589  		if zerr, ok := err.(*zipError); ok {
   590  			zerr.path = repoRoot
   591  		} else if err != nil {
   592  			err = &zipError{verb: "create zip from version control system", path: repoRoot, err: err}
   593  		}
   594  	}()
   595  
   596  	var filesToCreate []File
   597  
   598  	switch {
   599  	case isGitRepo(repoRoot):
   600  		files, err := filesInGitRepo(repoRoot, revision, subdir)
   601  		if err != nil {
   602  			return err
   603  		}
   604  
   605  		filesToCreate = files
   606  	default:
   607  		return &UnrecognizedVCSError{RepoRoot: repoRoot}
   608  	}
   609  
   610  	return Create(w, m, filesToCreate)
   611  }
   612  
   613  // UnrecognizedVCSError indicates that no recognized version control system was
   614  // found in the given directory.
   615  type UnrecognizedVCSError struct {
   616  	RepoRoot string
   617  }
   618  
   619  func (e *UnrecognizedVCSError) Error() string {
   620  	return fmt.Sprintf("could not find a recognized version control system at %q", e.RepoRoot)
   621  }
   622  
   623  // filesInGitRepo filters out any files that are git ignored in the directory.
   624  func filesInGitRepo(dir, rev, subdir string) ([]File, error) {
   625  	stderr := bytes.Buffer{}
   626  	stdout := bytes.Buffer{}
   627  
   628  	// Incredibly, git produces different archives depending on whether
   629  	// it is running on a Windows system or not, in an attempt to normalize
   630  	// text file line endings. Setting -c core.autocrlf=input means only
   631  	// translate files on the way into the repo, not on the way out (archive).
   632  	// The -c core.eol=lf should be unnecessary but set it anyway.
   633  	//
   634  	// Note: We use git archive to understand which files are actually included,
   635  	// ignoring things like .gitignore'd files. We could also use other
   636  	// techniques like git ls-files, but this approach most closely matches what
   637  	// the Go command does, which is beneficial.
   638  	//
   639  	// Note: some of this code copied from https://go.googlesource.com/go/+/refs/tags/go1.16.5/src/cmd/go/internal/modfetch/codehost/git.go#826.
   640  	cmd := exec.Command("git", "-c", "core.autocrlf=input", "-c", "core.eol=lf", "archive", "--format=zip", rev)
   641  	if subdir != "" {
   642  		cmd.Args = append(cmd.Args, subdir)
   643  	}
   644  	cmd.Dir = dir
   645  	cmd.Env = append(os.Environ(), "PWD="+dir)
   646  	cmd.Stdout = &stdout
   647  	cmd.Stderr = &stderr
   648  	if err := cmd.Run(); err != nil {
   649  		return nil, fmt.Errorf("error running `git archive`: %w, %s", err, stderr.String())
   650  	}
   651  
   652  	rawReader := bytes.NewReader(stdout.Bytes())
   653  	zipReader, err := zip.NewReader(rawReader, int64(stdout.Len()))
   654  	if err != nil {
   655  		return nil, err
   656  	}
   657  
   658  	haveLICENSE := false
   659  	var fs []File
   660  	for _, zf := range zipReader.File {
   661  		if !strings.HasPrefix(zf.Name, subdir) || strings.HasSuffix(zf.Name, "/") {
   662  			continue
   663  		}
   664  
   665  		n := strings.TrimPrefix(zf.Name, subdir)
   666  		if n == "" {
   667  			continue
   668  		}
   669  		n = strings.TrimPrefix(n, "/")
   670  
   671  		fs = append(fs, zipFile{
   672  			name: n,
   673  			f:    zf,
   674  		})
   675  		if n == "LICENSE" {
   676  			haveLICENSE = true
   677  		}
   678  	}
   679  
   680  	if !haveLICENSE && subdir != "" {
   681  		// Note: this method of extracting the license from the root copied from
   682  		// https://go.googlesource.com/go/+/refs/tags/go1.20.4/src/cmd/go/internal/modfetch/coderepo.go#1118
   683  		// https://go.googlesource.com/go/+/refs/tags/go1.20.4/src/cmd/go/internal/modfetch/codehost/git.go#657
   684  		cmd := exec.Command("git", "cat-file", "blob", rev+":LICENSE")
   685  		cmd.Dir = dir
   686  		cmd.Env = append(os.Environ(), "PWD="+dir)
   687  		stdout := bytes.Buffer{}
   688  		cmd.Stdout = &stdout
   689  		if err := cmd.Run(); err == nil {
   690  			fs = append(fs, dataFile{name: "LICENSE", data: stdout.Bytes()})
   691  		}
   692  	}
   693  
   694  	return fs, nil
   695  }
   696  
   697  // isGitRepo reports whether the given directory is a git repo.
   698  func isGitRepo(dir string) bool {
   699  	stdout := &bytes.Buffer{}
   700  	cmd := exec.Command("git", "rev-parse", "--git-dir")
   701  	cmd.Dir = dir
   702  	cmd.Env = append(os.Environ(), "PWD="+dir)
   703  	cmd.Stdout = stdout
   704  	if err := cmd.Run(); err != nil {
   705  		return false
   706  	}
   707  	gitDir := strings.TrimSpace(stdout.String())
   708  	if !filepath.IsAbs(gitDir) {
   709  		gitDir = filepath.Join(dir, gitDir)
   710  	}
   711  	wantDir := filepath.Join(dir, ".git")
   712  	return wantDir == gitDir
   713  }
   714  
   715  type dirFile struct {
   716  	filePath, slashPath string
   717  	info                os.FileInfo
   718  }
   719  
   720  func (f dirFile) Path() string                 { return f.slashPath }
   721  func (f dirFile) Lstat() (os.FileInfo, error)  { return f.info, nil }
   722  func (f dirFile) Open() (io.ReadCloser, error) { return os.Open(f.filePath) }
   723  
   724  type zipFile struct {
   725  	name string
   726  	f    *zip.File
   727  }
   728  
   729  func (f zipFile) Path() string                 { return f.name }
   730  func (f zipFile) Lstat() (os.FileInfo, error)  { return f.f.FileInfo(), nil }
   731  func (f zipFile) Open() (io.ReadCloser, error) { return f.f.Open() }
   732  
   733  type dataFile struct {
   734  	name string
   735  	data []byte
   736  }
   737  
   738  func (f dataFile) Path() string                 { return f.name }
   739  func (f dataFile) Lstat() (os.FileInfo, error)  { return dataFileInfo{f}, nil }
   740  func (f dataFile) Open() (io.ReadCloser, error) { return io.NopCloser(bytes.NewReader(f.data)), nil }
   741  
   742  type dataFileInfo struct {
   743  	f dataFile
   744  }
   745  
   746  func (fi dataFileInfo) Name() string       { return path.Base(fi.f.name) }
   747  func (fi dataFileInfo) Size() int64        { return int64(len(fi.f.data)) }
   748  func (fi dataFileInfo) Mode() os.FileMode  { return 0644 }
   749  func (fi dataFileInfo) ModTime() time.Time { return time.Time{} }
   750  func (fi dataFileInfo) IsDir() bool        { return false }
   751  func (fi dataFileInfo) Sys() interface{}   { return nil }
   752  
   753  // isVendoredPackage attempts to report whether the given filename is contained
   754  // in a package whose import path contains (but does not end with) the component
   755  // "vendor".
   756  //
   757  // Unfortunately, isVendoredPackage reports false positives for files in any
   758  // non-top-level package whose import path ends in "vendor".
   759  func isVendoredPackage(name string) bool {
   760  	var i int
   761  	if strings.HasPrefix(name, "vendor/") {
   762  		i += len("vendor/")
   763  	} else if j := strings.Index(name, "/vendor/"); j >= 0 {
   764  		// This offset looks incorrect; this should probably be
   765  		//
   766  		// 	i = j + len("/vendor/")
   767  		//
   768  		// (See https://golang.org/issue/31562 and https://golang.org/issue/37397.)
   769  		// Unfortunately, we can't fix it without invalidating module checksums.
   770  		i += len("/vendor/")
   771  	} else {
   772  		return false
   773  	}
   774  	return strings.Contains(name[i:], "/")
   775  }
   776  
   777  // Unzip extracts the contents of a module zip file to a directory.
   778  //
   779  // Unzip checks all restrictions listed in the package documentation and returns
   780  // an error if the zip archive is not valid. In some cases, files may be written
   781  // to dir before an error is returned (for example, if a file's uncompressed
   782  // size does not match its declared size).
   783  //
   784  // dir may or may not exist: Unzip will create it and any missing parent
   785  // directories if it doesn't exist. If dir exists, it must be empty.
   786  func Unzip(dir string, m module.Version, zipFile string) (err error) {
   787  	defer func() {
   788  		if err != nil {
   789  			err = &zipError{verb: "unzip", path: zipFile, err: err}
   790  		}
   791  	}()
   792  
   793  	// Check that the directory is empty. Don't create it yet in case there's
   794  	// an error reading the zip.
   795  	if files, _ := os.ReadDir(dir); len(files) > 0 {
   796  		return fmt.Errorf("target directory %v exists and is not empty", dir)
   797  	}
   798  
   799  	// Open the zip and check that it satisfies all restrictions.
   800  	f, err := os.Open(zipFile)
   801  	if err != nil {
   802  		return err
   803  	}
   804  	defer f.Close()
   805  	z, cf, err := checkZip(m, f)
   806  	if err != nil {
   807  		return err
   808  	}
   809  	if err := cf.Err(); err != nil {
   810  		return err
   811  	}
   812  
   813  	// Unzip, enforcing sizes declared in the zip file.
   814  	prefix := fmt.Sprintf("%s@%s/", m.Path, m.Version)
   815  	if err := os.MkdirAll(dir, 0777); err != nil {
   816  		return err
   817  	}
   818  	for _, zf := range z.File {
   819  		name := zf.Name[len(prefix):]
   820  		if name == "" || strings.HasSuffix(name, "/") {
   821  			continue
   822  		}
   823  		dst := filepath.Join(dir, name)
   824  		if err := os.MkdirAll(filepath.Dir(dst), 0777); err != nil {
   825  			return err
   826  		}
   827  		w, err := os.OpenFile(dst, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0444)
   828  		if err != nil {
   829  			return err
   830  		}
   831  		r, err := zf.Open()
   832  		if err != nil {
   833  			w.Close()
   834  			return err
   835  		}
   836  		lr := &io.LimitedReader{R: r, N: int64(zf.UncompressedSize64) + 1}
   837  		_, err = io.Copy(w, lr)
   838  		r.Close()
   839  		if err != nil {
   840  			w.Close()
   841  			return err
   842  		}
   843  		if err := w.Close(); err != nil {
   844  			return err
   845  		}
   846  		if lr.N <= 0 {
   847  			return fmt.Errorf("uncompressed size of file %s is larger than declared size (%d bytes)", zf.Name, zf.UncompressedSize64)
   848  		}
   849  	}
   850  
   851  	return nil
   852  }
   853  
   854  // collisionChecker finds case-insensitive name collisions and paths that
   855  // are listed as both files and directories.
   856  //
   857  // The keys of this map are processed with strToFold. pathInfo has the original
   858  // path for each folded path.
   859  type collisionChecker map[string]pathInfo
   860  
   861  type pathInfo struct {
   862  	path  string
   863  	isDir bool
   864  }
   865  
   866  func (cc collisionChecker) check(p string, isDir bool) error {
   867  	fold := strToFold(p)
   868  	if other, ok := cc[fold]; ok {
   869  		if p != other.path {
   870  			return fmt.Errorf("case-insensitive file name collision: %q and %q", other.path, p)
   871  		}
   872  		if isDir != other.isDir {
   873  			return fmt.Errorf("entry %q is both a file and a directory", p)
   874  		}
   875  		if !isDir {
   876  			return fmt.Errorf("multiple entries for file %q", p)
   877  		}
   878  		// It's not an error if check is called with the same directory multiple
   879  		// times. check is called recursively on parent directories, so check
   880  		// may be called on the same directory many times.
   881  	} else {
   882  		cc[fold] = pathInfo{path: p, isDir: isDir}
   883  	}
   884  
   885  	if parent := path.Dir(p); parent != "." {
   886  		return cc.check(parent, true)
   887  	}
   888  	return nil
   889  }
   890  
   891  // listFilesInDir walks the directory tree rooted at dir and returns a list of
   892  // files, as well as a list of directories and files that were skipped (for
   893  // example, nested modules and symbolic links).
   894  func listFilesInDir(dir string) (files []File, omitted []FileError, err error) {
   895  	err = filepath.Walk(dir, func(filePath string, info os.FileInfo, err error) error {
   896  		if err != nil {
   897  			return err
   898  		}
   899  		relPath, err := filepath.Rel(dir, filePath)
   900  		if err != nil {
   901  			return err
   902  		}
   903  		slashPath := filepath.ToSlash(relPath)
   904  
   905  		// Skip some subdirectories inside vendor, but maintain bug
   906  		// golang.org/issue/31562, described in isVendoredPackage.
   907  		// We would like Create and CreateFromDir to produce the same result
   908  		// for a set of files, whether expressed as a directory tree or zip.
   909  		if isVendoredPackage(slashPath) {
   910  			omitted = append(omitted, FileError{Path: slashPath, Err: errVendored})
   911  			return nil
   912  		}
   913  
   914  		if info.IsDir() {
   915  			if filePath == dir {
   916  				// Don't skip the top-level directory.
   917  				return nil
   918  			}
   919  
   920  			// Skip VCS directories.
   921  			// fossil repos are regular files with arbitrary names, so we don't try
   922  			// to exclude them.
   923  			switch filepath.Base(filePath) {
   924  			case ".bzr", ".git", ".hg", ".svn":
   925  				omitted = append(omitted, FileError{Path: slashPath, Err: errVCS})
   926  				return filepath.SkipDir
   927  			}
   928  
   929  			// Skip submodules (directories containing go.mod files).
   930  			if goModInfo, err := os.Lstat(filepath.Join(filePath, "go.mod")); err == nil && !goModInfo.IsDir() {
   931  				omitted = append(omitted, FileError{Path: slashPath, Err: errSubmoduleDir})
   932  				return filepath.SkipDir
   933  			}
   934  			return nil
   935  		}
   936  
   937  		// Skip irregular files and files in vendor directories.
   938  		// Irregular files are ignored. They're typically symbolic links.
   939  		if !info.Mode().IsRegular() {
   940  			omitted = append(omitted, FileError{Path: slashPath, Err: errNotRegular})
   941  			return nil
   942  		}
   943  
   944  		files = append(files, dirFile{
   945  			filePath:  filePath,
   946  			slashPath: slashPath,
   947  			info:      info,
   948  		})
   949  		return nil
   950  	})
   951  	if err != nil {
   952  		return nil, nil, err
   953  	}
   954  	return files, omitted, nil
   955  }
   956  
   957  type zipError struct {
   958  	verb, path string
   959  	err        error
   960  }
   961  
   962  func (e *zipError) Error() string {
   963  	if e.path == "" {
   964  		return fmt.Sprintf("%s: %v", e.verb, e.err)
   965  	} else {
   966  		return fmt.Sprintf("%s %s: %v", e.verb, e.path, e.err)
   967  	}
   968  }
   969  
   970  func (e *zipError) Unwrap() error {
   971  	return e.err
   972  }
   973  
   974  // strToFold returns a string with the property that
   975  //
   976  //	strings.EqualFold(s, t) iff strToFold(s) == strToFold(t)
   977  //
   978  // This lets us test a large set of strings for fold-equivalent
   979  // duplicates without making a quadratic number of calls
   980  // to EqualFold. Note that strings.ToUpper and strings.ToLower
   981  // do not have the desired property in some corner cases.
   982  func strToFold(s string) string {
   983  	// Fast path: all ASCII, no upper case.
   984  	// Most paths look like this already.
   985  	for i := 0; i < len(s); i++ {
   986  		c := s[i]
   987  		if c >= utf8.RuneSelf || 'A' <= c && c <= 'Z' {
   988  			goto Slow
   989  		}
   990  	}
   991  	return s
   992  
   993  Slow:
   994  	var buf bytes.Buffer
   995  	for _, r := range s {
   996  		// SimpleFold(x) cycles to the next equivalent rune > x
   997  		// or wraps around to smaller values. Iterate until it wraps,
   998  		// and we've found the minimum value.
   999  		for {
  1000  			r0 := r
  1001  			r = unicode.SimpleFold(r0)
  1002  			if r <= r0 {
  1003  				break
  1004  			}
  1005  		}
  1006  		// Exception to allow fast path above: A-Z => a-z
  1007  		if 'A' <= r && r <= 'Z' {
  1008  			r += 'a' - 'A'
  1009  		}
  1010  		buf.WriteRune(r)
  1011  	}
  1012  	return buf.String()
  1013  }
  1014  

View as plain text