aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools/build_defs/pkg/archive.py
diff options
context:
space:
mode:
Diffstat (limited to 'tools/build_defs/pkg/archive.py')
-rw-r--r--tools/build_defs/pkg/archive.py320
1 files changed, 320 insertions, 0 deletions
diff --git a/tools/build_defs/pkg/archive.py b/tools/build_defs/pkg/archive.py
new file mode 100644
index 0000000000..95d9e73ca2
--- /dev/null
+++ b/tools/build_defs/pkg/archive.py
@@ -0,0 +1,320 @@
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Archive manipulation library for the Docker rules."""
+
+import os
+from StringIO import StringIO
+import tarfile
+
+
+class SimpleArFile(object):
+ """A simple AR file reader.
+
+ This enable to read AR file (System V variant) as described
+ in https://en.wikipedia.org/wiki/Ar_(Unix).
+
+ The standard usage of this class is:
+
+ with SimpleArFile(filename) as ar:
+ nextFile = ar.next()
+ while nextFile:
+ print nextFile.filename
+ nextFile = ar.next()
+
+ Upon error, this class will raise a ArError exception.
+ """
+
+ # TODO(dmarting): We should use a standard library instead but python 2.7
+ # does not have AR reading library.
+
+ class ArError(Exception):
+ pass
+
+ class SimpleArFileEntry(object):
+ """Represent one entry in a AR archive.
+
+ Attributes:
+ filename: the filename of the entry, as described in the archive.
+ timestamp: the timestamp of the file entry.
+ owner_id, group_id: numeric id of the user and group owning the file.
+ mode: unix permission mode of the file
+ size: size of the file
+ data: the content of the file.
+ """
+
+ def __init__(self, f):
+ self.filename = f.read(16).strip()
+ if self.filename.endswith('/'): # SysV variant
+ self.filename = self.filename[:-1]
+ self.timestamp = int(f.read(12).strip())
+ self.owner_id = int(f.read(6).strip())
+ self.group_id = int(f.read(6).strip())
+ self.mode = int(f.read(8).strip(), 8)
+ self.size = int(f.read(10).strip())
+ pad = f.read(2)
+ if pad != '\x60\x0a':
+ raise SimpleArFile.ArError('Invalid AR file header')
+ self.data = f.read(self.size)
+
+ MAGIC_STRING = '!<arch>\n'
+
+ def __init__(self, filename):
+ self.filename = filename
+
+ def __enter__(self):
+ self.f = open(self.filename, 'rb')
+ if self.f.read(len(self.MAGIC_STRING)) != self.MAGIC_STRING:
+ raise self.ArError('Not a ar file: ' + self.filename)
+ return self
+
+ def __exit__(self, t, v, traceback):
+ self.f.close()
+
+ def next(self):
+ """Read the next file. Returns None when reaching the end of file."""
+ # AR sections are two bit aligned using new lines.
+ if self.f.tell() % 2 != 0:
+ self.f.read(1)
+ # An AR sections is at least 60 bytes. Some file might contains garbage
+ # bytes at the end of the archive, ignore them.
+ if self.f.tell() > os.fstat(self.f.fileno()).st_size - 60:
+ return None
+ return self.SimpleArFileEntry(self.f)
+
+
+class TarFileWriter(object):
+ """A wrapper to write tar files."""
+
+ class Error(Exception):
+ pass
+
+ def __init__(self, name, compression=''):
+ if compression in ['tgz', 'gz']:
+ mode = 'w:gz'
+ elif compression in ['bzip2', 'bz2']:
+ mode = 'w:bz2'
+ else:
+ mode = 'w:'
+ self.tar = tarfile.open(name=name, mode=mode)
+ self.members = set([])
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, t, v, traceback):
+ self.close()
+
+ def add_dir(self,
+ name,
+ path,
+ uid=0,
+ gid=0,
+ uname='',
+ gname='',
+ mtime=0,
+ mode=None,
+ depth=100):
+ """Recursively add a directory.
+
+ Args:
+ name: the destination path of the directory to add.
+ path: the path of the directory to add.
+ uid: owner user identifier.
+ gid: owner group identifier.
+ uname: owner user names.
+ gname: owner group names.
+ mtime: modification time to put in the archive.
+ mode: unix permission mode of the file, default 0644 (0755).
+ depth: maximum depth to recurse in to avoid infinite loops
+ with cyclic mounts.
+
+ Raises:
+ TarFileWriter.Error: when the recursion depth has exceeded the
+ `depth` argument.
+ """
+ if not (name == '.' or name.startswith('/') or name.startswith('./')):
+ name = './' + name
+ if os.path.isdir(path):
+ # Remove trailing '/' (index -1 => last character)
+ if name[-1] == '/':
+ name = name[:-1]
+ # Add the x bit to directories to prevent non-traversable directories.
+ # The x bit is set only to if the read bit is set.
+ dirmode = (mode | ((0444 & mode) >> 2)) if mode else mode
+ self.add_file(name + '/',
+ tarfile.DIRTYPE,
+ uid=uid,
+ gid=gid,
+ uname=uname,
+ gname=gname,
+ mtime=mtime,
+ mode=dirmode)
+ if depth <= 0:
+ raise self.Error('Recursion depth exceeded, probably in '
+ 'an infinite directory loop.')
+ # Iterate over the sorted list of file so we get a deterministic result.
+ filelist = os.listdir(path)
+ filelist.sort()
+ for f in filelist:
+ new_name = os.path.join(name, f)
+ new_path = os.path.join(path, f)
+ self.add_dir(new_name, new_path, uid, gid, uname, gname, mtime, mode,
+ depth - 1)
+ else:
+ self.add_file(name,
+ tarfile.REGTYPE,
+ file_content=path,
+ uid=uid,
+ gid=gid,
+ uname=uname,
+ gname=gname,
+ mtime=mtime,
+ mode=mode)
+
+ def _addfile(self, info, fileobj=None):
+ """Add a file in the tar file if there is no conflict."""
+ if not info.name.endswith('/') and info.type == tarfile.DIRTYPE:
+ # Enforce the ending / for directories so we correctly deduplicate.
+ info.name += '/'
+ if info.name not in self.members:
+ self.tar.addfile(info, fileobj)
+ self.members.add(info.name)
+ elif info.type != tarfile.DIRTYPE:
+ print('Duplicate file in archive: %s, '
+ 'picking first occurrence' % info.name)
+
+ def add_file(self,
+ name,
+ kind=tarfile.REGTYPE,
+ content=None,
+ link=None,
+ file_content=None,
+ uid=0,
+ gid=0,
+ uname='',
+ gname='',
+ mtime=0,
+ mode=None):
+ """Add a file to the current tar.
+
+ Args:
+ name: the name of the file to add.
+ kind: the type of the file to add, see tarfile.*TYPE.
+ content: a textual content to put in the file.
+ link: if the file is a link, the destination of the link.
+ file_content: file to read the content from. Provide either this
+ one or `content` to specifies a content for the file.
+ uid: owner user identifier.
+ gid: owner group identifier.
+ uname: owner user names.
+ gname: owner group names.
+ mtime: modification time to put in the archive.
+ mode: unix permission mode of the file, default 0644 (0755).
+ """
+ if file_content and os.path.isdir(file_content):
+ # Recurse into directory
+ self.add_dir(name, file_content, uid, gid, uname, gname, mtime, mode)
+ return
+ if not (name == '.' or name.startswith('/') or name.startswith('./')):
+ name = './' + name
+ tarinfo = tarfile.TarInfo(name)
+ tarinfo.mtime = mtime
+ tarinfo.uid = uid
+ tarinfo.gid = gid
+ tarinfo.uname = uname
+ tarinfo.gname = gname
+ tarinfo.type = kind
+ if mode is None:
+ tarinfo.mode = 0644 if kind == tarfile.REGTYPE else 0755
+ else:
+ tarinfo.mode = mode
+ if link:
+ tarinfo.linkname = link
+ if content:
+ tarinfo.size = len(content)
+ self.tar.addfile(tarinfo, StringIO(content))
+ elif file_content:
+ with open(file_content, 'rb') as f:
+ tarinfo.size = os.fstat(f.fileno()).st_size
+ self._addfile(tarinfo, f)
+ else:
+ self._addfile(tarinfo)
+
+ def add_tar(self,
+ tar,
+ rootuid=None,
+ rootgid=None,
+ numeric=False,
+ name_filter=None):
+ """Merge a tar content into the current tar, stripping timestamp.
+
+ Args:
+ tar: the name of tar to extract and put content into the current tar.
+ rootuid: user id that we will pretend is root (replaced by uid 0).
+ rootgid: group id that we will pretend is root (replaced by gid 0).
+ numeric: set to true to strip out name of owners (and just use the
+ numeric values).
+ name_filter: filter out file by names. If not none, this method will be
+ called for each file to add, given the name and should return true if
+ the file is to be added to the final tar and false otherwise.
+ """
+ compression = os.path.splitext(tar)[-1][1:]
+ if compression == 'tgz':
+ compression = 'gz'
+ elif compression == 'bzip2':
+ compression = 'bz2'
+ elif compression == 'lzma':
+ compression = 'xz'
+ elif compression not in ['gz', 'bz2', 'xz']:
+ compression = ''
+ if compression == 'xz':
+ # Python 2 does not support lzma, our py3 support is terrible so let's
+ # just hack around.
+ # Note that we buffer the file in memory and it can have an important
+ # memory footprint but it's probably fine as we don't use them for really
+ # large files.
+ # TODO(dmarting): once our py3 support gets better, compile this tools
+ # with py3 for proper lzma support.
+ f = StringIO(os.popen('cat %s | xzcat' % tar).read())
+ intar = tarfile.open(fileobj=f, mode='r:')
+ else:
+ intar = tarfile.open(name=tar, mode='r:' + compression)
+ for tarinfo in intar:
+ if name_filter is None or name_filter(tarinfo.name):
+ tarinfo.mtime = 0
+ if rootuid is not None and tarinfo.uid == rootuid:
+ tarinfo.uid = 0
+ tarinfo.uname = 'root'
+ if rootgid is not None and tarinfo.gid == rootgid:
+ tarinfo.gid = 0
+ tarinfo.gname = 'root'
+ if numeric:
+ tarinfo.uname = ''
+ tarinfo.gname = ''
+ name = tarinfo.name
+ if not name.startswith('/') and not name.startswith('.'):
+ tarinfo.name = './' + name
+
+ if tarinfo.isfile():
+ self._addfile(tarinfo, intar.extractfile(tarinfo.name))
+ else:
+ self._addfile(tarinfo)
+ intar.close()
+
+ def close(self):
+ """Close the output tar file.
+
+ This class should not be used anymore after calling that method.
+ """
+ self.tar.close()