# Copyright 2015 The Bazel Authors. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""This package manipulates Docker image layer metadata."""

from collections import namedtuple
import copy
import json
import os
import os.path
import sys

from tools.build_defs.docker import utils
from third_party.py import gflags

gflags.DEFINE_string(
    'name', None, 'The name of the current layer')

gflags.DEFINE_string(
    'base', None, 'The parent image')

gflags.DEFINE_string(
    'output', None, 'The output file to generate')

gflags.DEFINE_string(
    'layer', None, 'The current layer tar')

gflags.DEFINE_list(
    'entrypoint', None,
    'Override the "Entrypoint" of the previous layer')

gflags.DEFINE_list(
    'command', None,
    'Override the "Cmd" of the previous layer')

gflags.DEFINE_string(
    'user', None,
    'The username to run commands under')

gflags.DEFINE_list('labels', None, 'Augment the "Label" of the previous layer')

gflags.DEFINE_list(
    'ports', None,
    'Augment the "ExposedPorts" of the previous layer')

gflags.DEFINE_list(
    'volumes', None,
    'Augment the "Volumes" of the previous layer')

gflags.DEFINE_string(
    'workdir', None,
    'Set the working directory for the layer')

gflags.DEFINE_list(
    'env', None,
    'Augment the "Env" of the previous layer')

FLAGS = gflags.FLAGS

_MetadataOptionsT = namedtuple('MetadataOptionsT',
                               ['name', 'parent', 'size', 'entrypoint', 'cmd',
                                'env', 'labels', 'ports', 'volumes', 'workdir',
                                'user'])


class MetadataOptions(_MetadataOptionsT):
  """Docker image layer metadata options.

  An immutable record of the changes one layer makes to the image metadata.
  Every field defaults to None, so callers only pass the options they set.
  """

  def __new__(cls, name=None, parent=None, size=None, entrypoint=None,
              cmd=None, user=None, labels=None, env=None, ports=None,
              volumes=None, workdir=None):
    """Constructor."""
    # Fields are forwarded by keyword because the parameter order here
    # differs from the field order of the underlying namedtuple.
    return super(MetadataOptions, cls).__new__(cls,
                                               name=name,
                                               parent=parent,
                                               size=size,
                                               entrypoint=entrypoint,
                                               cmd=cmd,
                                               user=user,
                                               labels=labels,
                                               env=env,
                                               ports=ports,
                                               volumes=volumes,
                                               workdir=workdir)


_DOCKER_VERSION = '1.5.0'

_PROCESSOR_ARCHITECTURE = 'amd64'

_OPERATING_SYSTEM = 'linux'


def Resolve(value, environment):
  """Resolves environment variables embedded in the given value.

  Args:
    value: the string in which variable references are expanded.
    environment: the mapping of variable names to values used for expansion.

  Returns:
    The result of os.path.expandvars(value) evaluated against environment.
  """
  # os.path.expandvars always reads os.environ, so the mapping is swapped in
  # for the duration of the call and restored afterwards, even on error.
  outer_env = os.environ
  try:
    os.environ = environment
    return os.path.expandvars(value)
  finally:
    os.environ = outer_env


def DeepCopySkipNull(data):
  """Do a deep copy, skipping null entry.

  Args:
    data: the object to copy.

  Returns:
    For a dict, a new dict with every None-valued entry dropped and the
    remaining keys and values copied recursively; for anything else, the
    result of copy.deepcopy(data).
  """
  if isinstance(data, dict):
    # items() works on both Python 2 and Python 3, unlike iteritems().
    return dict((DeepCopySkipNull(k), DeepCopySkipNull(v))
                for k, v in data.items() if v is not None)
  return copy.deepcopy(data)


def KeyValueToDict(pair):
  """Converts an iterable object of key=value pairs to dictionary.

  Args:
    pair: an iterable of 'key=value' strings, or None.

  Returns:
    A dict mapping each key to its value. Only the first '=' splits an entry,
    so values may themselves contain '='. None yields an empty dict.

  Raises:
    ValueError: an entry does not contain '='.
  """
  d = dict()
  # The list flags default to None when they are not passed on the command
  # line; treat that the same as an empty list instead of failing.
  for kv in pair or ():
    (k, v) = kv.split('=', 1)
    d[k] = v
  return d


def RewriteMetadata(data, options):
  """Rewrite and return a copy of the input data according to options.

  Args:
    data: The dict of Docker image layer metadata we're copying and rewriting.
    options: The changes this layer makes to the overall image's metadata, which
             first appears in this layer's version of the metadata

  Returns:
    A deep copy of data, which has been updated to reflect the metadata
    additions of this layer.

  Raises:
    Exception: a required option was missing.
  """
  output = DeepCopySkipNull(data)

  if not options.name:
    raise Exception('Missing required option: name')
  output['id'] = options.name

  if options.parent:
    output['parent'] = options.parent
  elif data:
    raise Exception('Expected empty input object when parent is omitted')

  if options.size:
    output['Size'] = options.size
  else:
    # Never inherit the size recorded for the parent layer.
    output.pop('Size', None)

  config = output.setdefault('config', {})

  if options.entrypoint:
    config['Entrypoint'] = options.entrypoint
  if options.cmd:
    config['Cmd'] = options.cmd
  if options.user:
    config['User'] = options.user

  output['docker_version'] = _DOCKER_VERSION
  output['architecture'] = _PROCESSOR_ARCHITECTURE
  output['os'] = _OPERATING_SYSTEM

  def Dict2ConfigValue(d):
    # Sorted by key so the generated metadata is deterministic.
    return ['%s=%s' % (k, d[k]) for k in sorted(d)]

  if options.env:
    # Build a dictionary of existing environment variables (used by Resolve).
    environ_dict = KeyValueToDict(config.get('Env', []))
    # Merge in new environment variables, resolving references.
    for k, v in options.env.items():
      # Resolve handles scenarios like "PATH=$PATH:...".
      environ_dict[k] = Resolve(v, environ_dict)
    config['Env'] = Dict2ConfigValue(environ_dict)

  if options.labels:
    label_dict = KeyValueToDict(config.get('Label', []))
    for k, v in options.labels.items():
      label_dict[k] = v
    config['Label'] = Dict2ConfigValue(label_dict)

  if options.ports:
    exposed_ports = config.setdefault('ExposedPorts', {})
    for p in options.ports:
      if '/' in p:
        # The port spec has the form 80/tcp, 1234/udp
        # so we simply use it as the key.
        exposed_ports[p] = {}
      else:
        # Assume tcp
        exposed_ports[p + '/tcp'] = {}

  if options.volumes:
    volumes = config.setdefault('Volumes', {})
    for p in options.volumes:
      volumes[p] = {}

  if options.workdir:
    config['WorkingDir'] = options.workdir

  # TODO(mattmoor): comment, created, container_config

  # container_config contains information about the container
  # that was used to create this layer, so it shouldn't
  # propagate from the parent to child.  This is where we would
  # annotate information that can be extracted by tools like Blubber
  # or Quay.io's UI to gain insight into the source that generated
  # the layer.  A Dockerfile might produce something like:
  #   # (nop) /bin/sh -c "apt-get update"
  # We might consider encoding the fully-qualified bazel build target:
  #  //tools/build_defs/docker:image
  # However, we should be sensitive to leaking data through this field.
  output.pop('container_config', None)

  return output


def GetParentIdentifier(f):
  """Try to look at the parent identifier from a docker image.

  The identifier is expected to be in the 'top' file for our rule, so we look
  at it first ('./top', 'top'). If it's not found, then we use the
  'repositories' file and try to parse it to get the first declared
  repository (so we can actually parse a file generated by 'docker save').

  Args:
    f: the input tar file.

  Returns:
    The identifier of the docker image, or None if no identifier was found.
  """
  # TODO(dmarting): Maybe we could drop the 'top' file all together?
  top = utils.GetTarFile(f, 'top')
  if top:
    return top.strip()
  repositories = utils.GetTarFile(f, 'repositories')
  if repositories:
    data = json.loads(repositories)
    for k1 in data:
      for k2 in data[k1]:
        # Returns the first found key
        return data[k1][k2].strip()
  return None


def main(unused_argv):
  """Generates the metadata JSON file of the current layer from the flags.

  Reads the parent metadata out of the --base tar when one is given, applies
  the options taken from the flags, and writes the result to --output.

  Args:
    unused_argv: the remaining command line arguments; not used.
  """
  parent = ''
  base_json = '{}'
  if FLAGS.base:
    parent = GetParentIdentifier(FLAGS.base)
    if parent:
      base_json = utils.GetTarFile(FLAGS.base, '%s/json' % parent)
  data = json.loads(base_json)

  name = FLAGS.name
  # A leading '@' means the value is stored in the named file. The extra
  # truthiness check keeps a missing --name from failing here with an
  # AttributeError instead of reaching the check in RewriteMetadata.
  if name and name.startswith('@'):
    with open(name[1:], 'r') as f:
      name = f.read()

  labels = KeyValueToDict(FLAGS.labels)
  # Iterate over a snapshot because values are replaced inside the loop.
  for label, value in list(labels.items()):
    if value.startswith('@'):
      with open(value[1:], 'r') as f:
        labels[label] = f.read()

  output = RewriteMetadata(data,
                           MetadataOptions(name=name,
                                           parent=parent,
                                           size=os.path.getsize(FLAGS.layer),
                                           entrypoint=FLAGS.entrypoint,
                                           cmd=FLAGS.command,
                                           user=FLAGS.user,
                                           labels=labels,
                                           env=KeyValueToDict(FLAGS.env),
                                           ports=FLAGS.ports,
                                           volumes=FLAGS.volumes,
                                           workdir=FLAGS.workdir))

  with open(FLAGS.output, 'w') as fp:
    json.dump(output, fp, sort_keys=True)
    fp.write('\n')


if __name__ == '__main__':
  main(FLAGS(sys.argv))