aboutsummaryrefslogtreecommitdiffhomepage
path: root/tools
diff options
context:
space:
mode:
authorGravatar Damien Martin-Guillerez <dmarting@google.com>2015-10-07 09:39:28 +0000
committerGravatar Han-Wen Nienhuys <hanwen@google.com>2015-10-08 12:10:55 +0000
commitf7cc9cdb4b4cf0a092ea8fde17d419765e3c6f38 (patch)
tree7b9ac074cdcc8edc97c7456a59b172762f172dfa /tools
parent8c353e461c2f7bd01b2993c253005ad523c5a039 (diff)
Rolling-forward with fix:
Incremental build and load of Docker layers Previously we were always tar-ing all the layers in the final image. This change does not build the full image except if required. It instead only builds the concerned layer and a loader that loads all the layers that were not previously loaded in your local registry. This is somewhat incompatible with the previous version since the full tar file is not built anymore if not explicitly requested. See https://goo.gl/P6CVyG for an extensive comparison of this change. RELNOTES: [docker_build] incremental loading is default now. Specify explicitly //package:target.tar (with the .tar extension) to obtain the full image. -- MOS_MIGRATED_REVID=104844423
Diffstat (limited to 'tools')
-rw-r--r--tools/build_defs/docker/BUILD30
-rw-r--r--tools/build_defs/docker/README.md57
-rw-r--r--tools/build_defs/docker/docker.bzl104
-rw-r--r--tools/build_defs/docker/incremental_load.sh.tpl49
-rw-r--r--tools/build_defs/docker/join_layers.py102
5 files changed, 312 insertions, 30 deletions
diff --git a/tools/build_defs/docker/BUILD b/tools/build_defs/docker/BUILD
index 91ea88cfe2..ca1aad1151 100644
--- a/tools/build_defs/docker/BUILD
+++ b/tools/build_defs/docker/BUILD
@@ -26,17 +26,17 @@ TEST_TARGETS = [
]
TEST_DATA = [
- "//tools/build_defs/docker/testdata:" + t
+ "//tools/build_defs/docker/testdata:%s.tar" % t
for t in TEST_TARGETS
] + [
- "//tools/build_defs/docker/testdata:notop_" + t
+ "//tools/build_defs/docker/testdata:notop_%s.tar" % t
for t in TEST_TARGETS
] + [
- "//tools/build_defs/docker/testdata:gen_image",
- "//tools/build_defs/docker/testdata:data_path_image",
- "//tools/build_defs/docker/testdata:no_data_path_image",
- "//tools/build_defs/docker/testdata:dummy_repository",
- "//tools/build_defs/docker/testdata:extras_with_deb",
+ "//tools/build_defs/docker/testdata:gen_image.tar",
+ "//tools/build_defs/docker/testdata:data_path_image.tar",
+ "//tools/build_defs/docker/testdata:no_data_path_image.tar",
+ "//tools/build_defs/docker/testdata:dummy_repository.tar",
+ "//tools/build_defs/docker/testdata:extras_with_deb.tar",
]
sh_test(
@@ -108,3 +108,19 @@ py_binary(
"//third_party/py/gflags",
],
)
+
+# Tool that creates a docker image from a list of layer tarballs
+# (see join_layers.py). Public so that rules outside this package can use it.
+py_binary(
+ name = "join_layers",
+ srcs = ["join_layers.py"],
+ visibility = ["//visibility:public"],
+ deps = [
+ ":archive",
+ "//third_party/py/gflags",
+ ],
+)
+
+# Template of the incremental loader script; docker.bzl refers to it through
+# the "_incremental_load_template" attribute of the docker_build_ rule.
+filegroup(
+ name = "incremental_load_template",
+ srcs = ["incremental_load.sh.tpl"],
+ visibility = ["//visibility:public"],
+)
diff --git a/tools/build_defs/docker/README.md b/tools/build_defs/docker/README.md
index 8a13076a15..7734b92d1a 100644
--- a/tools/build_defs/docker/README.md
+++ b/tools/build_defs/docker/README.md
@@ -98,11 +98,12 @@ docker_build(
)
```
-You can build this with `bazel build my/image:helloworld`.
+You can build this with `bazel build my/image:helloworld.tar`.
This will produce the file `bazel-genfiles/my/image/helloworld.tar`.
You can load this into your local Docker client by running
`docker load -i bazel-genfiles/my/image/helloworld.tar`, or simply
-`bazel run my/image:helloworld`.
+`bazel run my/image:helloworld` (this last command only updates the
+changed layers and is thus faster).
Upon success you should be able to run `docker images` and see:
@@ -116,6 +117,12 @@ You can now use this docker image with the name `bazel/my_image:helloworld` or
tag it with another name, for example:
`docker tag bazel/my_image:helloworld gcr.io/my-project/my-awesome-image:v0.9`
+You can do all of that at once by specifying the tag on the command line of
+`bazel run`:
+```
+bazel run my/image:helloworld gcr.io/my-project/my-awesome-image:v0.9
+```
+
__Nota Bene:__ the `docker images` command will show a really old timestamp
because `docker_build` removes all timestamps from the build to make it
reproducible.
@@ -186,6 +193,52 @@ debs, symlinks, entrypoint, cmd, env, ports, volumes, workdir, repository)`
<table>
<thead>
<tr>
+ <th colspan="2">Implicit output targets</th>
+ </tr>
+ </thead>
+ <tbody>
+ <tr>
+ <td><code><i>name</i>.tar</code></td>
+ <td>
+ <code>The full Docker image</code>
+ <p>
+ A full Docker image containing all the layers, identical to
+ what <code>docker save</code> would return. This is
+ only generated on demand.
+ </p>
+ </td>
+ </tr>
+ <tr>
+ <td><code><i>name</i>-layer.tar</code></td>
+ <td>
+ <code>An image of the current layer</code>
+ <p>
+ A Docker image containing only the layer corresponding to
+ that target. It is used for incremental loading of the layer.
+ </p>
+ <p>
+ <b>Note:</b> this target is not suitable for direct consumption.
+ It is used for incremental loading; non-docker rules should
+ depend on the docker image (<i>name</i>.tar) instead.
+ </p>
+ </td>
+ </tr>
+ <tr>
+ <td><code><i>name</i></code></td>
+ <td>
+ <code>Incremental image loader</code>
+ <p>
+ The incremental image loader. It will load only changed
+ layers inside the Docker registry.
+ </p>
+ </td>
+ </tr>
+ </tbody>
+</table>
+
+<table>
+ <thead>
+ <tr>
<th>Attribute</th>
<th>Description</th>
</tr>
diff --git a/tools/build_defs/docker/docker.bzl b/tools/build_defs/docker/docker.bzl
index 67e17e0761..238490ca42 100644
--- a/tools/build_defs/docker/docker.bzl
+++ b/tools/build_defs/docker/docker.bzl
@@ -123,8 +123,9 @@ def _sha256(ctx, artifact):
def _get_base_artifact(ctx):
if ctx.files.base:
- if hasattr(ctx.attr.base, "docker_image"):
- return ctx.attr.base.docker_image
+ if hasattr(ctx.attr.base, "docker_layers"):
+ # The base is the first layer in docker_layers if provided.
+ return ctx.attr.base.docker_layers[0]["layer"]
if len(ctx.files.base) != 1:
fail("base attribute should be a single tar file.")
return ctx.files.base[0]
@@ -192,45 +193,96 @@ def _create_image(ctx, layer, name, metadata):
"""Create the new image."""
create_image = ctx.executable._create_image
args = [
- "--output=" + ctx.outputs.out.path,
+ "--output=" + ctx.outputs.layer.path,
"--metadata=" + metadata.path,
"--layer=" + layer.path,
"--id=@" + name.path,
- # We label at push time, so we only put a single name in this file:
- # repository/package:target => {the layer being appended}
- "--repository=%s/%s" % (ctx.attr.repository,
- ctx.label.package.replace("/", "_")),
- "--name=" + ctx.label.name
]
inputs = [layer, metadata, name]
# If we have been provided a base image, add it.
- base = _get_base_artifact(ctx)
- if base:
- args += ["--base=%s" % base.path]
- inputs += [base]
+ if ctx.attr.base and not hasattr(ctx.attr.base, "docker_layers"):
+ base = _get_base_artifact(ctx)
+ if base:
+ args += ["--base=%s" % base.path]
+ inputs += [base]
ctx.action(
executable = create_image,
arguments = args,
inputs = inputs,
- use_default_shell_env = True,
- outputs = [ctx.outputs.out]
+ outputs = [ctx.outputs.layer],
+ mnemonic = "CreateLayer",
)
+def _assemble_image(ctx, layers, name):
+  """Create the full image from the list of layers.
+
+  Registers a "JoinLayers" action that runs the _join_layers tool and
+  writes its result to ctx.outputs.out.
+
+  Args:
+    ctx: the rule context.
+    layers: list of dicts; the file stored under the "layer" key of each
+      entry is passed to the tool with --layer, in list order.
+    name: file passed to the tool as --id=@<path>.
+  """
+  layer_files = [entry["layer"] for entry in layers]
+  layer_flags = ["--layer=" + layer_file.path for layer_file in layer_files]
+  ctx.action(
+      executable = ctx.executable._join_layers,
+      arguments = [
+          "--output=" + ctx.outputs.out.path,
+          "--id=@" + name.path,
+          "--repository=" + _repository_name(ctx),
+          "--name=" + ctx.label.name
+      ] + layer_flags,
+      inputs = [name] + layer_files,
+      outputs = [ctx.outputs.out],
+      mnemonic = "JoinLayers"
+  )
+
+
+def _repository_name(ctx):
+  """Compute the repository name for the current rule.
+
+  The name is the "repository" attribute, a slash, and the package of the
+  rule's label with every "/" replaced by "_".
+  """
+  flattened_package = ctx.label.package.replace("/", "_")
+  return "%s/%s" % (ctx.attr.repository, flattened_package)
+
+def reverse(lst):
+ """Return a new list holding the elements of lst in reverse order."""
+ result = []
+ for el in lst:
+ # Prepending each element puts the last one seen at the front.
+ result = [el] + result
+ return result
+
+def _get_runfile_path(ctx, f):
+  """Return the runfiles relative path of f.
+
+  This is the short path of f, prefixed with the workspace name and a
+  slash when ctx.workspace_name is set.
+  """
+  workspace = ctx.workspace_name
+  if not workspace:
+    return f.short_path
+  return workspace + "/" + f.short_path
+
def _docker_build_impl(ctx):
"""Implementation for the docker_build rule."""
layer = _build_layer(ctx)
name = _compute_layer_name(ctx, layer)
metadata = _metadata(ctx, layer, name)
_create_image(ctx, layer, name, metadata)
- ctx.file_action(
- content = "\n".join([
- "#!/bin/bash -eu",
- "docker load -i " + ctx.outputs.out.short_path
- ]),
+ # Compute the layers transitive provider.
+ # It includes the current layer and, if they exist, the layers from
+ # base docker_build rules. We do not extract the list of layers from
+ # a base tarball as they probably do not respect the layer naming
+ # convention that our rules use.
+ layers = [
+ {"layer": ctx.outputs.layer, "name": name}
+ ] + getattr(ctx.attr.base, "docker_layers", [])
+ # Generate the incremental load statement
+ ctx.template_action(
+ template = ctx.file._incremental_load_template,
+ substitutions = {
+ "%{load_statements}": "\n".join([
+ "incr_load '%s' '%s'" % (_get_runfile_path(ctx, l["name"]),
+ _get_runfile_path(ctx, l["layer"]))
+ # The last layer is the first in the list of layers.
+ # We reverse to load the layer from the parent to the child.
+ for l in reverse(layers)]),
+ "%{repository}": _repository_name(ctx),
+ "%{tag}" : ctx.label.name,
+ },
output = ctx.outputs.executable,
executable = True)
- return struct(runfiles = ctx.runfiles(files = [ctx.outputs.out]),
- docker_image = ctx.outputs.out)
+ _assemble_image(ctx, layers, name)
+ runfiles = ctx.runfiles(
+ files = [l["layer"] for l in layers] + [l["name"] for l in layers])
+ return struct(runfiles = runfiles,
+ files = set([ctx.outputs.layer]),
+ docker_layers = layers)
docker_build_ = rule(
implementation = _docker_build_impl,
@@ -261,6 +313,15 @@ docker_build_ = rule(
cfg=HOST_CFG,
executable=True,
allow_files=True),
+ "_incremental_load_template": attr.label(
+ default=Label("//tools/build_defs/docker:incremental_load_template"),
+ single_file=True,
+ allow_files=True),
+ "_join_layers": attr.label(
+ default=Label("//tools/build_defs/docker:join_layers"),
+ cfg=HOST_CFG,
+ executable=True,
+ allow_files=True),
"_rewrite_tool": attr.label(
default=Label("//tools/build_defs/docker:rewrite_json"),
cfg=HOST_CFG,
@@ -274,6 +335,7 @@ docker_build_ = rule(
},
outputs = {
"out": "%{name}.tar",
+ "layer": "%{name}-layer.tar",
},
executable = True)
diff --git a/tools/build_defs/docker/incremental_load.sh.tpl b/tools/build_defs/docker/incremental_load.sh.tpl
new file mode 100644
index 0000000000..7217fccd77
--- /dev/null
+++ b/tools/build_defs/docker/incremental_load.sh.tpl
@@ -0,0 +1,49 @@
+#!/bin/bash -eu
+#
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This is a generated file that loads all docker layers built by "docker_build".
+
+RUNFILES="${BASH_SOURCE[0]}.runfiles"
+
+# Collect the identifiers (and only the identifiers) of all the images
+# known to the local docker registry.
+IMAGES="$(docker images -aq)"
+# Length of the shortest identifier listed above; the trailing `xargs`
+# re-emits the count without any surrounding whitespace.
+IMAGE_LEN=$(for id in $IMAGES; do printf '%s' "$id" | wc -c; done | sort -g | head -1 | xargs)
+
+# No identifier was listed, so there is nothing to measure: fall back to 64.
+if [ -z "$IMAGE_LEN" ]; then
+  IMAGE_LEN=64
+fi
+
+function incr_load() {
+  # Load a layer if and only if the layer is not in "$IMAGES", that is
+  # in the local docker registry.
+  #
+  # Arguments:
+  #   $1: runfiles-relative path of the file holding the layer identifier.
+  #   $2: runfiles-relative path of the layer tarball to load.
+  #
+  # "name" is deliberately left global: the tagging step at the end of the
+  # script reads the identifier set by the last call.
+  #
+  # Paths are quoted so that a runfiles directory containing spaces or glob
+  # characters is not split or expanded.
+  name=$(cat "${RUNFILES}/$1")
+  # Only the first $IMAGE_LEN characters of the identifier are compared,
+  # as a whole line, against the identifiers listed in $IMAGES.
+  if echo "$IMAGES" | grep -q "^${name:0:$IMAGE_LEN}\$"; then
+    echo "Skipping $name, already loaded."
+  else
+    echo "Loading $name..."
+    docker load -i "${RUNFILES}/$2"
+  fi
+}
+
+# List of 'incr_load' statements for all layers.
+# This is generated and injected by docker_build.
+%{load_statements}
+
+# Tag the last layer.
+if [ -n "${name}" ]; then
+ TAG="${1:-%{repository}:%{tag}}"
+ echo "Tagging ${name} as ${TAG}"
+ docker tag -f ${name} ${TAG}
+fi
diff --git a/tools/build_defs/docker/join_layers.py b/tools/build_defs/docker/join_layers.py
new file mode 100644
index 0000000000..361a6a6667
--- /dev/null
+++ b/tools/build_defs/docker/join_layers.py
@@ -0,0 +1,102 @@
+# Copyright 2015 The Bazel Authors. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""This tool creates a docker image from a list of layers."""
+# This is the main program to create a docker image. It expects to be run with:
+# join_layers --output=output_file \
+# --layer=layer1 [--layer=layer2 ... --layer=layerN] \
+# --id=@identifier \
+# --name=myname --repository=repositoryName
+# See the gflags declarations below for details about each flag.
+
+import os.path
+import sys
+
+from tools.build_defs.docker import archive
+from third_party.py import gflags
+
+gflags.DEFINE_string('output', None, 'The output file, mandatory')
+gflags.MarkFlagAsRequired('output')
+
+gflags.DEFINE_multistring('layer', [], 'The tar files for layers to join.')
+
+gflags.DEFINE_string(
+ 'id', None, 'The hex identifier of the top layer (hexstring or @filename).')
+
+gflags.DEFINE_string(
+ 'repository', None,
+ 'The name of the repository to add this image (use with --id and --name).')
+
+gflags.DEFINE_string(
+ 'name', None,
+ 'The symbolic name of this image (use with --id and --repsoitory).')
+
+FLAGS = gflags.FLAGS
+
+
+def _layer_filter(name):
+ """Ignore files 'top' and 'repositories' when merging layers."""
+ basename = os.path.basename(name)
+ return basename not in ('top', 'repositories')
+
+
+def create_image(output, layers, identifier=None,
+ name=None, repository=None):
+ """Creates a Docker image from a list of layers.
+
+ Args:
+ output: the name of the docker image file to create.
+ layers: the layers (tar files) to join to the image.
+ identifier: the identifier of the top layer for this image.
+ name: symbolic name for this docker image.
+ repository: repository name for this docker image.
+ """
+ tar = archive.TarFileWriter(output)
+ for layer in layers:
+ tar.add_tar(layer, name_filter=_layer_filter)
+ # In addition to N layers of the form described above, there might be
+ # a single file at the top of the image called repositories.
+ # This file contains a JSON blob of the form:
+ # {
+ # 'repo':{
+ # 'tag-name': 'top-most layer hex',
+ # ...
+ # },
+ # ...
+ # }
+ if identifier:
+ # If the identifier is not provided, then the resulted layer will be
+ # created without a 'top' file. Docker doesn't needs that file nor
+ # the repository to load the image and for intermediate layer,
+ # docker_build store the name of the layer in a separate artifact so
+ # this 'top' file is not needed.
+ tar.add_file('top', content=identifier)
+ if repository and name:
+ tar.add_file('repositories',
+ content='\n'.join([
+ '{', ' "%s": {' % repository, ' "%s": "%s"' % (
+ name, identifier), ' }', '}'
+ ]))
+
+
+def main(unused_argv):
+ identifier = FLAGS.id
+ if identifier and identifier.startswith('@'):
+ with open(identifier[1:], 'r') as f:
+ identifier = f.read()
+ create_image(FLAGS.output, FLAGS.layer, identifier, FLAGS.name,
+ FLAGS.repository)
+
+
+# When run as a script, parse the command line with FLAGS() and pass
+# whatever it returns to main().
+if __name__ == '__main__':
+ main(FLAGS(sys.argv))