diff options
author | 2016-06-06 12:43:01 +0000 | |
---|---|---|
committer | 2016-06-06 16:41:57 +0000 | |
commit | 4f72b2c7afb8d27971ca41142e6c15f4a22ddf16 (patch) | |
tree | 40abf205ab577d7b02700010a1ced97b97379545 /src/main/java | |
parent | 8240b748391d85d68b5ddc502d30f3a2391ba343 (diff) |
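Add --experimental_multi_threaded_digest, which lets DigestUtils compute MD5 digests on multiple threads concurrently even for large files (by default, digest computation for files larger than 4096 bytes is serialized behind a lock to avoid excessive disk seeks). Might improve performance when using an SSD.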
Fixes #835 and #1210.
--
MOS_MIGRATED_REVID=124128233
Diffstat (limited to 'src/main/java')
6 files changed, 124 insertions, 10 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD index 419adffe0f..fe0b267423 100644 --- a/src/main/java/com/google/devtools/build/lib/BUILD +++ b/src/main/java/com/google/devtools/build/lib/BUILD @@ -27,6 +27,7 @@ filegroup( "//src/main/java/com/google/devtools/build/lib/rules/genquery:srcs", "//src/main/java/com/google/devtools/build/lib/rules/objc:srcs", "//src/main/java/com/google/devtools/build/lib/sandbox:srcs", + "//src/main/java/com/google/devtools/build/lib/ssd:srcs", "//src/main/java/com/google/devtools/build/lib/standalone:srcs", "//src/main/java/com/google/devtools/build/lib/worker:srcs", "//src/main/java/com/google/devtools/build/skyframe:srcs", @@ -574,6 +575,7 @@ java_library( "//src/main/java/com/google/devtools/build/lib/bazel/dash", "//src/main/java/com/google/devtools/build/lib/remote", "//src/main/java/com/google/devtools/build/lib/sandbox", + "//src/main/java/com/google/devtools/build/lib/ssd", "//src/main/java/com/google/devtools/build/lib/standalone", "//src/main/java/com/google/devtools/build/lib/worker", "//src/main/java/com/google/devtools/build/skyframe", diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java index 2ecc041fd0..377ea43fad 100644 --- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java +++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java @@ -23,6 +23,7 @@ import com.google.devtools.build.lib.vfs.Path; import java.io.IOException; import java.util.Objects; +import java.util.concurrent.atomic.AtomicBoolean; import java.util.logging.Level; import javax.annotation.Nullable; @@ -33,6 +34,7 @@ import javax.annotation.Nullable; public class DigestUtils { // Object to synchronize on when serializing large file reads. 
private static final Object MD5_LOCK = new Object(); + private static final AtomicBoolean MULTI_THREADED_DIGEST = new AtomicBoolean(false); /** Private constructor to prevent instantiation of utility class. */ private DigestUtils() {} @@ -44,7 +46,7 @@ public class DigestUtils { * @param size size of Artifact on filesystem in bytes, getSize() on its stat. */ public static boolean useFileDigest(boolean isFile, long size) { - // Use timestamps for directories. Use digests for everything else. + // Use timestamps for directories and empty files. Use digests for everything else. return isFile && size != 0; } @@ -83,8 +85,17 @@ public class DigestUtils { * Returns the the fast md5 digest of the file, or null if not available. */ @Nullable - public static byte[] getFastDigest(Path path) throws IOException { - return path.getFastDigestFunctionType().equals("MD5") ? path.getFastDigest() : null; + private static byte[] getFastDigest(Path path) throws IOException { + // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to + // work with any opaque digest. + return Objects.equals(path.getFastDigestFunctionType(), "MD5") ? path.getFastDigest() : null; + } + + /** + * Enable or disable multi-threaded digesting even for large files. + */ + public static void setMultiThreadedDigest(boolean multiThreadedDigest) { + DigestUtils.MULTI_THREADED_DIGEST.set(multiThreadedDigest); } /** @@ -97,12 +108,8 @@ public class DigestUtils { * to avoid excessive disk seeks. */ public static byte[] getDigestOrFail(Path path, long fileSize) throws IOException { - // TODO(bazel-team): the action cache currently only works with md5 digests but it ought to - // work with any opaque digest. 
- byte[] md5bin = null; - if (Objects.equals(path.getFastDigestFunctionType(), "MD5")) { - md5bin = getFastDigest(path); - } + byte[] md5bin = getFastDigest(path); + if (md5bin != null && !binaryDigestWellFormed(md5bin)) { // Fail-soft in cases where md5bin is non-null, but not a valid digest. String msg = String.format("Malformed digest '%s' for file %s", @@ -111,9 +118,10 @@ public class DigestUtils { LoggingUtil.logToRemote(Level.SEVERE, msg, new IllegalStateException(msg)); md5bin = null; } + if (md5bin != null) { return md5bin; - } else if (fileSize > 4096) { + } else if (fileSize > 4096 && !MULTI_THREADED_DIGEST.get()) { // We'll have to read file content in order to calculate the digest. In that case // it would be beneficial to serialize those calculations since there is a high // probability that MD5 will be requested for multiple output files simultaneously. diff --git a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java index 827393296e..673c964ca5 100644 --- a/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java +++ b/src/main/java/com/google/devtools/build/lib/bazel/BazelMain.java @@ -45,6 +45,7 @@ public final class BazelMain { com.google.devtools.build.lib.bazel.BazelRepositoryModule.class, com.google.devtools.build.lib.bazel.dash.DashModule.class, com.google.devtools.build.lib.bazel.rules.BazelRulesModule.class, + com.google.devtools.build.lib.ssd.SsdModule.class, com.google.devtools.build.lib.worker.WorkerModule.class, com.google.devtools.build.lib.remote.RemoteModule.class, com.google.devtools.build.lib.standalone.StandaloneModule.class, diff --git a/src/main/java/com/google/devtools/build/lib/ssd/BUILD b/src/main/java/com/google/devtools/build/lib/ssd/BUILD new file mode 100644 index 0000000000..7a334df2fe --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/ssd/BUILD @@ -0,0 +1,31 @@ +package( + default_visibility = ["//src:__subpackages__"], 
+) + +java_library( + name = "ssd", + srcs = glob(["*.java"]), + deps = [ + "//src/main/java/com/google/devtools/build/lib:build-base", + "//src/main/java/com/google/devtools/build/lib:concurrent", + "//src/main/java/com/google/devtools/build/lib:events", + "//src/main/java/com/google/devtools/build/lib:io", + "//src/main/java/com/google/devtools/build/lib:packages-internal", + "//src/main/java/com/google/devtools/build/lib:runtime", + "//src/main/java/com/google/devtools/build/lib:util", + "//src/main/java/com/google/devtools/build/lib:vfs", + "//src/main/java/com/google/devtools/build/lib/actions", + "//src/main/java/com/google/devtools/build/lib/standalone", + "//src/main/java/com/google/devtools/common/options", + "//src/main/protobuf:worker_protocol_java_proto", + "//third_party:apache_commons_pool2", + "//third_party:guava", + "//third_party:jsr305", + "//third_party/protobuf", + ], +) + +filegroup( + name = "srcs", + srcs = glob(["**"]), +) diff --git a/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java b/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java new file mode 100644 index 0000000000..a41a3baa38 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/ssd/SsdModule.java @@ -0,0 +1,40 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package com.google.devtools.build.lib.ssd; + +import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.actions.cache.DigestUtils; +import com.google.devtools.build.lib.runtime.BlazeModule; +import com.google.devtools.build.lib.runtime.Command; +import com.google.devtools.common.options.OptionsBase; +import com.google.devtools.common.options.OptionsProvider; + +/** + * BlazeModule that applies optimizations to Bazel's internals in order to improve performance when + * using an SSD. + */ +public final class SsdModule extends BlazeModule { + @Override + public Iterable<Class<? extends OptionsBase>> getCommandOptions(Command command) { + return ImmutableList.<Class<? extends OptionsBase>>of(SsdOptions.class); + } + + @Override + public void handleOptions(OptionsProvider optionsProvider) { + SsdOptions options = optionsProvider.getOptions(SsdOptions.class); + if (options.experimentalMultiThreadedDigest) { + DigestUtils.setMultiThreadedDigest(options.experimentalMultiThreadedDigest); + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java b/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java new file mode 100644 index 0000000000..a9da284f24 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/ssd/SsdOptions.java @@ -0,0 +1,32 @@ +// Copyright 2016 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+package com.google.devtools.build.lib.ssd; + +import com.google.devtools.common.options.Option; +import com.google.devtools.common.options.OptionsBase; + +/** + * Options that tune Bazel's performance in order to increase performance on workstations with an + * SSD. + */ +public class SsdOptions extends OptionsBase { + @Option( + name = "experimental_multi_threaded_digest", + defaultValue = "false", + help = + "Whether to always compute MD5 digests of files with multiple threads. Might improve " + + "performance when using an SSD." + ) + public boolean experimentalMultiThreadedDigest; +} |