aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java
diff options
context:
space:
mode:
authorGravatar Michajlo Matijkiw <michajlo@google.com>2016-12-15 17:25:08 +0000
committerGravatar John Cater <jcater@google.com>2016-12-15 20:38:55 +0000
commite3db95e855ac11f161d24d472efd488dc4085dfe (patch)
tree421e1c652eabd57b6ab0925ba9fa4e7876cfc742 /src/main/java
parent49757602c4ca9e48081f98850007b2ef17427b46 (diff)
Streamline Fingerprint implementation
Thread all updates through a CodedOutputStream. This has the benefit of potentially hashing less data, as many int values can be represented more compactly, and reducing the churn of hashing strings (generally), since CodedOutputStream is already heavily optimized for this. While the buffer size is a little generous, it winds up paying off. -- PiperOrigin-RevId: 142151062 MOS_MIGRATED_REVID=142151062
Diffstat (limited to 'src/main/java')
-rw-r--r--src/main/java/com/google/devtools/build/lib/BUILD1
-rw-r--r--src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java6
-rw-r--r--src/main/java/com/google/devtools/build/lib/util/Fingerprint.java221
3 files changed, 81 insertions, 147 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/BUILD b/src/main/java/com/google/devtools/build/lib/BUILD
index 87caa42b37..9ab93bddc7 100644
--- a/src/main/java/com/google/devtools/build/lib/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/BUILD
@@ -274,6 +274,7 @@ java_library(
"//src/main/java/com/google/devtools/common/options",
"//third_party:guava",
"//third_party:jsr305",
+ "//third_party/protobuf",
],
)
diff --git a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
index 3dcfe54937..508ee8920a 100644
--- a/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
+++ b/src/main/java/com/google/devtools/build/lib/actions/cache/DigestUtils.java
@@ -163,15 +163,15 @@ public class DigestUtils {
byte[] result = new byte[Md5Digest.MD5_SIZE];
Fingerprint fp = new Fingerprint();
for (Map.Entry<String, String> entry : env.entrySet()) {
- fp.addStringLatin1(entry.getKey());
- fp.addStringLatin1(entry.getValue());
+ fp.addString(entry.getKey());
+ fp.addString(entry.getValue());
xorWith(result, fp.digestAndReset());
}
return new Md5Digest(result);
}
private static byte[] getDigest(Fingerprint fp, String execPath, Metadata md) {
- fp.addStringLatin1(execPath);
+ fp.addString(execPath);
if (md == null) {
// Move along, nothing to see here.
diff --git a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
index f1af5491ff..c75104ee7f 100644
--- a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
+++ b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
@@ -14,14 +14,14 @@
package com.google.devtools.build.lib.util;
-import static java.nio.charset.StandardCharsets.UTF_8;
-
import com.google.common.collect.Iterables;
+import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.PathFragment;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
+import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
import java.nio.charset.StandardCharsets;
+import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
import java.util.Map;
@@ -35,9 +35,6 @@ import javax.annotation.Nullable;
*/
public final class Fingerprint {
- private static final byte[] TRUE_BYTES = new byte[] { 1 };
- private static final byte[] FALSE_BYTES = new byte[] { 0 };
-
private static final MessageDigest MD5_PROTOTYPE;
private static final boolean MD5_PROTOTYPE_SUPPORTS_CLONE;
@@ -46,198 +43,152 @@ public final class Fingerprint {
MD5_PROTOTYPE_SUPPORTS_CLONE = supportsClone(MD5_PROTOTYPE);
}
- private final ByteBuffer scratch;
+ // Make novel use of a CodedOutputStream, which is good at efficiently serializing data. By
+ // flushing at the end of each digest we can continue to use the stream.
+ private final CodedOutputStream codedOut;
private final MessageDigest md5;
- /**
- * Creates and initializes a new instance.
- */
+ /** Creates and initializes a new instance. */
public Fingerprint() {
- scratch = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN);
md5 = cloneOrCreateMd5();
+ // This is a lot of indirection, but CodedOutputStream does a reasonable job of converting
+ // strings to bytes without creating a whole bunch of garbage, which pays off.
+ codedOut = CodedOutputStream.newInstance(
+ new DigestOutputStream(ByteStreams.nullOutputStream(), md5),
+ /*bufferSize=*/ 1024);
}
/**
- * Completes the hash computation by doing final operations, e.g., padding.
- *
- * <p>This method has the side-effect of resetting the underlying digest computer.
+ * Completes the hash computation by doing final operations and resets the underlying state,
+ * allowing this instance to be used again.
*
* @return the MD5 digest as a 16-byte array
* @see java.security.MessageDigest#digest()
*/
public byte[] digestAndReset() {
- // Reset is implicit.
+ try {
+ codedOut.flush();
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to flush", e);
+ }
return md5.digest();
}
- /**
- * Completes the hash computation and returns the digest as a string.
- *
- * <p>This method has the side-effect of resetting the underlying digest computer.
- *
- * @return the MD5 digest as a 32-character string of hexadecimal digits
- */
+ /** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */
public String hexDigestAndReset() {
return hexDigest(digestAndReset());
}
- /**
- * Updates the digest with 0 or more bytes.
- *
- * @param input the array of bytes with which to update the digest
- * @see java.security.MessageDigest#update(byte[])
- */
+ /** Updates the digest with 0 or more bytes. */
public Fingerprint addBytes(byte[] input) {
- md5.update(input);
+ addBytes(input, 0, input.length);
return this;
}
- /**
- * Updates the digest with the specified number of bytes starting at offset.
- *
- * @param input the array of bytes with which to update the digest
- * @param offset the offset into the array
- * @param len the number of bytes to use
- * @see java.security.MessageDigest#update(byte[], int, int)
- */
+ /** Updates the digest with the specified number of bytes starting at offset. */
public Fingerprint addBytes(byte[] input, int offset, int len) {
- md5.update(input, offset, len);
+ try {
+ codedOut.write(input, offset, len);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to write bytes", e);
+ }
return this;
}
- /**
- * Updates the digest with a boolean value.
- */
+ /** Updates the digest with a boolean value. */
public Fingerprint addBoolean(boolean input) {
- md5.update(input ? TRUE_BYTES : FALSE_BYTES);
+ try {
+ codedOut.writeBoolNoTag(input);
+ } catch (IOException e) {
+ throw new IllegalStateException();
+ }
return this;
}
- /**
- * Updates the digest with a boolean value, correctly handling null.
- */
+ /** Same as {@link #addBoolean(boolean)}, except considers nullability. */
public Fingerprint addNullableBoolean(Boolean input) {
- return addInt(input == null ? -1 : (input.booleanValue() ? 1 : 0));
+ if (input == null) {
+ addBoolean(false);
+ } else {
+ addBoolean(true);
+ addBoolean(input);
+ }
+ return this;
}
- /**
- * Updates the digest with the little-endian bytes of a given int value.
- *
- * @param input the integer with which to update the digest
- */
+ /** Updates the digest with the varint representation of input. */
public Fingerprint addInt(int input) {
- scratch.putInt(input);
- updateFromScratch(4);
+ try {
+ codedOut.writeInt32NoTag(input);
+ } catch (IOException e) {
+ throw new IllegalStateException(e);
+ }
return this;
}
- /**
- * Updates the digest with the little-endian bytes of a given long value.
- *
- * @param input the long with which to update the digest
- */
+ /** Updates the digest with the varint representation of a long value. */
public Fingerprint addLong(long input) {
- scratch.putLong(input);
- updateFromScratch(8);
+ try {
+ codedOut.writeInt64NoTag(input);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to write long", e);
+ }
return this;
}
- /**
- * Updates the digest with the little-endian bytes of a given int value, correctly distinguishing
- * between null and non-null values.
- *
- * @param input the integer with which to update the digest
- */
+ /** Same as {@link #addInt(int)}, except considers nullability. */
public Fingerprint addNullableInt(@Nullable Integer input) {
if (input == null) {
- addInt(0);
+ addBoolean(false);
} else {
- addInt(1);
+ addBoolean(true);
addInt(input);
}
return this;
}
- /**
- * Updates the digest with a UUID.
- *
- * @param uuid the UUID with which to update the digest. Must not be null.
- */
+ /** Updates the digest with a UUID. */
public Fingerprint addUUID(UUID uuid) {
addLong(uuid.getLeastSignificantBits());
addLong(uuid.getMostSignificantBits());
return this;
}
- /**
- * Updates the digest with a String using its length plus its UTF8 encoded bytes.
- *
- * @param input the String with which to update the digest
- * @see java.security.MessageDigest#update(byte[])
- */
+ /** Updates the digest with a String using UTF8 encoding. */
public Fingerprint addString(String input) {
- byte[] bytes = input.getBytes(UTF_8);
- addInt(bytes.length);
- md5.update(bytes);
+ try {
+ codedOut.writeStringNoTag(input);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to write string", e);
+ }
return this;
}
- /**
- * Updates the digest with a String using its length plus its UTF8 encoded bytes; if the string
- * is null, then it uses -1 as the length.
- *
- * @param input the String with which to update the digest
- * @see java.security.MessageDigest#update(byte[])
- */
+ /** Same as {@link #addString(String)}, except considers nullability. */
public Fingerprint addNullableString(@Nullable String input) {
if (input == null) {
- addInt(-1);
+ addBoolean(false);
} else {
+ addBoolean(true);
addString(input);
}
return this;
}
- /**
- * Updates the digest with a String using its length and content.
- *
- * @param input the String with which to update the digest
- * @see java.security.MessageDigest#update(byte[])
- */
- public Fingerprint addStringLatin1(String input) {
- addInt(input.length());
- byte[] bytes = new byte[input.length()];
- for (int i = 0; i < input.length(); i++) {
- bytes[i] = (byte) input.charAt(i);
- }
- md5.update(bytes);
- return this;
- }
-
- /**
- * Updates the digest with a Path.
- *
- * @param input the Path with which to update the digest.
- */
+ /** Updates the digest with a {@link Path}. */
public Fingerprint addPath(Path input) {
- addStringLatin1(input.getPathString());
+ addString(input.getPathString());
return this;
}
- /**
- * Updates the digest with a Path.
- *
- * @param input the Path with which to update the digest.
- */
+ /** Updates the digest with a {@link PathFragment}. */
public Fingerprint addPath(PathFragment input) {
- return addStringLatin1(input.getPathString());
+ return addString(input.getPathString());
}
/**
- * Updates the digest with inputs by iterating over them and invoking
- * {@code #addString(String)} on each element.
- *
- * @param inputs the inputs with which to update the digest
+ * Add the supplied sequence of {@link String}s to the digest as an atomic unit, that is this is
+ * different from adding them each individually.
*/
public Fingerprint addStrings(Iterable<String> inputs) {
addInt(Iterables.size(inputs));
@@ -248,13 +199,7 @@ public final class Fingerprint {
return this;
}
- /**
- * Updates the digest with inputs which are pairs in a map, by iterating over
- * the map entries and invoking {@code #addString(String)} on each key and
- * value.
- *
- * @param inputs the inputs in a map with which to update the digest
- */
+ /** Updates the digest with the supplied map. */
public Fingerprint addStringMap(Map<String, String> inputs) {
addInt(inputs.size());
for (Map.Entry<String, String> entry : inputs.entrySet()) {
@@ -266,8 +211,8 @@ public final class Fingerprint {
}
/**
- * Updates the digest with a list of paths by iterating over them and
- * invoking {@link #addPath(PathFragment)} on each element.
+ * Add the supplied sequence of {@link PathFragment}s to the digest as an atomic unit, that is
+ * this is different from adding each item individually.
*
* @param inputs the paths with which to update the digest
*/
@@ -280,18 +225,6 @@ public final class Fingerprint {
return this;
}
- /**
- * Reset the Fingerprint for additional use as though previous digesting had not been done.
- */
- public void reset() {
- md5.reset();
- }
-
- private void updateFromScratch(int numBytes) {
- md5.update(scratch.array(), 0, numBytes);
- scratch.clear();
- }
-
private static MessageDigest cloneOrCreateMd5() {
if (MD5_PROTOTYPE_SUPPORTS_CLONE) {
try {