aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com
diff options
context:
space:
mode:
authorGravatar tomlu <tomlu@google.com>2018-01-31 11:18:20 -0800
committerGravatar Copybara-Service <copybara-piper@google.com>2018-01-31 11:20:14 -0800
commit42c2c729597eb7e8a87c62dce435781f47732d2c (patch)
treec524951169166f05b61e470a47eafab73dfc0240 /src/main/java/com
parent223ed93582dbb730eb92c80cc6350dcec116ec0a (diff)
Add memory-efficient map for storing nested set -> digest.
Instead of using ConcurrentHashMap, we use a dead-simple open addressed hash hable with a giant byte array with 16-byte slots. We then read or write fingerprints straight into and out of the array, obviating the need to generate intermediate garbage. Locking mechanism is a read-write lock. This should be faster than full synchronisation for read-heavy loads. RELNOTES: None PiperOrigin-RevId: 184019301
Diffstat (limited to 'src/main/java/com')
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD5
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java181
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java57
-rw-r--r--src/main/java/com/google/devtools/build/lib/util/Fingerprint.java21
4 files changed, 230 insertions, 34 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
index 92e058bb1f..bb8fae4abf 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
@@ -41,7 +41,10 @@ java_library(
java_library(
name = "fingerprint_cache",
- srcs = ["NestedSetFingerprintCache.java"],
+ srcs = [
+ "DigestMap.java",
+ "NestedSetFingerprintCache.java",
+ ],
deps = [
":nestedset",
"//src/main/java/com/google/devtools/build/lib:commandline_item",
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
new file mode 100644
index 0000000000..71e5f85348
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
@@ -0,0 +1,181 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import com.google.common.base.Preconditions;
+import com.google.devtools.build.lib.util.Fingerprint;
+import java.util.concurrent.locks.StampedLock;
+
+/**
+ * Map of key -> [digest bytes].
+ *
+ * <p>This class uses a single array of keys and a big single block of bytes. To read/store digests
+ * we index straight into the byte array. This is more memory-efficient and uses less GC than a
+ * corresponding Map<Object, byte[]>.
+ *
+ * <p>Keys use reference equality.
+ */
+final class DigestMap {
+ private final int digestSize;
+ private final StampedLock readWriteLock = new StampedLock();
+ private Object[] keys;
+ private byte[] bytes;
+ private int tableSize;
+ private int nextResize;
+ private int size;
+
+ DigestMap(int digestSize, int initialSize) {
+ Preconditions.checkArgument(
+ initialSize > 0 && (initialSize & (initialSize - 1)) == 0,
+ "initialSize must be a power of 2 greater than 0");
+ this.digestSize = digestSize;
+ this.keys = new Object[initialSize];
+ this.bytes = new byte[initialSize * digestSize];
+ this.tableSize = initialSize;
+ this.nextResize = getNextResize(initialSize);
+ }
+
+ /** Finds the digest for the corresponding key and adds it to the passed fingerprint. */
+ boolean readDigest(Object key, Fingerprint fingerprint) {
+ long stamp = readWriteLock.readLock();
+ try {
+ int index = findKeyReadLocked(key);
+ if (index >= 0) {
+ fingerprint.addBytes(bytes, index * digestSize, digestSize);
+ return true;
+ } else {
+ return false;
+ }
+ } finally {
+ readWriteLock.unlockRead(stamp);
+ }
+ }
+
+ // Finds the key in the array. Must be called under read lock.
+ private int findKeyReadLocked(Object key) {
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == key) {
+ return index;
+ } else if (currentKey == null) {
+ return -1;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ /**
+ * Inserts a digest for the corresponding key, then immediately reads it into another fingerprint.
+ *
+ * <p>This is equivalent to <code>
+ * digestMap.insertDigest(key, digest.digestAndReset(); digestMap.readDigest(key, readTo); </code>
+ * but it will be faster.
+ *
+ * @param key The key to insert.
+ * @param digest The fingerprint to insert. This will reset the fingerprint instance.
+ * @param readTo A fingerprint to read the just-added fingerprint into.
+ */
+ void insertAndReadDigest(Object key, Fingerprint digest, Fingerprint readTo) {
+ long stamp = readWriteLock.writeLock();
+ try {
+ int index = insertKeyWriteLocked(key);
+ digest.digestAndReset(bytes, index * digestSize, digestSize);
+ readTo.addBytes(bytes, index * digestSize, digestSize);
+ } finally {
+ readWriteLock.unlockWrite(stamp);
+ }
+ }
+
+ // Inserts a key into the array and returns the index. Must be called under write lock.
+ private int insertKeyWriteLocked(Object key) {
+ if (size >= nextResize) {
+ resizeTableWriteLocked();
+ }
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == null) {
+ keys[index] = key;
+ ++size;
+ return index;
+ } else if (currentKey == key) {
+ // Key is already present
+ return index;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ private void resizeTableWriteLocked() {
+ int digestSize = this.digestSize;
+ int tableSize = this.tableSize;
+ Object[] keys = this.keys;
+ byte[] bytes = this.bytes;
+ int newTableSize = this.tableSize * 2;
+ Object[] newKeys = new Object[newTableSize];
+ byte[] newBytes = new byte[newTableSize * digestSize];
+ for (int i = 0; i < tableSize; ++i) {
+ Object key = keys[i];
+ if (key != null) {
+ int newIndex = firstFreeIndex(newKeys, newTableSize, key);
+ newKeys[newIndex] = key;
+ System.arraycopy(bytes, i * digestSize, newBytes, newIndex * digestSize, digestSize);
+ }
+ }
+ this.tableSize = newTableSize;
+ this.keys = newKeys;
+ this.bytes = newBytes;
+ this.nextResize = getNextResize(newTableSize);
+ }
+
+ private static int firstFreeIndex(Object[] keys, int tableSize, Object key) {
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == null) {
+ return index;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ private static int hash(Object key) {
+ return smear(System.identityHashCode(key));
+ }
+
+ private static int probe(int index, int tableSize) {
+ return (index + 1) & (tableSize - 1);
+ }
+
+ private static int getNextResize(int newTableSize) {
+ // 75% load
+ return (newTableSize * 3) / 4;
+ }
+
+ /*
+ * This method was rewritten in Java from an intermediate step of the Murmur hash function in
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the
+ * following header:
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
+ * hereby disclaims copyright to this source code.
+ */
+ private static int smear(int hashCode) {
+ return 0x1b873593 * Integer.rotateLeft(hashCode * 0xcc9e2d51, 15);
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
index b4d341143c..b48d4a2298 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
@@ -22,10 +22,11 @@ import java.util.concurrent.ConcurrentHashMap;
/** Computes fingerprints for nested sets, reusing sub-computations from children. */
public class NestedSetFingerprintCache {
- private static final byte[] EMPTY_SET_BYTES = new byte[] {};
+ private static final int DIGEST_SIZE = 16;
+ private static final int EMPTY_SET_DIGEST = 104_395_303;
/** Memoize the subresults. We have to have one cache per type of command item map function. */
- private Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> mapFnToFingerprints = createMap();
+ private Map<CommandLineItem.MapFn<?>, DigestMap> mapFnToFingerprints = createMap();
public <T> void addNestedSetToFingerprint(Fingerprint fingerprint, NestedSet<T> nestedSet) {
addNestedSetToFingerprint(CommandLineItem.MapFn.DEFAULT, fingerprint, nestedSet);
@@ -33,12 +34,16 @@ public class NestedSetFingerprintCache {
public <T> void addNestedSetToFingerprint(
CommandLineItem.MapFn<? super T> mapFn, Fingerprint fingerprint, NestedSet<T> nestedSet) {
- Map<Object, byte[]> fingerprints =
- mapFnToFingerprints.computeIfAbsent(mapFn, k -> new ConcurrentHashMap<>());
+ // Only top-level nested sets can be empty, so we can bail here
+ if (nestedSet.isEmpty()) {
+ fingerprint.addInt(EMPTY_SET_DIGEST);
+ return;
+ }
+ DigestMap digestMap =
+ mapFnToFingerprints.computeIfAbsent(mapFn, k -> new DigestMap(DIGEST_SIZE, 1024));
fingerprint.addInt(nestedSet.getOrder().ordinal());
Object children = nestedSet.rawChildren();
- byte[] bytes = getBytes(mapFn, fingerprints, children);
- fingerprint.addBytes(bytes);
+ addToFingerprint(mapFn, fingerprint, digestMap, children);
}
public void clear() {
@@ -46,36 +51,22 @@ public class NestedSetFingerprintCache {
}
@SuppressWarnings("unchecked")
- private <T> byte[] getBytes(
- CommandLineItem.MapFn<? super T> mapFn, Map<Object, byte[]> fingerprints, Object children) {
- byte[] bytes = fingerprints.get(children);
- if (bytes == null) {
- if (children instanceof Object[]) {
- Fingerprint fingerprint = new Fingerprint();
+ private <T> void addToFingerprint(
+ CommandLineItem.MapFn<? super T> mapFn,
+ Fingerprint fingerprint,
+ DigestMap digestMap,
+ Object children) {
+ if (children instanceof Object[]) {
+ if (!digestMap.readDigest(children, fingerprint)) {
+ Fingerprint childrenFingerprint = new Fingerprint();
for (Object child : (Object[]) children) {
- if (child instanceof Object[]) {
- fingerprint.addBytes(getBytes(mapFn, fingerprints, child));
- } else {
- addToFingerprint(mapFn, fingerprint, (T) child);
- }
+ addToFingerprint(mapFn, childrenFingerprint, digestMap, child);
}
- bytes = fingerprint.digestAndReset();
-
- // There is no point memoizing anything except the multi-item case,
- // since the single-item case gets inlined into its parents anyway,
- // and the empty set is a singleton
- fingerprints.put(children, bytes);
- } else if (children != NestedSet.EMPTY_CHILDREN) {
- // Single item
- Fingerprint fingerprint = new Fingerprint();
- addToFingerprint(mapFn, fingerprint, (T) children);
- bytes = fingerprint.digestAndReset();
- } else {
- // Empty nested set
- bytes = EMPTY_SET_BYTES;
+ digestMap.insertAndReadDigest(children, childrenFingerprint, fingerprint);
}
+ } else {
+ addToFingerprint(mapFn, fingerprint, (T) children);
}
- return bytes;
}
@VisibleForTesting
@@ -84,7 +75,7 @@ public class NestedSetFingerprintCache {
fingerprint.addString(mapFn.expandToCommandLine(object));
}
- private static Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> createMap() {
+ private static Map<CommandLineItem.MapFn<?>, DigestMap> createMap() {
return new ConcurrentHashMap<>();
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
index c2119eeccb..ddb71b4aaf 100644
--- a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
+++ b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
@@ -20,6 +20,7 @@ import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
+import java.security.DigestException;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -73,6 +74,26 @@ public final class Fingerprint {
return md5.digest();
}
+ /**
+ * Completes the hash computation by doing final operations and resets the underlying state,
+ * allowing this instance to be used again.
+ *
+ * <p>Instead of returning a digest, this method writes the digest straight into the supplied byte
+ * array, at the given offset.
+ *
+ * @see java.security.MessageDigest#digest()
+ */
+ public void digestAndReset(byte[] buf, int offset, int len) {
+ try {
+ codedOut.flush();
+ md5.digest(buf, offset, len);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to flush", e);
+ } catch (DigestException e) {
+ throw new IllegalStateException("failed to digest", e);
+ }
+ }
+
/** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */
public String hexDigestAndReset() {
return hexDigest(digestAndReset());