aboutsummaryrefslogtreecommitdiffhomepage
diff options
context:
space:
mode:
authorGravatar tomlu <tomlu@google.com>2018-01-31 11:18:20 -0800
committerGravatar Copybara-Service <copybara-piper@google.com>2018-01-31 11:20:14 -0800
commit42c2c729597eb7e8a87c62dce435781f47732d2c (patch)
treec524951169166f05b61e470a47eafab73dfc0240
parent223ed93582dbb730eb92c80cc6350dcec116ec0a (diff)
Add memory-efficient map for storing nested set -> digest.
Instead of using ConcurrentHashMap, we use a dead-simple open addressed hash hable with a giant byte array with 16-byte slots. We then read or write fingerprints straight into and out of the array, obviating the need to generate intermediate garbage. Locking mechanism is a read-write lock. This should be faster than full synchronisation for read-heavy loads. RELNOTES: None PiperOrigin-RevId: 184019301
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD5
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java181
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java57
-rw-r--r--src/main/java/com/google/devtools/build/lib/util/Fingerprint.java21
-rw-r--r--src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java110
5 files changed, 340 insertions, 34 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
index 92e058bb1f..bb8fae4abf 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
@@ -41,7 +41,10 @@ java_library(
java_library(
name = "fingerprint_cache",
- srcs = ["NestedSetFingerprintCache.java"],
+ srcs = [
+ "DigestMap.java",
+ "NestedSetFingerprintCache.java",
+ ],
deps = [
":nestedset",
"//src/main/java/com/google/devtools/build/lib:commandline_item",
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
new file mode 100644
index 0000000000..71e5f85348
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java
@@ -0,0 +1,181 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import com.google.common.base.Preconditions;
+import com.google.devtools.build.lib.util.Fingerprint;
+import java.util.concurrent.locks.StampedLock;
+
+/**
+ * Map of key -> [digest bytes].
+ *
+ * <p>This class uses a single array of keys and a big single block of bytes. To read/store digests
+ * we index straight into the byte array. This is more memory-efficient and uses less GC than a
+ * corresponding Map<Object, byte[]>.
+ *
+ * <p>Keys use reference equality.
+ */
+final class DigestMap {
+ private final int digestSize;
+ private final StampedLock readWriteLock = new StampedLock();
+ private Object[] keys;
+ private byte[] bytes;
+ private int tableSize;
+ private int nextResize;
+ private int size;
+
+ DigestMap(int digestSize, int initialSize) {
+ Preconditions.checkArgument(
+ initialSize > 0 && (initialSize & (initialSize - 1)) == 0,
+ "initialSize must be a power of 2 greater than 0");
+ this.digestSize = digestSize;
+ this.keys = new Object[initialSize];
+ this.bytes = new byte[initialSize * digestSize];
+ this.tableSize = initialSize;
+ this.nextResize = getNextResize(initialSize);
+ }
+
+ /** Finds the digest for the corresponding key and adds it to the passed fingerprint. */
+ boolean readDigest(Object key, Fingerprint fingerprint) {
+ long stamp = readWriteLock.readLock();
+ try {
+ int index = findKeyReadLocked(key);
+ if (index >= 0) {
+ fingerprint.addBytes(bytes, index * digestSize, digestSize);
+ return true;
+ } else {
+ return false;
+ }
+ } finally {
+ readWriteLock.unlockRead(stamp);
+ }
+ }
+
+ // Finds the key in the array. Must be called under read lock.
+ private int findKeyReadLocked(Object key) {
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == key) {
+ return index;
+ } else if (currentKey == null) {
+ return -1;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ /**
+ * Inserts a digest for the corresponding key, then immediately reads it into another fingerprint.
+ *
+ * <p>This is equivalent to <code>
+ * digestMap.insertDigest(key, digest.digestAndReset(); digestMap.readDigest(key, readTo); </code>
+ * but it will be faster.
+ *
+ * @param key The key to insert.
+ * @param digest The fingerprint to insert. This will reset the fingerprint instance.
+ * @param readTo A fingerprint to read the just-added fingerprint into.
+ */
+ void insertAndReadDigest(Object key, Fingerprint digest, Fingerprint readTo) {
+ long stamp = readWriteLock.writeLock();
+ try {
+ int index = insertKeyWriteLocked(key);
+ digest.digestAndReset(bytes, index * digestSize, digestSize);
+ readTo.addBytes(bytes, index * digestSize, digestSize);
+ } finally {
+ readWriteLock.unlockWrite(stamp);
+ }
+ }
+
+ // Inserts a key into the array and returns the index. Must be called under write lock.
+ private int insertKeyWriteLocked(Object key) {
+ if (size >= nextResize) {
+ resizeTableWriteLocked();
+ }
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == null) {
+ keys[index] = key;
+ ++size;
+ return index;
+ } else if (currentKey == key) {
+ // Key is already present
+ return index;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ private void resizeTableWriteLocked() {
+ int digestSize = this.digestSize;
+ int tableSize = this.tableSize;
+ Object[] keys = this.keys;
+ byte[] bytes = this.bytes;
+ int newTableSize = this.tableSize * 2;
+ Object[] newKeys = new Object[newTableSize];
+ byte[] newBytes = new byte[newTableSize * digestSize];
+ for (int i = 0; i < tableSize; ++i) {
+ Object key = keys[i];
+ if (key != null) {
+ int newIndex = firstFreeIndex(newKeys, newTableSize, key);
+ newKeys[newIndex] = key;
+ System.arraycopy(bytes, i * digestSize, newBytes, newIndex * digestSize, digestSize);
+ }
+ }
+ this.tableSize = newTableSize;
+ this.keys = newKeys;
+ this.bytes = newBytes;
+ this.nextResize = getNextResize(newTableSize);
+ }
+
+ private static int firstFreeIndex(Object[] keys, int tableSize, Object key) {
+ int hash = hash(key);
+ int index = hash & (tableSize - 1);
+ while (true) {
+ Object currentKey = keys[index];
+ if (currentKey == null) {
+ return index;
+ }
+ index = probe(index, tableSize);
+ }
+ }
+
+ private static int hash(Object key) {
+ return smear(System.identityHashCode(key));
+ }
+
+ private static int probe(int index, int tableSize) {
+ return (index + 1) & (tableSize - 1);
+ }
+
+ private static int getNextResize(int newTableSize) {
+ // 75% load
+ return (newTableSize * 3) / 4;
+ }
+
+ /*
+ * This method was rewritten in Java from an intermediate step of the Murmur hash function in
+ * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the
+ * following header:
+ *
+ * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author
+ * hereby disclaims copyright to this source code.
+ */
+ private static int smear(int hashCode) {
+ return 0x1b873593 * Integer.rotateLeft(hashCode * 0xcc9e2d51, 15);
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
index b4d341143c..b48d4a2298 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java
@@ -22,10 +22,11 @@ import java.util.concurrent.ConcurrentHashMap;
/** Computes fingerprints for nested sets, reusing sub-computations from children. */
public class NestedSetFingerprintCache {
- private static final byte[] EMPTY_SET_BYTES = new byte[] {};
+ private static final int DIGEST_SIZE = 16;
+ private static final int EMPTY_SET_DIGEST = 104_395_303;
/** Memoize the subresults. We have to have one cache per type of command item map function. */
- private Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> mapFnToFingerprints = createMap();
+ private Map<CommandLineItem.MapFn<?>, DigestMap> mapFnToFingerprints = createMap();
public <T> void addNestedSetToFingerprint(Fingerprint fingerprint, NestedSet<T> nestedSet) {
addNestedSetToFingerprint(CommandLineItem.MapFn.DEFAULT, fingerprint, nestedSet);
@@ -33,12 +34,16 @@ public class NestedSetFingerprintCache {
public <T> void addNestedSetToFingerprint(
CommandLineItem.MapFn<? super T> mapFn, Fingerprint fingerprint, NestedSet<T> nestedSet) {
- Map<Object, byte[]> fingerprints =
- mapFnToFingerprints.computeIfAbsent(mapFn, k -> new ConcurrentHashMap<>());
+ // Only top-level nested sets can be empty, so we can bail here
+ if (nestedSet.isEmpty()) {
+ fingerprint.addInt(EMPTY_SET_DIGEST);
+ return;
+ }
+ DigestMap digestMap =
+ mapFnToFingerprints.computeIfAbsent(mapFn, k -> new DigestMap(DIGEST_SIZE, 1024));
fingerprint.addInt(nestedSet.getOrder().ordinal());
Object children = nestedSet.rawChildren();
- byte[] bytes = getBytes(mapFn, fingerprints, children);
- fingerprint.addBytes(bytes);
+ addToFingerprint(mapFn, fingerprint, digestMap, children);
}
public void clear() {
@@ -46,36 +51,22 @@ public class NestedSetFingerprintCache {
}
@SuppressWarnings("unchecked")
- private <T> byte[] getBytes(
- CommandLineItem.MapFn<? super T> mapFn, Map<Object, byte[]> fingerprints, Object children) {
- byte[] bytes = fingerprints.get(children);
- if (bytes == null) {
- if (children instanceof Object[]) {
- Fingerprint fingerprint = new Fingerprint();
+ private <T> void addToFingerprint(
+ CommandLineItem.MapFn<? super T> mapFn,
+ Fingerprint fingerprint,
+ DigestMap digestMap,
+ Object children) {
+ if (children instanceof Object[]) {
+ if (!digestMap.readDigest(children, fingerprint)) {
+ Fingerprint childrenFingerprint = new Fingerprint();
for (Object child : (Object[]) children) {
- if (child instanceof Object[]) {
- fingerprint.addBytes(getBytes(mapFn, fingerprints, child));
- } else {
- addToFingerprint(mapFn, fingerprint, (T) child);
- }
+ addToFingerprint(mapFn, childrenFingerprint, digestMap, child);
}
- bytes = fingerprint.digestAndReset();
-
- // There is no point memoizing anything except the multi-item case,
- // since the single-item case gets inlined into its parents anyway,
- // and the empty set is a singleton
- fingerprints.put(children, bytes);
- } else if (children != NestedSet.EMPTY_CHILDREN) {
- // Single item
- Fingerprint fingerprint = new Fingerprint();
- addToFingerprint(mapFn, fingerprint, (T) children);
- bytes = fingerprint.digestAndReset();
- } else {
- // Empty nested set
- bytes = EMPTY_SET_BYTES;
+ digestMap.insertAndReadDigest(children, childrenFingerprint, fingerprint);
}
+ } else {
+ addToFingerprint(mapFn, fingerprint, (T) children);
}
- return bytes;
}
@VisibleForTesting
@@ -84,7 +75,7 @@ public class NestedSetFingerprintCache {
fingerprint.addString(mapFn.expandToCommandLine(object));
}
- private static Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> createMap() {
+ private static Map<CommandLineItem.MapFn<?>, DigestMap> createMap() {
return new ConcurrentHashMap<>();
}
}
diff --git a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
index c2119eeccb..ddb71b4aaf 100644
--- a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
+++ b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java
@@ -20,6 +20,7 @@ import com.google.devtools.build.lib.vfs.PathFragment;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
+import java.security.DigestException;
import java.security.DigestOutputStream;
import java.security.MessageDigest;
import java.security.NoSuchAlgorithmException;
@@ -73,6 +74,26 @@ public final class Fingerprint {
return md5.digest();
}
+ /**
+ * Completes the hash computation by doing final operations and resets the underlying state,
+ * allowing this instance to be used again.
+ *
+ * <p>Instead of returning a digest, this method writes the digest straight into the supplied byte
+ * array, at the given offset.
+ *
+ * @see java.security.MessageDigest#digest()
+ */
+ public void digestAndReset(byte[] buf, int offset, int len) {
+ try {
+ codedOut.flush();
+ md5.digest(buf, offset, len);
+ } catch (IOException e) {
+ throw new IllegalStateException("failed to flush", e);
+ } catch (DigestException e) {
+ throw new IllegalStateException("failed to digest", e);
+ }
+ }
+
/** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */
public String hexDigestAndReset() {
return hexDigest(digestAndReset());
diff --git a/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java
new file mode 100644
index 0000000000..35fb7ed3da
--- /dev/null
+++ b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java
@@ -0,0 +1,110 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import static com.google.common.truth.Truth.assertThat;
+
+import com.google.devtools.build.lib.util.Fingerprint;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Random;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicReference;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.JUnit4;
+
+/** Tests for {@link DigestMap}. */
+@RunWith(JUnit4.class)
+public class DigestMapTest {
+
+ @Test
+ public void simpleTest() {
+ int count = 128; // Must be smaller than byte for this test or we'll overflow
+ Object[] keys = new Object[count];
+ for (int i = 0; i < count; ++i) {
+ keys[i] = new Object();
+ }
+
+ DigestMap digestMap = new DigestMap(16, 4);
+ for (int i = 0; i < count; ++i) {
+ Fingerprint digest = new Fingerprint().addInt(i);
+ Fingerprint fingerprint = new Fingerprint();
+ digestMap.insertAndReadDigest(keys[i], digest, fingerprint);
+ Fingerprint reference =
+ new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset());
+ assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset());
+ }
+ for (int i = 0; i < count; ++i) {
+ Fingerprint fingerprint = new Fingerprint();
+ assertThat(digestMap.readDigest(keys[i], fingerprint)).isTrue();
+ Fingerprint reference =
+ new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset());
+ assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset());
+ }
+ }
+
+ @Test
+ public void concurrencyTest() throws Exception {
+ int count = 128; // Must be smaller than byte for this test or we'll overflow
+ Object[] keys = new Object[count];
+ for (int i = 0; i < count; ++i) {
+ keys[i] = new Object();
+ }
+ DigestMap digestMap = new DigestMap(16, 4);
+
+ AtomicBoolean done = new AtomicBoolean();
+ AtomicReference<Exception> exception = new AtomicReference<>();
+ List<Thread> threads = new ArrayList<>();
+ int threadCount = 16;
+ for (int i = 0; i < threadCount; ++i) {
+ Thread thread =
+ new Thread(
+ () -> {
+ Random random = new Random();
+ while (!done.get()) {
+ int index = random.nextInt(count);
+ Object key = keys[index];
+ Fingerprint fingerprint = new Fingerprint();
+ if (!digestMap.readDigest(key, fingerprint)) {
+ Fingerprint digest = new Fingerprint().addInt(index);
+ digestMap.insertAndReadDigest(key, digest, fingerprint);
+ }
+ Fingerprint reference =
+ new Fingerprint().addBytes(new Fingerprint().addInt(index).digestAndReset());
+ String hexDigest = fingerprint.hexDigestAndReset();
+ String referenceDigest = reference.hexDigestAndReset();
+ if (!hexDigest.equals(referenceDigest)) {
+ exception.set(
+ new IllegalStateException(
+ String.format(
+ "Digests are not equal: %s != %s, index %d",
+ hexDigest, referenceDigest, index)));
+ done.set(true);
+ }
+ }
+ });
+ thread.start();
+ threads.add(thread);
+ }
+ Thread.sleep(1000);
+ done.set(true);
+ for (int i = 0; i < threadCount; ++i) {
+ threads.get(i).join(1000);
+ }
+ if (exception.get() != null) {
+ throw exception.get();
+ }
+ }
+}