diff options
author | tomlu <tomlu@google.com> | 2018-01-31 11:18:20 -0800 |
---|---|---|
committer | Copybara-Service <copybara-piper@google.com> | 2018-01-31 11:20:14 -0800 |
commit | 42c2c729597eb7e8a87c62dce435781f47732d2c (patch) | |
tree | c524951169166f05b61e470a47eafab73dfc0240 | |
parent | 223ed93582dbb730eb92c80cc6350dcec116ec0a (diff) |
Add memory-efficient map for storing nested set -> digest.
Instead of using ConcurrentHashMap, we use a dead-simple open addressed hash hable with a giant byte array with 16-byte slots. We then read or write fingerprints straight into and out of the array, obviating the need to generate intermediate garbage.
Locking mechanism is a read-write lock. This should be faster than full synchronisation for read-heavy loads.
RELNOTES: None
PiperOrigin-RevId: 184019301
5 files changed, 340 insertions, 34 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD index 92e058bb1f..bb8fae4abf 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD @@ -41,7 +41,10 @@ java_library( java_library( name = "fingerprint_cache", - srcs = ["NestedSetFingerprintCache.java"], + srcs = [ + "DigestMap.java", + "NestedSetFingerprintCache.java", + ], deps = [ ":nestedset", "//src/main/java/com/google/devtools/build/lib:commandline_item", diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java new file mode 100644 index 0000000000..71e5f85348 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/DigestMap.java @@ -0,0 +1,181 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.collect.nestedset; + +import com.google.common.base.Preconditions; +import com.google.devtools.build.lib.util.Fingerprint; +import java.util.concurrent.locks.StampedLock; + +/** + * Map of key -> [digest bytes]. + * + * <p>This class uses a single array of keys and a big single block of bytes. To read/store digests + * we index straight into the byte array. This is more memory-efficient and uses less GC than a + * corresponding Map<Object, byte[]>. + * + * <p>Keys use reference equality. + */ +final class DigestMap { + private final int digestSize; + private final StampedLock readWriteLock = new StampedLock(); + private Object[] keys; + private byte[] bytes; + private int tableSize; + private int nextResize; + private int size; + + DigestMap(int digestSize, int initialSize) { + Preconditions.checkArgument( + initialSize > 0 && (initialSize & (initialSize - 1)) == 0, + "initialSize must be a power of 2 greater than 0"); + this.digestSize = digestSize; + this.keys = new Object[initialSize]; + this.bytes = new byte[initialSize * digestSize]; + this.tableSize = initialSize; + this.nextResize = getNextResize(initialSize); + } + + /** Finds the digest for the corresponding key and adds it to the passed fingerprint. */ + boolean readDigest(Object key, Fingerprint fingerprint) { + long stamp = readWriteLock.readLock(); + try { + int index = findKeyReadLocked(key); + if (index >= 0) { + fingerprint.addBytes(bytes, index * digestSize, digestSize); + return true; + } else { + return false; + } + } finally { + readWriteLock.unlockRead(stamp); + } + } + + // Finds the key in the array. Must be called under read lock. + private int findKeyReadLocked(Object key) { + int hash = hash(key); + int index = hash & (tableSize - 1); + while (true) { + Object currentKey = keys[index]; + if (currentKey == key) { + return index; + } else if (currentKey == null) { + return -1; + } + index = probe(index, tableSize); + } + } + + /** + * Inserts a digest for the corresponding key, then immediately reads it into another fingerprint. + * + * <p>This is equivalent to <code> + * digestMap.insertDigest(key, digest.digestAndReset(); digestMap.readDigest(key, readTo); </code> + * but it will be faster. + * + * @param key The key to insert. + * @param digest The fingerprint to insert. This will reset the fingerprint instance. + * @param readTo A fingerprint to read the just-added fingerprint into. + */ + void insertAndReadDigest(Object key, Fingerprint digest, Fingerprint readTo) { + long stamp = readWriteLock.writeLock(); + try { + int index = insertKeyWriteLocked(key); + digest.digestAndReset(bytes, index * digestSize, digestSize); + readTo.addBytes(bytes, index * digestSize, digestSize); + } finally { + readWriteLock.unlockWrite(stamp); + } + } + + // Inserts a key into the array and returns the index. Must be called under write lock. + private int insertKeyWriteLocked(Object key) { + if (size >= nextResize) { + resizeTableWriteLocked(); + } + int hash = hash(key); + int index = hash & (tableSize - 1); + while (true) { + Object currentKey = keys[index]; + if (currentKey == null) { + keys[index] = key; + ++size; + return index; + } else if (currentKey == key) { + // Key is already present + return index; + } + index = probe(index, tableSize); + } + } + + private void resizeTableWriteLocked() { + int digestSize = this.digestSize; + int tableSize = this.tableSize; + Object[] keys = this.keys; + byte[] bytes = this.bytes; + int newTableSize = this.tableSize * 2; + Object[] newKeys = new Object[newTableSize]; + byte[] newBytes = new byte[newTableSize * digestSize]; + for (int i = 0; i < tableSize; ++i) { + Object key = keys[i]; + if (key != null) { + int newIndex = firstFreeIndex(newKeys, newTableSize, key); + newKeys[newIndex] = key; + System.arraycopy(bytes, i * digestSize, newBytes, newIndex * digestSize, digestSize); + } + } + this.tableSize = newTableSize; + this.keys = newKeys; + this.bytes = newBytes; + this.nextResize = getNextResize(newTableSize); + } + + private static int firstFreeIndex(Object[] keys, int tableSize, Object key) { + int hash = hash(key); + int index = hash & (tableSize - 1); + while (true) { + Object currentKey = keys[index]; + if (currentKey == null) { + return index; + } + index = probe(index, tableSize); + } + } + + private static int hash(Object key) { + return smear(System.identityHashCode(key)); + } + + private static int probe(int index, int tableSize) { + return (index + 1) & (tableSize - 1); + } + + private static int getNextResize(int newTableSize) { + // 75% load + return (newTableSize * 3) / 4; + } + + /* + * This method was rewritten in Java from an intermediate step of the Murmur hash function in + * http://code.google.com/p/smhasher/source/browse/trunk/MurmurHash3.cpp, which contained the + * following header: + * + * MurmurHash3 was written by Austin Appleby, and is placed in the public domain. The author + * hereby disclaims copyright to this source code. + */ + private static int smear(int hashCode) { + return 0x1b873593 * Integer.rotateLeft(hashCode * 0xcc9e2d51, 15); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java index b4d341143c..b48d4a2298 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetFingerprintCache.java @@ -22,10 +22,11 @@ import java.util.concurrent.ConcurrentHashMap; /** Computes fingerprints for nested sets, reusing sub-computations from children. */ public class NestedSetFingerprintCache { - private static final byte[] EMPTY_SET_BYTES = new byte[] {}; + private static final int DIGEST_SIZE = 16; + private static final int EMPTY_SET_DIGEST = 104_395_303; /** Memoize the subresults. We have to have one cache per type of command item map function. */ - private Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> mapFnToFingerprints = createMap(); + private Map<CommandLineItem.MapFn<?>, DigestMap> mapFnToFingerprints = createMap(); public <T> void addNestedSetToFingerprint(Fingerprint fingerprint, NestedSet<T> nestedSet) { addNestedSetToFingerprint(CommandLineItem.MapFn.DEFAULT, fingerprint, nestedSet); @@ -33,12 +34,16 @@ public class NestedSetFingerprintCache { public <T> void addNestedSetToFingerprint( CommandLineItem.MapFn<? super T> mapFn, Fingerprint fingerprint, NestedSet<T> nestedSet) { - Map<Object, byte[]> fingerprints = - mapFnToFingerprints.computeIfAbsent(mapFn, k -> new ConcurrentHashMap<>()); + // Only top-level nested sets can be empty, so we can bail here + if (nestedSet.isEmpty()) { + fingerprint.addInt(EMPTY_SET_DIGEST); + return; + } + DigestMap digestMap = + mapFnToFingerprints.computeIfAbsent(mapFn, k -> new DigestMap(DIGEST_SIZE, 1024)); fingerprint.addInt(nestedSet.getOrder().ordinal()); Object children = nestedSet.rawChildren(); - byte[] bytes = getBytes(mapFn, fingerprints, children); - fingerprint.addBytes(bytes); + addToFingerprint(mapFn, fingerprint, digestMap, children); } public void clear() { @@ -46,36 +51,22 @@ public class NestedSetFingerprintCache { } @SuppressWarnings("unchecked") - private <T> byte[] getBytes( - CommandLineItem.MapFn<? super T> mapFn, Map<Object, byte[]> fingerprints, Object children) { - byte[] bytes = fingerprints.get(children); - if (bytes == null) { - if (children instanceof Object[]) { - Fingerprint fingerprint = new Fingerprint(); + private <T> void addToFingerprint( + CommandLineItem.MapFn<? super T> mapFn, + Fingerprint fingerprint, + DigestMap digestMap, + Object children) { + if (children instanceof Object[]) { + if (!digestMap.readDigest(children, fingerprint)) { + Fingerprint childrenFingerprint = new Fingerprint(); for (Object child : (Object[]) children) { - if (child instanceof Object[]) { - fingerprint.addBytes(getBytes(mapFn, fingerprints, child)); - } else { - addToFingerprint(mapFn, fingerprint, (T) child); - } + addToFingerprint(mapFn, childrenFingerprint, digestMap, child); } - bytes = fingerprint.digestAndReset(); - - // There is no point memoizing anything except the multi-item case, - // since the single-item case gets inlined into its parents anyway, - // and the empty set is a singleton - fingerprints.put(children, bytes); - } else if (children != NestedSet.EMPTY_CHILDREN) { - // Single item - Fingerprint fingerprint = new Fingerprint(); - addToFingerprint(mapFn, fingerprint, (T) children); - bytes = fingerprint.digestAndReset(); - } else { - // Empty nested set - bytes = EMPTY_SET_BYTES; + digestMap.insertAndReadDigest(children, childrenFingerprint, fingerprint); } + } else { + addToFingerprint(mapFn, fingerprint, (T) children); } - return bytes; } @VisibleForTesting @@ -84,7 +75,7 @@ public class NestedSetFingerprintCache { fingerprint.addString(mapFn.expandToCommandLine(object)); } - private static Map<CommandLineItem.MapFn<?>, Map<Object, byte[]>> createMap() { + private static Map<CommandLineItem.MapFn<?>, DigestMap> createMap() { return new ConcurrentHashMap<>(); } } diff --git a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java index c2119eeccb..ddb71b4aaf 100644 --- a/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java +++ b/src/main/java/com/google/devtools/build/lib/util/Fingerprint.java @@ -20,6 +20,7 @@ import com.google.devtools.build.lib.vfs.PathFragment; import com.google.protobuf.CodedOutputStream; import java.io.IOException; import java.nio.charset.StandardCharsets; +import java.security.DigestException; import java.security.DigestOutputStream; import java.security.MessageDigest; import java.security.NoSuchAlgorithmException; @@ -73,6 +74,26 @@ public final class Fingerprint { return md5.digest(); } + /** + * Completes the hash computation by doing final operations and resets the underlying state, + * allowing this instance to be used again. + * + * <p>Instead of returning a digest, this method writes the digest straight into the supplied byte + * array, at the given offset. + * + * @see java.security.MessageDigest#digest() + */ + public void digestAndReset(byte[] buf, int offset, int len) { + try { + codedOut.flush(); + md5.digest(buf, offset, len); + } catch (IOException e) { + throw new IllegalStateException("failed to flush", e); + } catch (DigestException e) { + throw new IllegalStateException("failed to digest", e); + } + } + /** Same as {@link #digestAndReset()}, except returns the digest in hex string form. */ public String hexDigestAndReset() { return hexDigest(digestAndReset()); diff --git a/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java new file mode 100644 index 0000000000..35fb7ed3da --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/collect/nestedset/DigestMapTest.java @@ -0,0 +1,110 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.collect.nestedset; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.devtools.build.lib.util.Fingerprint; +import java.util.ArrayList; +import java.util.List; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicReference; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link DigestMap}. */ +@RunWith(JUnit4.class) +public class DigestMapTest { + + @Test + public void simpleTest() { + int count = 128; // Must be smaller than byte for this test or we'll overflow + Object[] keys = new Object[count]; + for (int i = 0; i < count; ++i) { + keys[i] = new Object(); + } + + DigestMap digestMap = new DigestMap(16, 4); + for (int i = 0; i < count; ++i) { + Fingerprint digest = new Fingerprint().addInt(i); + Fingerprint fingerprint = new Fingerprint(); + digestMap.insertAndReadDigest(keys[i], digest, fingerprint); + Fingerprint reference = + new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset()); + assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset()); + } + for (int i = 0; i < count; ++i) { + Fingerprint fingerprint = new Fingerprint(); + assertThat(digestMap.readDigest(keys[i], fingerprint)).isTrue(); + Fingerprint reference = + new Fingerprint().addBytes(new Fingerprint().addInt(i).digestAndReset()); + assertThat(fingerprint.hexDigestAndReset()).isEqualTo(reference.hexDigestAndReset()); + } + } + + @Test + public void concurrencyTest() throws Exception { + int count = 128; // Must be smaller than byte for this test or we'll overflow + Object[] keys = new Object[count]; + for (int i = 0; i < count; ++i) { + keys[i] = new Object(); + } + DigestMap digestMap = new DigestMap(16, 4); + + AtomicBoolean done = new AtomicBoolean(); + AtomicReference<Exception> exception = new AtomicReference<>(); + List<Thread> threads = new ArrayList<>(); + int threadCount = 16; + for (int i = 0; i < threadCount; ++i) { + Thread thread = + new Thread( + () -> { + Random random = new Random(); + while (!done.get()) { + int index = random.nextInt(count); + Object key = keys[index]; + Fingerprint fingerprint = new Fingerprint(); + if (!digestMap.readDigest(key, fingerprint)) { + Fingerprint digest = new Fingerprint().addInt(index); + digestMap.insertAndReadDigest(key, digest, fingerprint); + } + Fingerprint reference = + new Fingerprint().addBytes(new Fingerprint().addInt(index).digestAndReset()); + String hexDigest = fingerprint.hexDigestAndReset(); + String referenceDigest = reference.hexDigestAndReset(); + if (!hexDigest.equals(referenceDigest)) { + exception.set( + new IllegalStateException( + String.format( + "Digests are not equal: %s != %s, index %d", + hexDigest, referenceDigest, index))); + done.set(true); + } + } + }); + thread.start(); + threads.add(thread); + } + Thread.sleep(1000); + done.set(true); + for (int i = 0; i < threadCount; ++i) { + threads.get(i).join(1000); + } + if (exception.get() != null) { + throw exception.get(); + } + } +} |