aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools
diff options
context:
space:
mode:
authorGravatar cpeyser <cpeyser@google.com>2018-04-19 10:27:55 -0700
committerGravatar Copybara-Service <copybara-piper@google.com>2018-04-19 10:30:22 -0700
commit9aebb11d3af94648980964596ab7907bf022eaed (patch)
treed9010bbe9caf240451d571de4557283a1524193c /src/main/java/com/google/devtools
parent6fcfc537398dd6d1836ef5cd32686d3b74ed088c (diff)
Introduce NestedSetStore, which persists information used for NestedSet serialization.
PiperOrigin-RevId: 193536486
Diffstat (limited to 'src/main/java/com/google/devtools')
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD1
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java4
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecWithStore.java64
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java221
-rw-r--r--src/main/java/com/google/devtools/build/lib/skyframe/serialization/ByteStringCodec.java41
-rw-r--r--src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java8
6 files changed, 283 insertions, 56 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
index 64b7a2cbca..a1a5bb259d 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
@@ -13,6 +13,7 @@ java_library(
"NestedSet.java",
"NestedSetBuilder.java",
"NestedSetCodecWithStore.java",
+ "NestedSetStore.java",
"NestedSetView.java",
"NestedSetVisitor.java",
"Order.java",
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
index d4540f6c9e..7ff6da4a3f 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
@@ -20,6 +20,7 @@ import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableSet;
import com.google.devtools.build.lib.collect.compacthashset.CompactHashSet;
import com.google.devtools.build.lib.skyframe.serialization.autocodec.AutoCodec;
+import com.google.protobuf.ByteString;
import java.util.AbstractCollection;
import java.util.Arrays;
import java.util.Collection;
@@ -95,6 +96,9 @@ public final class NestedSet<E> implements Iterable<E> {
if (member instanceof Object[]) {
throw new IllegalArgumentException("cannot store Object[] in NestedSet");
}
+ if (member instanceof ByteString) {
+ throw new IllegalArgumentException("cannot store ByteString in NestedSet");
+ }
if (!alreadyInserted.contains(member)) {
children[n++] = member;
}
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecWithStore.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecWithStore.java
index 3c434c95e2..93df1e378d 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecWithStore.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecWithStore.java
@@ -13,9 +13,6 @@
// limitations under the License.
package com.google.devtools.build.lib.collect.nestedset;
-import com.google.common.hash.Hashing;
-import com.google.common.hash.HashingOutputStream;
-import com.google.common.io.ByteStreams;
import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
import com.google.devtools.build.lib.skyframe.serialization.SerializationConstants;
@@ -26,7 +23,6 @@ import com.google.protobuf.ByteString;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
-import java.util.concurrent.ConcurrentHashMap;
/**
* Codec for {@link NestedSet} that uses the {@link NestedSetStore}.
@@ -36,10 +32,12 @@ import java.util.concurrent.ConcurrentHashMap;
*/
public class NestedSetCodecWithStore<T> implements ObjectCodec<NestedSet<T>> {
- private final ConcurrentHashMap<ByteString, Object> fingerprintToContents =
- new ConcurrentHashMap<>();
- private final ConcurrentHashMap<Object, ByteString> contentsToFingerprint =
- new ConcurrentHashMap<>();
+ private final NestedSetStore nestedSetStore;
+
+ /** Creates a NestedSetCodecWithStore that will use the given {@link NestedSetStore}. */
+ public NestedSetCodecWithStore(NestedSetStore nestedSetStore) {
+ this.nestedSetStore = nestedSetStore;
+ }
@SuppressWarnings("unchecked")
@Override
@@ -64,7 +62,7 @@ public class NestedSetCodecWithStore<T> implements ObjectCodec<NestedSet<T>> {
if (obj.isEmpty()) {
return;
}
- ByteString fingerprint = serializeToFingerprint(obj.rawChildren(), context);
+ ByteString fingerprint = nestedSetStore.computeFingerprintAndStore(obj.rawChildren(), context);
codedOut.writeByteArrayNoTag(fingerprint.toByteArray());
}
@@ -83,53 +81,7 @@ public class NestedSetCodecWithStore<T> implements ObjectCodec<NestedSet<T>> {
}
ByteString fingerprint = ByteString.copyFrom(codedIn.readByteArray());
- Object members = fingerprintToContents.get(fingerprint);
+ Object members = nestedSetStore.getContentsAndDeserialize(fingerprint, context);
return new NestedSet<>(order, members);
}
-
- private ByteString serializeToFingerprint(
- Object children, SerializationContext serializationContext) throws SerializationException {
- // For every fingerprint computation, we need to use a new memoization table. This is required
- // to guarantee that the same child will always have the same fingerprint - otherwise,
- // differences in memoization context could cause part of a child to be memoized in one
- // fingerprinting but not in the other. We expect this clearing of memoization state to be a
- // major source of extra work over the naive serialization approach. The same value may have to
- // be serialized many times across separate fingerprintings.
- SerializationContext newSerializationContext = serializationContext.getNewMemoizingContext();
-
- HashingOutputStream hashingOutputStream =
- new HashingOutputStream(Hashing.md5(), ByteStreams.nullOutputStream());
- CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(hashingOutputStream);
-
- try {
- if (children instanceof Object[]) {
- for (Object child : (Object[]) children) {
- if (child instanceof Object[]) {
- ByteString fingerprint = contentsToFingerprint.get(child);
- // If this fingerprint is not yet known, we recurse to compute it.
- if (fingerprint == null) {
- fingerprint = serializeToFingerprint(child, serializationContext);
- }
- codedOutputStream.writeBytesNoTag(fingerprint);
- } else {
- newSerializationContext.serialize(child, codedOutputStream);
- }
- }
- } else {
- newSerializationContext.serialize(children, codedOutputStream);
- }
- codedOutputStream.flush();
- } catch (IOException e) {
- throw new SerializationException(
- "Could not serialize " + children + ": " + e.getMessage(), e);
- }
-
- ByteString fingerprint = ByteString.copyFrom(hashingOutputStream.hash().asBytes());
-
- // Update the bimap
- fingerprintToContents.put(fingerprint, children);
- contentsToFingerprint.put(children, fingerprint);
-
- return fingerprint;
- }
}
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java
new file mode 100644
index 0000000000..9ad55efee5
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetStore.java
@@ -0,0 +1,221 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import com.google.common.base.Preconditions;
+import com.google.common.collect.MapMaker;
+import com.google.common.hash.Hashing;
+import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationConstants;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+import javax.annotation.Nullable;
+
+/**
+ * Supports association between fingerprints and NestedSet contents. A single NestedSetStore
+ * instance should be globally available across a single process.
+ *
+ * <p>Maintains the fingerprint -> contents side of the bimap by decomposing nested Object[]'s.
+ *
+ * <p>For example, suppose the NestedSet A can be drawn as:
+ *
+ * <pre>
+ * A
+ * / \
+ * B C
+ * / \
+ * D E
+ * </pre>
+ *
+ * <p>Then, in memory, A = [[D, E], C]. To store the NestedSet, we would rely on the fingerprint
+ * value FPb = fingerprint([D, E]) and write
+ *
+ * <pre>A -> fingerprint(FPb, C)</pre>
+ *
+ * <p>On retrieval, A will be reconstructed by first retrieving A using its fingerprint, and then
+ * recursively retrieving B using its fingerprint.
+ */
+public class NestedSetStore {
+
+ /** Stores fingerprint -> NestedSet associations. */
+ interface NestedSetStorageEndpoint {
+ /** Associates a fingerprint with the serialized representation of some NestedSet contents. */
+ void put(ByteString fingerprint, byte[] serializedBytes);
+
+ /**
+ * Retrieves the serialized bytes for the NestedSet contents associated with this fingerprint.
+ */
+ byte[] get(ByteString fingerprint);
+ }
+
+ /** An in-memory {@link NestedSetStorageEndpoint} */
+ private static class InMemoryNestedSetStorageEndpoint implements NestedSetStorageEndpoint {
+ private final ConcurrentHashMap<ByteString, byte[]> fingerprintToContents =
+ new ConcurrentHashMap<>();
+
+ @Override
+ public void put(ByteString fingerprint, byte[] serializedBytes) {
+ fingerprintToContents.put(fingerprint, serializedBytes);
+ }
+
+ @Override
+ public byte[] get(ByteString fingerprint) {
+ return fingerprintToContents.get(fingerprint);
+ }
+ }
+
+ /** An in-memory cache for fingerprint <-> NestedSet associations. */
+ private static class NestedSetCache {
+ private final Map<ByteString, Object> fingerprintToContents =
+ new MapMaker()
+ .concurrencyLevel(SerializationConstants.DESERIALIZATION_POOL_SIZE)
+ .weakValues()
+ .makeMap();
+
+ /** Object/Object[] contents to fingerprint. Maintained for fast fingerprinting. */
+ private final Map<Object, ByteString> contentsToFingerprint =
+ new MapMaker()
+ .concurrencyLevel(SerializationConstants.DESERIALIZATION_POOL_SIZE)
+ .weakKeys()
+ .makeMap();
+
+ /**
+ * Returns the NestedSet contents associated with the given fingerprint. Returns null if the
+ * fingerprint is not known.
+ */
+ @Nullable
+ public Object contentsForFingerprint(ByteString fingerprint) {
+ return fingerprintToContents.get(fingerprint);
+ }
+
+ /**
+ * Retrieves the fingerprint associated with the given NestedSet contents, or null if the given
+ * contents are not known.
+ */
+ @Nullable
+ public ByteString fingerprintForContents(Object contents) {
+ return contentsToFingerprint.get(contents);
+ }
+
+ /** Associates the provided fingerprint and NestedSet contents. */
+ public void put(ByteString fingerprint, Object contents) {
+ contentsToFingerprint.put(contents, fingerprint);
+ fingerprintToContents.put(fingerprint, contents);
+ }
+ }
+
+ private final NestedSetCache nestedSetCache = new NestedSetCache();
+ private final NestedSetStorageEndpoint nestedSetStorageEndpoint =
+ new InMemoryNestedSetStorageEndpoint();
+
+ /**
+ * Computes and returns the fingerprint for the given NestedSet contents using the given {@link
+ * SerializationContext}, while also associating the contents with the computed fingerprint in the
+ * store. Recursively does the same for all transitive members (i.e. Object[] members) of the
+ * provided contents.
+ */
+ ByteString computeFingerprintAndStore(Object contents, SerializationContext serializationContext)
+ throws SerializationException {
+ ByteString priorFingerprint = nestedSetCache.fingerprintForContents(contents);
+ if (priorFingerprint != null) {
+ return priorFingerprint;
+ }
+
+ // For every fingerprint computation, we need to use a new memoization table. This is required
+ // to guarantee that the same child will always have the same fingerprint - otherwise,
+ // differences in memoization context could cause part of a child to be memoized in one
+ // fingerprinting but not in the other. We expect this clearing of memoization state to be a
+ // major source of extra work over the naive serialization approach. The same value may have to
+ // be serialized many times across separate fingerprintings.
+ SerializationContext newSerializationContext = serializationContext.getNewMemoizingContext();
+ ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
+ CodedOutputStream codedOutputStream = CodedOutputStream.newInstance(byteArrayOutputStream);
+
+ try {
+ if (contents instanceof Object[]) {
+ Object[] contentsArray = (Object[]) contents;
+ codedOutputStream.writeInt32NoTag(contentsArray.length);
+ for (Object child : contentsArray) {
+ if (child instanceof Object[]) {
+ ByteString fingerprint = computeFingerprintAndStore(child, serializationContext);
+ newSerializationContext.serialize(fingerprint, codedOutputStream);
+ } else {
+ newSerializationContext.serialize(child, codedOutputStream);
+ }
+ }
+ } else {
+ codedOutputStream.writeInt32NoTag(1);
+ newSerializationContext.serialize(contents, codedOutputStream);
+ }
+ codedOutputStream.flush();
+ } catch (IOException e) {
+ throw new SerializationException("Could not serialize " + contents, e);
+ }
+
+ byte[] serializedBytes = byteArrayOutputStream.toByteArray();
+ ByteString fingerprint =
+ ByteString.copyFrom(Hashing.md5().hashBytes(serializedBytes).asBytes());
+
+ nestedSetCache.put(fingerprint, contents);
+ nestedSetStorageEndpoint.put(fingerprint, serializedBytes);
+
+ return fingerprint;
+ }
+
+ /** Retrieves and deserializes the NestedSet contents associated with the given fingerprint. */
+ public Object getContentsAndDeserialize(
+ ByteString fingerprint, DeserializationContext deserializationContext)
+ throws IOException, SerializationException {
+ Object contents = nestedSetCache.contentsForFingerprint(fingerprint);
+ if (contents != null) {
+ return contents;
+ }
+
+ byte[] retrieved =
+ Preconditions.checkNotNull(
+ nestedSetStorageEndpoint.get(fingerprint),
+ "Fingerprint %s not found in NestedSetStore",
+ fingerprint);
+ CodedInputStream codedIn = CodedInputStream.newInstance(retrieved);
+ DeserializationContext newDeserializationContext =
+ deserializationContext.getNewMemoizingContext();
+
+ int numberOfElements = codedIn.readInt32();
+ Object dereferencedContents;
+ if (numberOfElements > 1) {
+ Object[] dereferencedContentsArray = new Object[numberOfElements];
+ for (int i = 0; i < numberOfElements; i++) {
+ Object deserializedElement = newDeserializationContext.deserialize(codedIn);
+ dereferencedContentsArray[i] =
+ deserializedElement instanceof ByteString
+ ? getContentsAndDeserialize(
+ (ByteString) deserializedElement, deserializationContext)
+ : deserializedElement;
+ }
+ dereferencedContents = dereferencedContentsArray;
+ } else {
+ dereferencedContents = newDeserializationContext.deserialize(codedIn);
+ }
+
+ nestedSetCache.put(fingerprint, dereferencedContents);
+ return dereferencedContents;
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/ByteStringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/ByteStringCodec.java
new file mode 100644
index 0000000000..613fa6a98e
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/ByteStringCodec.java
@@ -0,0 +1,41 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.skyframe.serialization;
+
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+
+/** Codec for {@link ByteString}. */
+public class ByteStringCodec implements ObjectCodec<ByteString> {
+
+ @Override
+ public Class<? extends ByteString> getEncodedClass() {
+ return ByteString.class;
+ }
+
+ @Override
+ public void serialize(SerializationContext context, ByteString obj, CodedOutputStream codedOut)
+ throws SerializationException, IOException {
+ codedOut.writeByteArrayNoTag(obj.toByteArray());
+ }
+
+ @Override
+ public ByteString deserialize(DeserializationContext context, CodedInputStream codedIn)
+ throws SerializationException, IOException {
+ return ByteString.copyFrom(codedIn.readByteArray());
+ }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
index 2080517e1f..801dfc5149 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/DeserializationContext.java
@@ -110,6 +110,14 @@ public class DeserializationContext {
if (deserializer != null) {
return this;
}
+ return getNewMemoizingContext();
+ }
+
+ /**
+ * Returns a memoizing {@link DeserializationContext}, as getMemoizingContext above. Unlike
+ * getMemoizingContext, this method is not idempotent - the returned context will always be fresh.
+ */
+ public DeserializationContext getNewMemoizingContext() {
return new DeserializationContext(this.registry, this.dependencies, new Deserializer());
}
}