aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/collect/nestedset
diff options
context:
space:
mode:
authorGravatar tomlu <tomlu@google.com>2017-11-27 10:02:19 -0800
committerGravatar Copybara-Service <copybara-piper@google.com>2017-11-27 10:04:32 -0800
commit4b32d5cf98e1edbe864e6c72ba7db90944214ff2 (patch)
tree003293d3d10feb68c4655cafb57ec3ebc6a6239e /src/main/java/com/google/devtools/build/lib/collect/nestedset
parent3e7ef36aa6f25200580fd235e71f9cb4cc8284aa (diff)
Support nested set serialization.
PiperOrigin-RevId: 177032673
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/collect/nestedset')
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD21
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java14
-rw-r--r--src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java206
3 files changed, 236 insertions, 5 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
index 86639cf3bc..6c6ef3eb20 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD
@@ -9,9 +9,13 @@ filegroup(
# Library of collection utilities.
java_library(
name = "nestedset",
- srcs = glob([
- "*.java",
- ]),
+ srcs = [
+ "NestedSet.java",
+ "NestedSetBuilder.java",
+ "NestedSetView.java",
+ "NestedSetVisitor.java",
+ "Order.java",
+ ],
deps = [
"//src/main/java/com/google/devtools/build/lib/collect/compacthashset",
"//src/main/java/com/google/devtools/build/lib/concurrent",
@@ -19,3 +23,14 @@ java_library(
"//third_party:jsr305",
],
)
+
+java_library(
+ name = "serialization",
+ srcs = ["NestedSetCodec.java"],
+ deps = [
+ ":nestedset",
+ "//src/main/java/com/google/devtools/build/lib/skyframe/serialization",
+ "//third_party:guava",
+ "//third_party/protobuf:protobuf_java",
+ ],
+)
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
index 89db4ecafc..b9e43089c0 100644
--- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java
@@ -37,11 +37,11 @@ import javax.annotation.Nullable;
public final class NestedSet<E> implements Iterable<E> {
private final Order order;
- private final Object children;
+ final Object children;
private byte[] memo;
private static final byte[] LEAF_MEMO = {};
- private static final Object[] EMPTY_CHILDREN = {};
+ static final Object[] EMPTY_CHILDREN = {};
/**
* Construct an empty NestedSet. Should only be called by Order's class initializer.
@@ -134,6 +134,16 @@ public final class NestedSet<E> implements Iterable<E> {
}
}
+ // Only used by deserialization
+ NestedSet(Order order, Object children) {
+ this.order = order;
+ this.children = children;
+ boolean hasChildren =
+ children instanceof Object[]
+ && (Arrays.stream((Object[]) children).anyMatch(child -> child instanceof Object[]));
+ this.memo = hasChildren ? null : LEAF_MEMO;
+ }
+
/**
* Returns the ordering of this nested set.
*/
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java
new file mode 100644
index 0000000000..eee5edc136
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java
@@ -0,0 +1,206 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+package com.google.devtools.build.lib.collect.nestedset;
+
+import com.google.common.base.Preconditions;
+import com.google.common.hash.Hashing;
+import com.google.common.hash.HashingOutputStream;
+import com.google.devtools.build.lib.skyframe.serialization.EnumCodec;
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.devtools.build.lib.skyframe.serialization.SerializationException;
+import com.google.protobuf.ByteString;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.IdentityHashMap;
+import java.util.LinkedHashSet;
+import java.util.Map;
+
+/**
+ * Codec for {@link NestedSet}.
+ *
+ * <p>Nested sets are serialized by sorting the sub-graph in topological order, then writing the
+ * nodes in that order. As a node is written we remember its digest. When serializing a node higher
+ * in the graph, we replace any edge to another nested set with its digest.
+ */
+public class NestedSetCodec<T> implements ObjectCodec<NestedSet<T>> {
+
+ private static final EnumCodec<Order> orderCodec = new EnumCodec<>(Order.class);
+ private final ObjectCodec<T> objectCodec;
+
+ public NestedSetCodec(ObjectCodec<T> objectCodec) {
+ this.objectCodec = objectCodec;
+ }
+
+ @SuppressWarnings("unchecked")
+ @Override
+ public Class<NestedSet<T>> getEncodedClass() {
+ // Compiler doesn't like cast from Class<NestedSet> -> Class<NestedSet<T>>, but it
+ // does allow what we see below. Type is lost at runtime anyway, so while gross this works.
+ return (Class<NestedSet<T>>) ((Class<?>) NestedSet.class);
+ }
+
+ @Override
+ public void serialize(NestedSet<T> obj, CodedOutputStream codedOut)
+ throws SerializationException, IOException {
+ // Topo sort the nested set to ensure digests are available for children at time of writing
+ Collection<Object> topoSortedChildren = getTopologicallySortedChildren(obj);
+ Map<Object, byte[]> childToDigest = new IdentityHashMap<>();
+ codedOut.writeInt32NoTag(topoSortedChildren.size());
+ orderCodec.serialize(obj.getOrder(), codedOut);
+ for (Object children : topoSortedChildren) {
+ serializeOneNestedSet(children, codedOut, childToDigest);
+ }
+ }
+
+ @Override
+ public NestedSet<T> deserialize(CodedInputStream codedIn)
+ throws SerializationException, IOException {
+ Map<ByteString, Object> digestToChild = new HashMap<>();
+ int nestedSetCount = codedIn.readInt32();
+ Preconditions.checkState(
+ nestedSetCount >= 1,
+ "Should have at least serialized one nested set, got: %d",
+ nestedSetCount);
+ Order order = orderCodec.deserialize(codedIn);
+ Object children = null;
+ for (int i = 0; i < nestedSetCount; ++i) {
+ // Update var, the last one in the list is the top level nested set
+ children = deserializeOneNestedSet(codedIn, digestToChild);
+ }
+ return createNestedSet(order, children);
+ }
+
+ private void serializeOneNestedSet(
+ Object children, CodedOutputStream codedOut, Map<Object, byte[]> childToDigest)
+ throws IOException, SerializationException {
+ // Serialize nested set into an inner byte array so we can take its digest
+ ByteArrayOutputStream childOutputStream = new ByteArrayOutputStream();
+ HashingOutputStream hashingOutputStream =
+ new HashingOutputStream(Hashing.md5(), childOutputStream);
+ CodedOutputStream childCodedOut = CodedOutputStream.newInstance(hashingOutputStream);
+ if (children instanceof Object[]) {
+ serializeMultiItemChildArray((Object[]) children, childToDigest, childCodedOut);
+ } else if (children != NestedSet.EMPTY_CHILDREN) {
+ serializeSingleItemChildArray(children, childCodedOut);
+ } else {
+ // Empty set
+ childCodedOut.writeInt32NoTag(0);
+ }
+ childCodedOut.flush();
+ byte[] digest = hashingOutputStream.hash().asBytes();
+ codedOut.writeByteArrayNoTag(digest);
+ byte[] childBytes = childOutputStream.toByteArray();
+ codedOut.writeByteArrayNoTag(childBytes);
+ childToDigest.put(children, digest);
+ }
+
+ private void serializeMultiItemChildArray(
+ Object[] children, Map<Object, byte[]> childToDigest, CodedOutputStream childCodedOut)
+ throws IOException, SerializationException {
+ childCodedOut.writeInt32NoTag(children.length);
+ for (Object child : children) {
+ if (child instanceof Object[]) {
+ byte[] digest =
+ Preconditions.checkNotNull(
+ childToDigest.get(child),
+ "Digest not available; Are nested sets serialized in the right order?");
+ childCodedOut.writeBoolNoTag(true);
+ childCodedOut.writeByteArrayNoTag(digest);
+ } else {
+ childCodedOut.writeBoolNoTag(false);
+ objectCodec.serialize(cast(child), childCodedOut);
+ }
+ }
+ }
+
+ private void serializeSingleItemChildArray(Object children, CodedOutputStream childCodedOut)
+ throws IOException, SerializationException {
+ childCodedOut.writeInt32NoTag(1);
+ T singleChild = cast(children);
+ objectCodec.serialize(singleChild, childCodedOut);
+ }
+
+ private Object deserializeOneNestedSet(
+ CodedInputStream codedIn, Map<ByteString, Object> digestToChild)
+ throws SerializationException, IOException {
+ ByteString digest = codedIn.readBytes();
+ CodedInputStream childCodedIn = codedIn.readBytes().newCodedInput();
+ childCodedIn.enableAliasing(true); // Allow efficient views of byte slices when reading digests
+ int childCount = childCodedIn.readInt32();
+ final Object result;
+ if (childCount > 1) {
+ result = deserializeMultipleItemChildArray(digestToChild, childCodedIn, childCount);
+ } else if (childCount == 1) {
+ result = objectCodec.deserialize(childCodedIn);
+ } else {
+ result = NestedSet.EMPTY_CHILDREN;
+ }
+ digestToChild.put(digest, result);
+ return result;
+ }
+
+ private Object deserializeMultipleItemChildArray(
+ Map<ByteString, Object> digestToChild, CodedInputStream childCodedIn, int childCount)
+ throws IOException, SerializationException {
+ Object[] children = new Object[childCount];
+ for (int i = 0; i < childCount; ++i) {
+ boolean isTransitiveEntry = childCodedIn.readBool();
+ if (isTransitiveEntry) {
+ ByteString digest = childCodedIn.readBytes();
+ children[i] =
+ Preconditions.checkNotNull(digestToChild.get(digest), "Transitive nested set missing");
+ } else {
+ children[i] = objectCodec.deserialize(childCodedIn);
+ }
+ }
+ return children;
+ }
+
+ @SuppressWarnings("unchecked")
+ private T cast(Object object) {
+ return (T) object;
+ }
+
+ private static Collection<Object> getTopologicallySortedChildren(
+ NestedSet<?> nestedSet) {
+ LinkedHashSet<Object> result = new LinkedHashSet<>();
+ dfs(result, nestedSet.children);
+ return result;
+ }
+
+ private static <T> NestedSet<T> createNestedSet(Order order, Object children) {
+ if (children == NestedSet.EMPTY_CHILDREN) {
+ return order.emptySet();
+ }
+ return new NestedSet<>(order, children);
+ }
+
+ private static void dfs(LinkedHashSet<Object> sets, Object children) {
+ if (sets.contains(children)) {
+ return;
+ }
+ if (children instanceof Object[]) {
+ for (Object child : (Object[]) children) {
+ if (child instanceof Object[]) {
+ dfs(sets, child);
+ }
+ }
+ }
+ sets.add(children);
+ }
+}