diff options
author | 2017-11-27 10:02:19 -0800 | |
---|---|---|
committer | 2017-11-27 10:04:32 -0800 | |
commit | 4b32d5cf98e1edbe864e6c72ba7db90944214ff2 (patch) | |
tree | 003293d3d10feb68c4655cafb57ec3ebc6a6239e /src/main/java | |
parent | 3e7ef36aa6f25200580fd235e71f9cb4cc8284aa (diff) |
Support nested set serialization.
PiperOrigin-RevId: 177032673
Diffstat (limited to 'src/main/java')
4 files changed, 237 insertions, 6 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD index 86639cf3bc..6c6ef3eb20 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD @@ -9,9 +9,13 @@ filegroup( # Library of collection utilities. java_library( name = "nestedset", - srcs = glob([ - "*.java", - ]), + srcs = [ + "NestedSet.java", + "NestedSetBuilder.java", + "NestedSetView.java", + "NestedSetVisitor.java", + "Order.java", + ], deps = [ "//src/main/java/com/google/devtools/build/lib/collect/compacthashset", "//src/main/java/com/google/devtools/build/lib/concurrent", @@ -19,3 +23,14 @@ java_library( "//third_party:jsr305", ], ) + +java_library( + name = "serialization", + srcs = ["NestedSetCodec.java"], + deps = [ + ":nestedset", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//third_party:guava", + "//third_party/protobuf:protobuf_java", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java index 89db4ecafc..b9e43089c0 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java @@ -37,11 +37,11 @@ import javax.annotation.Nullable; public final class NestedSet<E> implements Iterable<E> { private final Order order; - private final Object children; + final Object children; private byte[] memo; private static final byte[] LEAF_MEMO = {}; - private static final Object[] EMPTY_CHILDREN = {}; + static final Object[] EMPTY_CHILDREN = {}; /** * Construct an empty NestedSet. Should only be called by Order's class initializer. @@ -134,6 +134,16 @@ public final class NestedSet<E> implements Iterable<E> { } } + // Only used by deserialization + NestedSet(Order order, Object children) { + this.order = order; + this.children = children; + boolean hasChildren = + children instanceof Object[] + && (Arrays.stream((Object[]) children).anyMatch(child -> child instanceof Object[])); + this.memo = hasChildren ? null : LEAF_MEMO; + } + /** * Returns the ordering of this nested set. */ diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java new file mode 100644 index 0000000000..eee5edc136 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java @@ -0,0 +1,206 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.collect.nestedset; + +import com.google.common.base.Preconditions; +import com.google.common.hash.Hashing; +import com.google.common.hash.HashingOutputStream; +import com.google.devtools.build.lib.skyframe.serialization.EnumCodec; +import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; +import com.google.devtools.build.lib.skyframe.serialization.SerializationException; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.LinkedHashSet; +import java.util.Map; + +/** + * Codec for {@link NestedSet}. + * + * <p>Nested sets are serialized by sorting the sub-graph in topological order, then writing the + * nodes in that order. As a node is written we remember its digest. When serializing a node higher + * in the graph, we replace any edge to another nested set with its digest. + */ +public class NestedSetCodec<T> implements ObjectCodec<NestedSet<T>> { + + private static final EnumCodec<Order> orderCodec = new EnumCodec<>(Order.class); + private final ObjectCodec<T> objectCodec; + + public NestedSetCodec(ObjectCodec<T> objectCodec) { + this.objectCodec = objectCodec; + } + + @SuppressWarnings("unchecked") + @Override + public Class<NestedSet<T>> getEncodedClass() { + // Compiler doesn't like cast from Class<NestedSet> -> Class<NestedSet<T>>, but it + // does allow what we see below. Type is lost at runtime anyway, so while gross this works. + return (Class<NestedSet<T>>) ((Class<?>) NestedSet.class); + } + + @Override + public void serialize(NestedSet<T> obj, CodedOutputStream codedOut) + throws SerializationException, IOException { + // Topo sort the nested set to ensure digests are available for children at time of writing + Collection<Object> topoSortedChildren = getTopologicallySortedChildren(obj); + Map<Object, byte[]> childToDigest = new IdentityHashMap<>(); + codedOut.writeInt32NoTag(topoSortedChildren.size()); + orderCodec.serialize(obj.getOrder(), codedOut); + for (Object children : topoSortedChildren) { + serializeOneNestedSet(children, codedOut, childToDigest); + } + } + + @Override + public NestedSet<T> deserialize(CodedInputStream codedIn) + throws SerializationException, IOException { + Map<ByteString, Object> digestToChild = new HashMap<>(); + int nestedSetCount = codedIn.readInt32(); + Preconditions.checkState( + nestedSetCount >= 1, + "Should have at least serialized one nested set, got: %d", + nestedSetCount); + Order order = orderCodec.deserialize(codedIn); + Object children = null; + for (int i = 0; i < nestedSetCount; ++i) { + // Update var, the last one in the list is the top level nested set + children = deserializeOneNestedSet(codedIn, digestToChild); + } + return createNestedSet(order, children); + } + + private void serializeOneNestedSet( + Object children, CodedOutputStream codedOut, Map<Object, byte[]> childToDigest) + throws IOException, SerializationException { + // Serialize nested set into an inner byte array so we can take its digest + ByteArrayOutputStream childOutputStream = new ByteArrayOutputStream(); + HashingOutputStream hashingOutputStream = + new HashingOutputStream(Hashing.md5(), childOutputStream); + CodedOutputStream childCodedOut = CodedOutputStream.newInstance(hashingOutputStream); + if (children instanceof Object[]) { + serializeMultiItemChildArray((Object[]) children, childToDigest, childCodedOut); + } else if (children != NestedSet.EMPTY_CHILDREN) { + serializeSingleItemChildArray(children, childCodedOut); + } else { + // Empty set + childCodedOut.writeInt32NoTag(0); + } + childCodedOut.flush(); + byte[] digest = hashingOutputStream.hash().asBytes(); + codedOut.writeByteArrayNoTag(digest); + byte[] childBytes = childOutputStream.toByteArray(); + codedOut.writeByteArrayNoTag(childBytes); + childToDigest.put(children, digest); + } + + private void serializeMultiItemChildArray( + Object[] children, Map<Object, byte[]> childToDigest, CodedOutputStream childCodedOut) + throws IOException, SerializationException { + childCodedOut.writeInt32NoTag(children.length); + for (Object child : children) { + if (child instanceof Object[]) { + byte[] digest = + Preconditions.checkNotNull( + childToDigest.get(child), + "Digest not available; Are nested sets serialized in the right order?"); + childCodedOut.writeBoolNoTag(true); + childCodedOut.writeByteArrayNoTag(digest); + } else { + childCodedOut.writeBoolNoTag(false); + objectCodec.serialize(cast(child), childCodedOut); + } + } + } + + private void serializeSingleItemChildArray(Object children, CodedOutputStream childCodedOut) + throws IOException, SerializationException { + childCodedOut.writeInt32NoTag(1); + T singleChild = cast(children); + objectCodec.serialize(singleChild, childCodedOut); + } + + private Object deserializeOneNestedSet( + CodedInputStream codedIn, Map<ByteString, Object> digestToChild) + throws SerializationException, IOException { + ByteString digest = codedIn.readBytes(); + CodedInputStream childCodedIn = codedIn.readBytes().newCodedInput(); + childCodedIn.enableAliasing(true); // Allow efficient views of byte slices when reading digests + int childCount = childCodedIn.readInt32(); + final Object result; + if (childCount > 1) { + result = deserializeMultipleItemChildArray(digestToChild, childCodedIn, childCount); + } else if (childCount == 1) { + result = objectCodec.deserialize(childCodedIn); + } else { + result = NestedSet.EMPTY_CHILDREN; + } + digestToChild.put(digest, result); + return result; + } + + private Object deserializeMultipleItemChildArray( + Map<ByteString, Object> digestToChild, CodedInputStream childCodedIn, int childCount) + throws IOException, SerializationException { + Object[] children = new Object[childCount]; + for (int i = 0; i < childCount; ++i) { + boolean isTransitiveEntry = childCodedIn.readBool(); + if (isTransitiveEntry) { + ByteString digest = childCodedIn.readBytes(); + children[i] = + Preconditions.checkNotNull(digestToChild.get(digest), "Transitive nested set missing"); + } else { + children[i] = objectCodec.deserialize(childCodedIn); + } + } + return children; + } + + @SuppressWarnings("unchecked") + private T cast(Object object) { + return (T) object; + } + + private static Collection<Object> getTopologicallySortedChildren( + NestedSet<?> nestedSet) { + LinkedHashSet<Object> result = new LinkedHashSet<>(); + dfs(result, nestedSet.children); + return result; + } + + private static <T> NestedSet<T> createNestedSet(Order order, Object children) { + if (children == NestedSet.EMPTY_CHILDREN) { + return order.emptySet(); + } + return new NestedSet<>(order, children); + } + + private static void dfs(LinkedHashSet<Object> sets, Object children) { + if (sets.contains(children)) { + return; + } + if (children instanceof Object[]) { + for (Object child : (Object[]) children) { + if (child instanceof Object[]) { + dfs(sets, child); + } + } + } + sets.add(children); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java index 1f370f68de..baa1f280d3 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java @@ -128,7 +128,7 @@ public class ObjectCodecTester<T> { } /** Add subjects to be tested for serialization/deserialization. */ - Builder<T> addSubjects(ImmutableList<T> subjects) { + public Builder<T> addSubjects(ImmutableList<T> subjects) { subjectsBuilder.addAll(subjects); return this; } |