diff options
6 files changed, 326 insertions, 6 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD index 86639cf3bc..6c6ef3eb20 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/BUILD @@ -9,9 +9,13 @@ filegroup( # Library of collection utilities. java_library( name = "nestedset", - srcs = glob([ - "*.java", - ]), + srcs = [ + "NestedSet.java", + "NestedSetBuilder.java", + "NestedSetView.java", + "NestedSetVisitor.java", + "Order.java", + ], deps = [ "//src/main/java/com/google/devtools/build/lib/collect/compacthashset", "//src/main/java/com/google/devtools/build/lib/concurrent", @@ -19,3 +23,14 @@ java_library( "//third_party:jsr305", ], ) + +java_library( + name = "serialization", + srcs = ["NestedSetCodec.java"], + deps = [ + ":nestedset", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//third_party:guava", + "//third_party/protobuf:protobuf_java", + ], +) diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java index 89db4ecafc..b9e43089c0 100644 --- a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSet.java @@ -37,11 +37,11 @@ import javax.annotation.Nullable; public final class NestedSet<E> implements Iterable<E> { private final Order order; - private final Object children; + final Object children; private byte[] memo; private static final byte[] LEAF_MEMO = {}; - private static final Object[] EMPTY_CHILDREN = {}; + static final Object[] EMPTY_CHILDREN = {}; /** * Construct an empty NestedSet. Should only be called by Order's class initializer. @@ -134,6 +134,16 @@ public final class NestedSet<E> implements Iterable<E> { } } + // Only used by deserialization + NestedSet(Order order, Object children) { + this.order = order; + this.children = children; + boolean hasChildren = + children instanceof Object[] + && (Arrays.stream((Object[]) children).anyMatch(child -> child instanceof Object[])); + this.memo = hasChildren ? null : LEAF_MEMO; + } + /** * Returns the ordering of this nested set. */ diff --git a/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java new file mode 100644 index 0000000000..eee5edc136 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodec.java @@ -0,0 +1,206 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.collect.nestedset; + +import com.google.common.base.Preconditions; +import com.google.common.hash.Hashing; +import com.google.common.hash.HashingOutputStream; +import com.google.devtools.build.lib.skyframe.serialization.EnumCodec; +import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; +import com.google.devtools.build.lib.skyframe.serialization.SerializationException; +import com.google.protobuf.ByteString; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Collection; +import java.util.HashMap; +import java.util.IdentityHashMap; +import java.util.LinkedHashSet; +import java.util.Map; + +/** + * Codec for {@link NestedSet}. + * + * <p>Nested sets are serialized by sorting the sub-graph in topological order, then writing the + * nodes in that order. As a node is written we remember its digest. When serializing a node higher + * in the graph, we replace any edge to another nested set with its digest. + */ +public class NestedSetCodec<T> implements ObjectCodec<NestedSet<T>> { + + private static final EnumCodec<Order> orderCodec = new EnumCodec<>(Order.class); + private final ObjectCodec<T> objectCodec; + + public NestedSetCodec(ObjectCodec<T> objectCodec) { + this.objectCodec = objectCodec; + } + + @SuppressWarnings("unchecked") + @Override + public Class<NestedSet<T>> getEncodedClass() { + // Compiler doesn't like cast from Class<NestedSet> -> Class<NestedSet<T>>, but it + // does allow what we see below. Type is lost at runtime anyway, so while gross this works. + return (Class<NestedSet<T>>) ((Class<?>) NestedSet.class); + } + + @Override + public void serialize(NestedSet<T> obj, CodedOutputStream codedOut) + throws SerializationException, IOException { + // Topo sort the nested set to ensure digests are available for children at time of writing + Collection<Object> topoSortedChildren = getTopologicallySortedChildren(obj); + Map<Object, byte[]> childToDigest = new IdentityHashMap<>(); + codedOut.writeInt32NoTag(topoSortedChildren.size()); + orderCodec.serialize(obj.getOrder(), codedOut); + for (Object children : topoSortedChildren) { + serializeOneNestedSet(children, codedOut, childToDigest); + } + } + + @Override + public NestedSet<T> deserialize(CodedInputStream codedIn) + throws SerializationException, IOException { + Map<ByteString, Object> digestToChild = new HashMap<>(); + int nestedSetCount = codedIn.readInt32(); + Preconditions.checkState( + nestedSetCount >= 1, + "Should have at least serialized one nested set, got: %d", + nestedSetCount); + Order order = orderCodec.deserialize(codedIn); + Object children = null; + for (int i = 0; i < nestedSetCount; ++i) { + // Update var, the last one in the list is the top level nested set + children = deserializeOneNestedSet(codedIn, digestToChild); + } + return createNestedSet(order, children); + } + + private void serializeOneNestedSet( + Object children, CodedOutputStream codedOut, Map<Object, byte[]> childToDigest) + throws IOException, SerializationException { + // Serialize nested set into an inner byte array so we can take its digest + ByteArrayOutputStream childOutputStream = new ByteArrayOutputStream(); + HashingOutputStream hashingOutputStream = + new HashingOutputStream(Hashing.md5(), childOutputStream); + CodedOutputStream childCodedOut = CodedOutputStream.newInstance(hashingOutputStream); + if (children instanceof Object[]) { + serializeMultiItemChildArray((Object[]) children, childToDigest, childCodedOut); + } else if (children != NestedSet.EMPTY_CHILDREN) { + serializeSingleItemChildArray(children, childCodedOut); + } else { + // Empty set + childCodedOut.writeInt32NoTag(0); + } + childCodedOut.flush(); + byte[] digest = hashingOutputStream.hash().asBytes(); + codedOut.writeByteArrayNoTag(digest); + byte[] childBytes = childOutputStream.toByteArray(); + codedOut.writeByteArrayNoTag(childBytes); + childToDigest.put(children, digest); + } + + private void serializeMultiItemChildArray( + Object[] children, Map<Object, byte[]> childToDigest, CodedOutputStream childCodedOut) + throws IOException, SerializationException { + childCodedOut.writeInt32NoTag(children.length); + for (Object child : children) { + if (child instanceof Object[]) { + byte[] digest = + Preconditions.checkNotNull( + childToDigest.get(child), + "Digest not available; Are nested sets serialized in the right order?"); + childCodedOut.writeBoolNoTag(true); + childCodedOut.writeByteArrayNoTag(digest); + } else { + childCodedOut.writeBoolNoTag(false); + objectCodec.serialize(cast(child), childCodedOut); + } + } + } + + private void serializeSingleItemChildArray(Object children, CodedOutputStream childCodedOut) + throws IOException, SerializationException { + childCodedOut.writeInt32NoTag(1); + T singleChild = cast(children); + objectCodec.serialize(singleChild, childCodedOut); + } + + private Object deserializeOneNestedSet( + CodedInputStream codedIn, Map<ByteString, Object> digestToChild) + throws SerializationException, IOException { + ByteString digest = codedIn.readBytes(); + CodedInputStream childCodedIn = codedIn.readBytes().newCodedInput(); + childCodedIn.enableAliasing(true); // Allow efficient views of byte slices when reading digests + int childCount = childCodedIn.readInt32(); + final Object result; + if (childCount > 1) { + result = deserializeMultipleItemChildArray(digestToChild, childCodedIn, childCount); + } else if (childCount == 1) { + result = objectCodec.deserialize(childCodedIn); + } else { + result = NestedSet.EMPTY_CHILDREN; + } + digestToChild.put(digest, result); + return result; + } + + private Object deserializeMultipleItemChildArray( + Map<ByteString, Object> digestToChild, CodedInputStream childCodedIn, int childCount) + throws IOException, SerializationException { + Object[] children = new Object[childCount]; + for (int i = 0; i < childCount; ++i) { + boolean isTransitiveEntry = childCodedIn.readBool(); + if (isTransitiveEntry) { + ByteString digest = childCodedIn.readBytes(); + children[i] = + Preconditions.checkNotNull(digestToChild.get(digest), "Transitive nested set missing"); + } else { + children[i] = objectCodec.deserialize(childCodedIn); + } + } + return children; + } + + @SuppressWarnings("unchecked") + private T cast(Object object) { + return (T) object; + } + + private static Collection<Object> getTopologicallySortedChildren( + NestedSet<?> nestedSet) { + LinkedHashSet<Object> result = new LinkedHashSet<>(); + dfs(result, nestedSet.children); + return result; + } + + private static <T> NestedSet<T> createNestedSet(Order order, Object children) { + if (children == NestedSet.EMPTY_CHILDREN) { + return order.emptySet(); + } + return new NestedSet<>(order, children); + } + + private static void dfs(LinkedHashSet<Object> sets, Object children) { + if (sets.contains(children)) { + return; + } + if (children instanceof Object[]) { + for (Object child : (Object[]) children) { + if (child instanceof Object[]) { + dfs(sets, child); + } + } + } + sets.add(children); + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java index 1f370f68de..baa1f280d3 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils/ObjectCodecTester.java @@ -128,7 +128,7 @@ public class ObjectCodecTester<T> { } /** Add subjects to be tested for serialization/deserialization. */ - Builder<T> addSubjects(ImmutableList<T> subjects) { + public Builder<T> addSubjects(ImmutableList<T> subjects) { subjectsBuilder.addAll(subjects); return this; } diff --git a/src/test/java/com/google/devtools/build/lib/BUILD b/src/test/java/com/google/devtools/build/lib/BUILD index ae04d46f4c..b5305a2b93 100644 --- a/src/test/java/com/google/devtools/build/lib/BUILD +++ b/src/test/java/com/google/devtools/build/lib/BUILD @@ -184,8 +184,11 @@ java_test( "//src/main/java/com/google/devtools/build/lib/clock", "//src/main/java/com/google/devtools/build/lib/collect", "//src/main/java/com/google/devtools/build/lib/collect/nestedset", + "//src/main/java/com/google/devtools/build/lib/collect/nestedset:serialization", "//src/main/java/com/google/devtools/build/lib/concurrent", "//src/main/java/com/google/devtools/build/lib/shell", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization", + "//src/main/java/com/google/devtools/build/lib/skyframe/serialization/testutils", "//src/main/java/com/google/devtools/build/lib/vfs", "//src/main/java/com/google/devtools/build/lib/vfs/inmemoryfs", "//src/main/java/com/google/devtools/common/options", diff --git a/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java b/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java new file mode 100644 index 0000000000..b0f4f8e13a --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/collect/nestedset/NestedSetCodecTest.java @@ -0,0 +1,86 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +package com.google.devtools.build.lib.collect.nestedset; + +import static com.google.common.truth.Truth.assertThat; + +import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.skyframe.serialization.strings.StringCodecs; +import com.google.devtools.build.lib.skyframe.serialization.testutils.ObjectCodecTester; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** Tests for {@link NestedSetCodec}. */ +@RunWith(JUnit4.class) +public class NestedSetCodecTest { + + private static final NestedSet<String> SHARED_NESTED_SET = + NestedSetBuilder.<String>stableOrder().add("e").build(); + + @Test + public void testCodec() throws Exception { + ImmutableList<NestedSet<String>> subjects = + ImmutableList.of( + NestedSetBuilder.emptySet(Order.STABLE_ORDER), + NestedSetBuilder.emptySet(Order.NAIVE_LINK_ORDER), + NestedSetBuilder.create(Order.STABLE_ORDER, "a"), + NestedSetBuilder.create(Order.STABLE_ORDER, "a", "b", "c"), + NestedSetBuilder.<String>stableOrder() + .add("a") + .add("b") + .addTransitive( + NestedSetBuilder.<String>stableOrder() + .add("c") + .addTransitive(SHARED_NESTED_SET) + .build()) + .addTransitive( + NestedSetBuilder.<String>stableOrder() + .add("d") + .addTransitive(SHARED_NESTED_SET) + .build()) + .addTransitive(NestedSetBuilder.emptySet(Order.STABLE_ORDER)) + .build()); + + ObjectCodecTester.newBuilder(new NestedSetCodec<>(StringCodecs.simple())) + .addSubjects(subjects) + .verificationFunction(NestedSetCodecTest::verifyDeserialization) + .buildAndRunTests(); + } + + @SuppressWarnings("unchecked") + private static void verifyDeserialization(NestedSet<String> subject, Object deserialized) { + NestedSet<String> other = (NestedSet<String>) deserialized; + assertThat(subject.getOrder()).isEqualTo(other.getOrder()); + assertThat(subject.toSet()).isEqualTo(other.toSet()); + verifyStructure(subject.children, other.children); + } + + private static void verifyStructure(Object lhs, Object rhs) { + if (lhs == NestedSet.EMPTY_CHILDREN) { + assertThat(rhs).isSameAs(NestedSet.EMPTY_CHILDREN); + } else if (lhs instanceof Object[]) { + assertThat(rhs).isInstanceOf(Object[].class); + Object[] lhsArray = (Object[]) lhs; + Object[] rhsArray = (Object[]) rhs; + int n = lhsArray.length; + assertThat(rhsArray).hasLength(n); + for (int i = 0; i < n; ++i) { + verifyStructure(lhsArray[i], rhsArray[i]); + } + } else { + assertThat(lhs).isEqualTo(rhs); + } + } +} |