about | summary | refs | log | tree | commit | diff | homepage
path: root/src/main/java/com/google/devtools/build/lib/skyframe/serialization
diff options
context:
space:
mode:
author: janakr <janakr@google.com> 2018-05-21 12:58:59 -0700
committer: Copybara-Service <copybara-piper@google.com> 2018-05-21 13:00:32 -0700
commit: 88f0f85fe775db0aac223050d1454c99e3f9f80e (patch)
tree: a2fef9444b95f3107afd085487cf08a673b0a284 /src/main/java/com/google/devtools/build/lib/skyframe/serialization
parent: caed1a27756d513d437143186771981b5fcfcdcc (diff)
When using JDK9, replace naive StringCodec with an optimized codec that uses the String's underlying raw bytes. This avoids byte copying and UTF interpretation. Experiments show it is approximately 15(!) times faster than the naive StringCodec for serialization and 2 times faster for deserialization (10 times faster for non-ASCII strings).
PiperOrigin-RevId: 197441758
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/skyframe/serialization')
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java | 2
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java | 121
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java | 47
3 files changed, 146 insertions, 24 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
index 234c74f7d7..71f7a405de 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
@@ -153,7 +153,7 @@ class Memoizer {
if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) {
// TODO(janakr): there is no reason this is limited to the DO_NOT_MEMOIZE case, but we don't
// memoize Strings, so putting the code here saves a tiny bit of work in the other cases. If
- // the StringCodec#getStrategy changes, this block of code will have to move.
+ // String's codec's #getStrategy changes, this block of code will have to move.
if (!maybeEmitString(context, obj, codec, codedOut)) {
codec.serialize(context, obj, codedOut);
}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java
new file mode 100644
index 0000000000..325524e264
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java
@@ -0,0 +1,121 @@
+// Copyright 2018 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.skyframe.serialization;
+
+import com.google.common.annotations.VisibleForTesting;
+import com.google.devtools.build.lib.skyframe.serialization.autocodec.UnsafeProvider;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Constructor;
+import java.lang.reflect.Field;
+import java.util.Arrays;
+
+/**
+ * A high-performance {@link ObjectCodec} for {@link String} objects specialized for Strings in
+ * JDK9, where a String can be represented as a byte array together with a single byte (0 or 1) for
+ * Latin-1 or UTF16 encoding.
+ *
+ * <p>Wire format: a varint whose sign carries the coder (non-negative for coder 0, negated for
+ * coder 1) and whose magnitude is the byte count, followed by the String's raw backing bytes.
+ */
+public class UnsafeJdk9StringCodec implements ObjectCodec<String> {
+  /** Returns whether the running JVM reports a Java version above 1.8, as this codec requires. */
+  @VisibleForTesting
+  public static boolean canUseUnsafeCodec() {
+    return getVersion() > 1.8;
+  }
+
+  /** Returns the JVM's specification version ("1.8", "9", "10", ...) as a number. */
+  private static double getVersion() {
+    // java.version can lack dots ("9", "10", "9-ea"); the specification version always parses.
+    return Double.parseDouble(System.getProperty("java.specification.version"));
+  }
+
+  private final Constructor<String> constructor;
+  private final long valueOffset;
+  private final long coderOffset;
+
+  /**
+   * Resolves the non-public {@code String(byte[], byte)} constructor and the offsets of the
+   * {@code value} and {@code coder} fields. Throws IllegalStateException if any is missing.
+   */
+  public UnsafeJdk9StringCodec() {
+    Field valueField;
+    Field coderField;
+    try {
+      this.constructor = String.class.getDeclaredConstructor(byte[].class, byte.class);
+      valueField = String.class.getDeclaredField("value");
+      coderField = String.class.getDeclaredField("coder");
+    } catch (ReflectiveOperationException e) {
+      throw new IllegalStateException(
+          "Bad fields/constructor: " + Arrays.toString(String.class.getDeclaredConstructors())
+              + ", " + Arrays.toString(String.class.getDeclaredFields()),
+          e);
+    }
+    this.constructor.setAccessible(true);
+    valueField.setAccessible(true);
+    valueOffset = UnsafeProvider.getInstance().objectFieldOffset(valueField);
+    coderField.setAccessible(true);
+    coderOffset = UnsafeProvider.getInstance().objectFieldOffset(coderField);
+  }
+
+  @Override
+  public Class<? extends String> getEncodedClass() {
+    return String.class;
+  }
+
+  @Override
+  public MemoizationStrategy getStrategy() {
+    // Don't memoize strings inside memoizing serialization, to preserve current behavior.
+    // TODO(janakr,brandjon,michajlo): Is it actually a problem to memoize strings? Doubt there
+    // would be much performance impact from increasing the size of the identity map, and we
+    // could potentially drop our string tables in the future.
+    return MemoizationStrategy.DO_NOT_MEMOIZE;
+  }
+
+  @Override
+  public void serialize(SerializationContext context, String obj, CodedOutputStream codedOut)
+      throws SerializationException, IOException {
+    byte coder = UnsafeProvider.getInstance().getByte(obj, coderOffset);
+    byte[] value = (byte[]) UnsafeProvider.getInstance().getObject(obj, valueOffset);
+    // Optimize for the case that coder == 0, in which case we can just write the length here,
+    // potentially using just one byte. If coder != 0, the negative value is sign-extended and
+    // takes 10 bytes as a varint (not 4), but that's vanishingly rare.
+    if (coder == 0) {
+      codedOut.writeInt32NoTag(value.length);
+    } else if (coder == 1) {
+      codedOut.writeInt32NoTag(-value.length);
+    } else {
+      throw new SerializationException("Unexpected coder value: " + coder + " for " + obj);
+    }
+    codedOut.writeRawBytes(value);
+  }
+
+  @Override
+  public String deserialize(DeserializationContext context, CodedInputStream codedIn)
+      throws SerializationException, IOException {
+    // The sign of the length prefix carries the coder: negative means coder 1, else coder 0.
+    int length = codedIn.readInt32();
+    byte coder = (byte) (length < 0 ? 1 : 0);
+    byte[] value = codedIn.readRawBytes(coder == 0 ? length : -length);
+    try {
+      return constructor.newInstance(value, coder);
+    } catch (ReflectiveOperationException e) {
+      throw new SerializationException(
+          "Could not instantiate string: " + Arrays.toString(value) + ", " + coder, e);
+    }
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
index a5bd332c16..9d696e0eb9 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
@@ -14,43 +14,44 @@
package com.google.devtools.build.lib.skyframe.serialization.strings;
+import static com.google.devtools.build.lib.skyframe.serialization.UnsafeJdk9StringCodec.canUseUnsafeCodec;
+
+import com.google.common.collect.ImmutableList;
+import com.google.devtools.build.lib.skyframe.serialization.CodecRegisterer;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.devtools.build.lib.skyframe.serialization.UnsafeJdk9StringCodec;
/** Utility for accessing (potentially platform-specific) {@link String} {@link ObjectCodec}s. */
public final class StringCodecs {
private static final StringCodec stringCodec = new StringCodec();
- private StringCodecs() {}
+ private static final UnsafeJdk9StringCodec unsafeCodec =
+ canUseUnsafeCodec() ? new UnsafeJdk9StringCodec() : null;
/**
- * Returns whether or not optimized codecs are available. Exposed so users can check at runtime
- * if the expected optimizations are applied.
+ * Returns optimized singleton instance, if supported. Otherwise, returns a functional, but not
+ * optimized implementation. Currently supported on JDK9.
*/
- public static boolean supportsOptimizedAscii() {
- return false;
+ public static ObjectCodec<String> asciiOptimized() {
+ return unsafeCodec != null ? unsafeCodec : stringCodec;
}
- /**
- * Returns singleton instance optimized for almost-always ASCII data, if supported. Otherwise,
- * returns a functional, but not optimized implementation. To tell if the optimized version is
- * supported see {@link #supportsOptimizedAscii()}.
- *
- * <p>Note that when optimized, this instance can still serialize/deserialize UTF-8 data, but with
- * potentially worse performance than {@link #simple()}.
- *
- * <p>Currently this is the same as {@link #simple()}, it remains to avoid a time-consuming
- * cleanup and in case we want to revive an optimized version in the near future.
- */
- // TODO(bazel-core): Determine if we need to revive ascii-optimized.
- public static ObjectCodec<String> asciiOptimized() {
- return simple();
+ static class UnsafeStringCodecRegisterer implements CodecRegisterer<UnsafeJdk9StringCodec> {
+ @Override
+ public Iterable<? extends ObjectCodec<?>> getCodecsToRegister() {
+ return canUseUnsafeCodec() ? ImmutableList.of(unsafeCodec) : ImmutableList.of();
+ }
}
- /**
- * Returns singleton instance of basic implementation. Should be preferred over
- * {@link #asciiOptimized()} when a sufficient amount of UTF-8 data is expected.
- */
+ static class SimpleStringCodecRegisterer implements CodecRegisterer<StringCodec> {
+ @Override
+ public Iterable<StringCodec> getCodecsToRegister() {
+ return canUseUnsafeCodec() ? ImmutableList.of() : ImmutableList.of(stringCodec);
+ }
+ }
+
+ /** Returns singleton instance of basic implementation. */
public static ObjectCodec<String> simple() {
return stringCodec;
}