diff options
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/skyframe/serialization')
3 files changed, 146 insertions, 24 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java index 234c74f7d7..71f7a405de 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java @@ -153,7 +153,7 @@ class Memoizer { if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) { // TODO(janakr): there is no reason this is limited to the DO_NOT_MEMOIZE case, but we don't // memoize Strings, so putting the code here saves a tiny bit of work in the other cases. If - // the StringCodec#getStrategy changes, this block of code will have to move. + // String's codec's #getStrategy changes, this block of code will have to move. if (!maybeEmitString(context, obj, codec, codedOut)) { codec.serialize(context, obj, codedOut); } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java new file mode 100644 index 0000000000..325524e264 --- /dev/null +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/UnsafeJdk9StringCodec.java @@ -0,0 +1,121 @@ +// Copyright 2018 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe.serialization; + +import com.google.common.annotations.VisibleForTesting; +import com.google.devtools.build.lib.skyframe.serialization.autocodec.UnsafeProvider; +import com.google.protobuf.CodedInputStream; +import com.google.protobuf.CodedOutputStream; +import java.io.IOException; +import java.lang.reflect.Constructor; +import java.lang.reflect.Field; +import java.util.Arrays; + +/** + * A high-performance {@link ObjectCodec} for {@link String} objects specialized for Strings in + * JDK9, where a String can be represented as a byte array together with a single byte (0 or 1) for + * Latin-1 or UTF16 encoding. + */ +public class UnsafeJdk9StringCodec implements ObjectCodec<String> { + @VisibleForTesting + public static boolean canUseUnsafeCodec() { + return getVersion() > 1.8; + } + + private static double getVersion() { + String version = System.getProperty("java.version"); + int pos = version.indexOf('.'); + pos = version.indexOf('.', pos + 1); + return Double.parseDouble(version.substring(0, pos)); + } + + private final Constructor<String> constructor; + private final long valueOffset; + private final long coderOffset; + + public UnsafeJdk9StringCodec() { + Field valueField; + Field coderField; + try { + this.constructor = String.class.getDeclaredConstructor(byte[].class, byte.class); + valueField = String.class.getDeclaredField("value"); + coderField = String.class.getDeclaredField("coder"); + } catch (ReflectiveOperationException e) { + throw new IllegalStateException( + "Bad fields/constructor: " + + Arrays.toString(String.class.getDeclaredConstructors()) + + ", " + + Arrays.toString(String.class.getDeclaredFields()), + e); + } + this.constructor.setAccessible(true); + valueField.setAccessible(true); + valueOffset = UnsafeProvider.getInstance().objectFieldOffset(valueField); + coderField.setAccessible(true); + coderOffset = UnsafeProvider.getInstance().objectFieldOffset(coderField); + } + + @Override + public Class<? extends String> getEncodedClass() { + return String.class; + } + + @Override + public MemoizationStrategy getStrategy() { + // Don't memoize strings inside memoizing serialization, to preserve current behavior. + // TODO(janakr,brandjon,michajlo): Is it actually a problem to memoize strings? Doubt there + // would be much performance impact from increasing the size of the identity map, and we + // could potentially drop our string tables in the future. + return MemoizationStrategy.DO_NOT_MEMOIZE; + } + + @Override + public void serialize(SerializationContext context, String obj, CodedOutputStream codedOut) + throws SerializationException, IOException { + byte coder = UnsafeProvider.getInstance().getByte(obj, coderOffset); + byte[] value = (byte[]) UnsafeProvider.getInstance().getObject(obj, valueOffset); + // Optimize for the case that coder == 0, in which case we can just write the length here, + // potentially using just one byte. If coder != 0, we'll use 4 bytes, but that's vanishingly + // rare. + if (coder == 0) { + codedOut.writeInt32NoTag(value.length); + } else if (coder == 1) { + codedOut.writeInt32NoTag(-value.length); + } else { + throw new SerializationException("Unexpected coder value: " + coder + " for " + obj); + } + codedOut.writeRawBytes(value); + } + + @Override + public String deserialize(DeserializationContext context, CodedInputStream codedIn) + throws SerializationException, IOException { + int length = codedIn.readInt32(); + byte coder; + if (length >= 0) { + coder = 0; + } else { + coder = 1; + length = -length; + } + byte[] value = codedIn.readRawBytes(length); + try { + return constructor.newInstance(value, coder); + } catch (ReflectiveOperationException e) { + throw new SerializationException( + "Could not instantiate string: " + Arrays.toString(value) + ", " + coder, e); + } + } +} diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java index a5bd332c16..9d696e0eb9 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java @@ -14,43 +14,44 @@ package com.google.devtools.build.lib.skyframe.serialization.strings; +import static com.google.devtools.build.lib.skyframe.serialization.UnsafeJdk9StringCodec.canUseUnsafeCodec; + +import com.google.common.collect.ImmutableList; +import com.google.devtools.build.lib.skyframe.serialization.CodecRegisterer; import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; +import com.google.devtools.build.lib.skyframe.serialization.UnsafeJdk9StringCodec; /** Utility for accessing (potentially platform-specific) {@link String} {@link ObjectCodec}s. */ public final class StringCodecs { private static final StringCodec stringCodec = new StringCodec(); - private StringCodecs() {} + private static final UnsafeJdk9StringCodec unsafeCodec = + canUseUnsafeCodec() ? new UnsafeJdk9StringCodec() : null; /** - * Returns whether or not optimized codecs are available. Exposed so users can check at runtime - * if the expected optimizations are applied. + * Returns optimized singleton instance, if supported. Otherwise, returns a functional, but not + * optimized implementation. Currently supported on JDK9. */ - public static boolean supportsOptimizedAscii() { - return false; + public static ObjectCodec<String> asciiOptimized() { + return unsafeCodec != null ? unsafeCodec : stringCodec; } - /** - * Returns singleton instance optimized for almost-always ASCII data, if supported. Otherwise, - * returns a functional, but not optimized implementation. To tell if the optimized version is - * supported see {@link #supportsOptimizedAscii()}. - * - * <p>Note that when optimized, this instance can still serialize/deserialize UTF-8 data, but with - * potentially worse performance than {@link #simple()}. - * - * <p>Currently this is the same as {@link #simple()}, it remains to avoid a time-consuming - * cleanup and in case we want to revive an optimized version in the near future. - */ - // TODO(bazel-core): Determine if we need to revive ascii-optimized. - public static ObjectCodec<String> asciiOptimized() { - return simple(); + static class UnsafeStringCodecRegisterer implements CodecRegisterer<UnsafeJdk9StringCodec> { + @Override + public Iterable<? extends ObjectCodec<?>> getCodecsToRegister() { + return canUseUnsafeCodec() ? ImmutableList.of(unsafeCodec) : ImmutableList.of(); + } } - /** - * Returns singleton instance of basic implementation. Should be preferred over - * {@link #asciiOptimized()} when a sufficient amount of UTF-8 data is expected. - */ + static class SimpleStringCodecRegisterer implements CodecRegisterer<StringCodec> { + @Override + public Iterable<StringCodec> getCodecsToRegister() { + return canUseUnsafeCodec() ? ImmutableList.of() : ImmutableList.of(stringCodec); + } + } + + /** Returns singleton instance of basic implementation. */ public static ObjectCodec<String> simple() { return stringCodec; } |