From 1606cf208cc5194b13ca212819371f7e4ba43a6b Mon Sep 17 00:00:00 2001 From: michajlo Date: Tue, 17 Oct 2017 23:07:09 +0200 Subject: Fall back on basic StringCodec if FastStringCodec isn't available Also adds a method which can be used to tell if this behavior actually applied, for more performance-sensitive users. PiperOrigin-RevId: 172512011 --- .../serialization/strings/FastStringCodec.java | 48 +++++++++++++++------- .../serialization/strings/StringCodecs.java | 36 +++++++++++++--- .../serialization/strings/FastStringCodecTest.java | 3 +- .../serialization/strings/StringCodecsTest.java | 36 ++++++++++++++++ 4 files changed, 101 insertions(+), 22 deletions(-) create mode 100644 src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecsTest.java diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java index e763f70b50..f4b44c1568 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java @@ -14,6 +14,7 @@ package com.google.devtools.build.lib.skyframe.serialization.strings; +import com.google.common.base.Preconditions; import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; import com.google.protobuf.CodedInputStream; import com.google.protobuf.CodedOutputStream; @@ -29,9 +30,14 @@ import sun.misc.Unsafe; * Similar to {@link StringCodec}, except with deserialization optimized for ascii data. It can * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the * majority of the data passing through will be ascii. + * + *

<p>Users MUST check if this class is usable by checking {@link #isAvailable()}. */ class FastStringCodec implements ObjectCodec { + /** Sentinel value for missing {@link #STRING_VALUE_OFFSET}. */ + private static final long UNSUPPORTED_STRING_VALUE_OFFSET = -1; + private static final Unsafe theUnsafe; private static final long STRING_VALUE_OFFSET; @@ -39,21 +45,16 @@ class FastStringCodec implements ObjectCodec { static { theUnsafe = getUnsafe(); - try { - // String's 'value' field stores its char[]. If this field changes name or type then the - // reflective check below will fail. We can reasonably expect our approach to be stable for - // now, but things are likely to change in java 9, hopefully in a way which obsoletes this - // optimization. - Field valueField = String.class.getDeclaredField("value"); - Class valueFieldType = valueField.getType(); - if (!valueFieldType.equals(char[].class)) { - throw new AssertionError( - "Expected String's value field to be char[], but was " + valueFieldType); - } - STRING_VALUE_OFFSET = theUnsafe.objectFieldOffset(valueField); - } catch (NoSuchFieldException | SecurityException e) { - throw new AssertionError("Failed to find String's 'value' offset", e); - } + STRING_VALUE_OFFSET = getStringValueOffset(); + } + + /** Returns whether or not this implementation is supported. */ + static boolean isAvailable() { + return STRING_VALUE_OFFSET != UNSUPPORTED_STRING_VALUE_OFFSET; + } + + FastStringCodec() { + Preconditions.checkState(isAvailable(), "FastStringCodec isn't available!"); } @Override @@ -137,4 +138,21 @@ class FastStringCodec implements ObjectCodec { throw new AssertionError("Unable to get sun.misc.Unsafe", pae); } } + + private static long getStringValueOffset() { + try { + // We expect a String's value field to be a char[] - if that's not the case then we're + // probably on a more modern JDK and this optimization isn't available. 
+ Field valueField = String.class.getDeclaredField("value"); + Class valueFieldType = valueField.getType(); + if (valueFieldType.equals(char[].class)) { + return theUnsafe.objectFieldOffset(valueField); + } else { + // value was of a different type, bail. + return UNSUPPORTED_STRING_VALUE_OFFSET; + } + } catch (NoSuchFieldException | SecurityException e) { + throw new AssertionError("Failed to find String's 'value' field/offset", e); + } + } } diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java index 24f36ecb07..60e5f29f87 100644 --- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java +++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java @@ -15,22 +15,46 @@ package com.google.devtools.build.lib.skyframe.serialization.strings; import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; +import java.util.logging.Logger; /** Utility for accessing (potentially platform-specific) {@link String} {@link ObjectCodec}s. */ public final class StringCodecs { - private static final FastStringCodec fastStringCodec = new FastStringCodec(); - private static final StringCodec stringCodec = new StringCodec(); + private static final Logger logger = Logger.getLogger(StringCodecs.class.getName()); + + private static final StringCodec stringCodec; + private static final ObjectCodec asciiOptimized; + + static { + stringCodec = new StringCodec(); + if (FastStringCodec.isAvailable()) { + asciiOptimized = new FastStringCodec(); + } else { + logger.warning("Optimized string deserialization unavailable"); + asciiOptimized = stringCodec; + } + } private StringCodecs() {} /** - * Returns singleton instance optimized for almost-always ASCII data. 
This instance can still - * serialize/deserialize UTF-8 data, but with potentially worse performance than - * {@link #simple()}. + * Returns whether or not optimized codecs are available. Exposed so users can check at runtime + * if the expected optimizations are applied. + */ + public static boolean supportsOptimizedAscii() { + return asciiOptimized instanceof FastStringCodec; + } + + /** + * Returns singleton instance optimized for almost-always ASCII data, if supported. Otherwise, + * returns a functional, but not optimized implementation. To tell if the optimized version is + * supported see {@link #supportsOptimizedAscii()}. + * + *

<p>Note that when optimized, this instance can still serialize/deserialize UTF-8 data, but with + * potentially worse performance than {@link #simple()}. */ public static ObjectCodec asciiOptimized() { - return fastStringCodec; + return asciiOptimized; } /** diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodecTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodecTest.java index b472f54b49..30b9a1ded2 100644 --- a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodecTest.java +++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodecTest.java @@ -26,7 +26,8 @@ public class FastStringCodecTest extends AbstractObjectCodecTest { public FastStringCodecTest() { super( - new FastStringCodec(), + // TODO(michajlo): Don't bother running this test if FastStringCodec isn't available. + FastStringCodec.isAvailable() ? new FastStringCodec() : new StringCodec(), "ow now brown cow. ow now brown cow", "(╯°□°)╯︵┻━┻ string with utf8/ascii", "string with ascii/utf8 (╯°□°)╯︵┻━┻", diff --git a/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecsTest.java b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecsTest.java new file mode 100644 index 0000000000..52c50fc8ab --- /dev/null +++ b/src/test/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecsTest.java @@ -0,0 +1,36 @@ +// Copyright 2017 The Bazel Authors. All rights reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +package com.google.devtools.build.lib.skyframe.serialization.strings; + +import static com.google.common.truth.Truth.assertThat; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.junit.runners.JUnit4; + +/** {@link StringCodecs} tests */ +@RunWith(JUnit4.class) +public class StringCodecsTest { + + @Test + public void testUsesFastStringCodecIfAvailable() { + if (FastStringCodec.isAvailable()) { + assertThat(StringCodecs.asciiOptimized()).isInstanceOf(FastStringCodec.class); + } else { + assertThat(StringCodecs.asciiOptimized()).isSameAs(StringCodecs.simple()); + } + } + +} -- cgit v1.2.3