about | summary | refs | log | tree | commit | diff | homepage
path: root/src/main/java
diff options
context:
space:
mode:
author: michajlo <michajlo@google.com> 2018-03-26 20:53:04 -0700
committer: Copybara-Service <copybara-piper@google.com> 2018-03-26 20:54:37 -0700
commit: b8765a6656415eb6380fffd20202515918880d96 (patch)
tree: 8a629f6b8c998206578df76cb9f78ae01451abb2 /src/main/java
parent: 327c74df7c3b4820a0620bf9696c3f88bffebda3 (diff)
Remove FastStringCodec
Currently unnecessary

PiperOrigin-RevId: 190568226
Diffstat (limited to 'src/main/java')
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java | 171
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java | 56
2 files changed, 7 insertions, 220 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java
deleted file mode 100644
index 0cd658a8c6..0000000000
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java
+++ /dev/null
@@ -1,171 +0,0 @@
-// Copyright 2017 The Bazel Authors. All rights reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-// http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-
-package com.google.devtools.build.lib.skyframe.serialization.strings;
-
-import com.google.common.base.Preconditions;
-import com.google.devtools.build.lib.skyframe.serialization.DeserializationContext;
-import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
-import com.google.devtools.build.lib.skyframe.serialization.SerializationContext;
-import com.google.protobuf.CodedInputStream;
-import com.google.protobuf.CodedOutputStream;
-import java.io.IOException;
-import java.lang.reflect.Field;
-import java.nio.charset.StandardCharsets;
-import java.security.AccessController;
-import java.security.PrivilegedActionException;
-import java.security.PrivilegedExceptionAction;
-import sun.misc.Unsafe;
-
-/**
- * Similar to {@link StringCodec}, except with deserialization optimized for ascii data. It can
- * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the
- * majority of the data passing through will be ascii.
- *
- * <p>Users <b>MUST</b> check if this class is usable by checking {@link #isAvailable()}.
- */
-class FastStringCodec implements ObjectCodec<String> {
-
- /** Sentinel value for missing {@link #STRING_VALUE_OFFSET}. */
- private static final long UNSUPPORTED_STRING_VALUE_OFFSET = -1;
-
- private static final Unsafe theUnsafe;
- private static final long STRING_VALUE_OFFSET;
-
- private static final String EMPTY_STRING = "";
-
- static {
- theUnsafe = getUnsafe();
- STRING_VALUE_OFFSET = getStringValueOffset();
- }
-
- /** Returns whether or not this implementation is supported. */
- static boolean isAvailable() {
- return STRING_VALUE_OFFSET != UNSUPPORTED_STRING_VALUE_OFFSET;
- }
-
- FastStringCodec() {
- Preconditions.checkState(isAvailable(), "FastStringCodec isn't available!");
- }
-
- @Override
- public Class<String> getEncodedClass() {
- return String.class;
- }
-
- @Override
- public MemoizationStrategy getStrategy() {
- // Don't memoize strings inside memoizing serialization, to preserve current behavior.
- // TODO(janakr,brandjon,michajlo): Is it actually a problem to memoize strings? Doubt there
- // would be much performance impact from increasing the size of the identity map, and we
- // could potentially drop our string tables in the future.
- return MemoizationStrategy.DO_NOT_MEMOIZE;
- }
-
- @Override
- public void serialize(SerializationContext context, String string, CodedOutputStream codedOut)
- throws IOException {
- codedOut.writeStringNoTag(string);
- }
-
- @Override
- public String deserialize(DeserializationContext context, CodedInputStream codedIn)
- throws IOException {
- int length = codedIn.readInt32();
- if (length == 0) {
- return EMPTY_STRING;
- }
-
- char[] maybeDecoded = new char[length];
- for (int i = 0; i < length; i++) {
- // Read one byte at a time to avoid creating a new ByteString/copy of the underlying array.
- byte b = codedIn.readRawByte();
- // Check highest order bit, if it's set we've crossed into extended ascii/utf8.
- if ((b & 0x80) == 0) {
- maybeDecoded[i] = (char) b;
- } else {
- // Fail, we encountered a non-ascii byte. Copy what we have so far plus and then the rest
- // of the data into a buffer and let String's constructor do the UTF-8 decoding work.
- byte[] decodeFrom = new byte[length];
- for (int j = 0; j < i; j++) {
- decodeFrom[j] = (byte) maybeDecoded[j];
- }
- decodeFrom[i] = b;
- for (int j = i + 1; j < length; j++) {
- decodeFrom[j] = codedIn.readRawByte();
- }
- return new String(decodeFrom, StandardCharsets.UTF_8);
- }
- }
-
- try {
- String result = (String) theUnsafe.allocateInstance(String.class);
- theUnsafe.putObject(result, STRING_VALUE_OFFSET, maybeDecoded);
- return result;
- } catch (Exception e) {
- // This should only catch InstantiationException, but that makes IntelliJ unhappy for
- // some reason; it insists that that exception cannot be thrown from here, even though it
- // is set to JDK 8
- throw new IllegalStateException("Could not create string", e);
- }
- }
-
- /**
- * Get a reference to {@link sun.misc.Unsafe} or throw an {@link AssertionError} if failing to do
- * so. Failure is highly unlikely, but possible if the underlying VM stores unsafe in an
- * unexpected location.
- */
- private static Unsafe getUnsafe() {
- try {
- // sun.misc.Unsafe is intentionally difficult to get a hold of - it gives us the power to
- // do things like access raw memory and segfault the JVM.
- return AccessController.doPrivileged(
- new PrivilegedExceptionAction<Unsafe>() {
- @Override
- public Unsafe run() throws Exception {
- Class<Unsafe> unsafeClass = Unsafe.class;
- // Unsafe usually exists in the field 'theUnsafe', however check all fields
- // in case it's somewhere else in this VM's version of Unsafe.
- for (Field f : unsafeClass.getDeclaredFields()) {
- f.setAccessible(true);
- Object fieldValue = f.get(null);
- if (unsafeClass.isInstance(fieldValue)) {
- return unsafeClass.cast(fieldValue);
- }
- }
- throw new AssertionError("Failed to find sun.misc.Unsafe instance");
- }
- });
- } catch (PrivilegedActionException pae) {
- throw new AssertionError("Unable to get sun.misc.Unsafe", pae);
- }
- }
-
- private static long getStringValueOffset() {
- try {
- // We expect a String's value field to be a char[] - if that's not the case then we're
- // probably on a more modern JDK and this optimization isn't available.
- Field valueField = String.class.getDeclaredField("value");
- Class<?> valueFieldType = valueField.getType();
- if (valueFieldType.equals(char[].class)) {
- return theUnsafe.objectFieldOffset(valueField);
- } else {
- // value was of a different type, bail.
- return UNSUPPORTED_STRING_VALUE_OFFSET;
- }
- } catch (NoSuchFieldException | SecurityException e) {
- throw new AssertionError("Failed to find String's 'value' field/offset", e);
- }
- }
-}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
index f9b294dac3..a5bd332c16 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
@@ -14,28 +14,12 @@
package com.google.devtools.build.lib.skyframe.serialization.strings;
-import com.google.devtools.build.lib.skyframe.serialization.CodecRegisterer;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
-import java.util.Collections;
-import java.util.logging.Logger;
/** Utility for accessing (potentially platform-specific) {@link String} {@link ObjectCodec}s. */
public final class StringCodecs {
- private static final Logger logger = Logger.getLogger(StringCodecs.class.getName());
-
- private static final StringCodec stringCodec;
- private static final ObjectCodec<String> asciiOptimized;
-
- static {
- stringCodec = new StringCodec();
- if (FastStringCodec.isAvailable()) {
- asciiOptimized = new FastStringCodec();
- } else {
- logger.warning("Optimized string deserialization unavailable");
- asciiOptimized = stringCodec;
- }
- }
+ private static final StringCodec stringCodec = new StringCodec();
private StringCodecs() {}
@@ -44,7 +28,7 @@ public final class StringCodecs {
* if the expected optimizations are applied.
*/
public static boolean supportsOptimizedAscii() {
- return asciiOptimized instanceof FastStringCodec;
+ return false;
}
/**
@@ -54,9 +38,13 @@ public final class StringCodecs {
*
* <p>Note that when optimized, this instance can still serialize/deserialize UTF-8 data, but with
* potentially worse performance than {@link #simple()}.
+ *
+ * <p>Currently this is the same as {@link #simple()}, it remains to avoid a time-consuming
+ * cleanup and in case we want to revive an optimized version in the near future.
*/
+ // TODO(bazel-core): Determine if we need to revive ascii-optimized.
public static ObjectCodec<String> asciiOptimized() {
- return asciiOptimized;
+ return simple();
}
/**
@@ -66,34 +54,4 @@ public final class StringCodecs {
public static ObjectCodec<String> simple() {
return stringCodec;
}
-
- /**
- * Registers a codec for {@link String}.
- *
- * <p>Needed to resolve ambiguity between {@link StringCodec} and {@link FastStringCodec}.
- */
- static class StringCodecRegisterer implements CodecRegisterer<StringCodec> {
- @Override
- public Iterable<? extends ObjectCodec<?>> getCodecsToRegister() {
- if (!supportsOptimizedAscii()) {
- return Collections.singletonList(simple());
- }
- return Collections.emptyList();
- }
- }
-
- /**
- * Registers a codec for {@link String}.
- *
- * <p>Needed to resolve ambiguity between {@link StringCodec} and {@link FastStringCodec}.
- */
- static class FastStringCodecRegisterer implements CodecRegisterer<FastStringCodec> {
- @Override
- public Iterable<? extends ObjectCodec<?>> getCodecsToRegister() {
- if (supportsOptimizedAscii()) {
- return Collections.singletonList(asciiOptimized());
- }
- return Collections.emptyList();
- }
- }
}