diff options
author | 2017-10-07 19:44:43 +0200 | |
---|---|---|
committer | 2017-10-09 08:16:06 +0200 | |
commit | b45d5f536048db0e568e16417f4fb1d50f01e1ac (patch) | |
tree | f4fcdf655a25321234f8b19846c75735eb5c1126 /src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings | |
parent | 11be883a0aef677799721e60d2c44202cecae6ce (diff) |
Consolidate ObjectCodec<String> creation
Opens the door to swapping in different implementations without needing to
touch a ton of code.
RELNOTES: None
PiperOrigin-RevId: 171412555
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings')
3 files changed, 222 insertions, 0 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java
new file mode 100644
index 0000000000..e763f70b50
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java
@@ -0,0 +1,179 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.skyframe.serialization.strings;
+
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+import java.lang.reflect.Field;
+import java.lang.reflect.Modifier;
+import java.nio.charset.StandardCharsets;
+import java.security.AccessController;
+import java.security.PrivilegedActionException;
+import java.security.PrivilegedExceptionAction;
+import sun.misc.Unsafe;
+
+/**
+ * Similar to {@link StringCodec}, except with deserialization optimized for ascii data. It can
+ * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the
+ * majority of the data passing through will be ascii.
+ *
+ * <p>The fast path needs {@code sun.misc.Unsafe} and a {@link String} whose contents live in a
+ * {@code char[]} field named {@code value}. JVMs with a different layout (for example java 9+
+ * compact strings, which use a {@code byte[]}) are reported through {@link #isAvailable()}
+ * instead of failing class initialization, so callers can fall back to {@link StringCodec}.
+ */
+class FastStringCodec implements ObjectCodec<String> {
+
+  /** Value of {@link #STRING_VALUE_OFFSET} when the fast path is unsupported on this JVM. */
+  private static final long UNAVAILABLE_OFFSET = -1;
+
+  private static final Unsafe theUnsafe;
+  private static final long STRING_VALUE_OFFSET;
+
+  private static final String EMPTY_STRING = "";
+
+  static {
+    // Never throw from here: an error in a static initializer would also break every class
+    // whose own initializer touches this one (StringCodecs in particular).
+    theUnsafe = getUnsafe();
+    STRING_VALUE_OFFSET =
+        theUnsafe == null ? UNAVAILABLE_OFFSET : findStringValueOffset(theUnsafe);
+  }
+
+  /**
+   * Creates the codec.
+   *
+   * @throws IllegalStateException if {@link #isAvailable()} is {@code false}
+   */
+  FastStringCodec() {
+    if (!isAvailable()) {
+      throw new IllegalStateException("FastStringCodec is not supported on this JVM");
+    }
+  }
+
+  /** Returns whether this implementation can be used on the running JVM. */
+  static boolean isAvailable() {
+    return STRING_VALUE_OFFSET != UNAVAILABLE_OFFSET;
+  }
+
+  @Override
+  public Class<String> getEncodedClass() {
+    return String.class;
+  }
+
+  @Override
+  public void serialize(String string, CodedOutputStream codedOut) throws IOException {
+    codedOut.writeStringNoTag(string);
+  }
+
+  @Override
+  public String deserialize(CodedInputStream codedIn) throws IOException {
+    int length = codedIn.readInt32();
+    if (length < 0) {
+      // Corrupt input; report it as an I/O problem rather than a NegativeArraySizeException.
+      throw new IOException("Negative string length: " + length);
+    }
+    if (length == 0) {
+      return EMPTY_STRING;
+    }
+
+    char[] maybeDecoded = new char[length];
+    for (int i = 0; i < length; i++) {
+      // Read one byte at a time to avoid creating a new ByteString/copy of the underlying array.
+      byte b = codedIn.readRawByte();
+      // Check highest order bit, if it's set we've crossed into extended ascii/utf8.
+      if ((b & 0x80) == 0) {
+        maybeDecoded[i] = (char) b;
+      } else {
+        // Fail, we encountered a non-ascii byte. Copy what we have so far, then the rest of
+        // the data, into a buffer and let String's constructor do the UTF-8 decoding work.
+        byte[] decodeFrom = new byte[length];
+        for (int j = 0; j < i; j++) {
+          decodeFrom[j] = (byte) maybeDecoded[j];
+        }
+        decodeFrom[i] = b;
+        for (int j = i + 1; j < length; j++) {
+          decodeFrom[j] = codedIn.readRawByte();
+        }
+        return new String(decodeFrom, StandardCharsets.UTF_8);
+      }
+    }
+
+    try {
+      // Adopt the decoded array directly instead of paying for String's defensive copy.
+      String result = (String) theUnsafe.allocateInstance(String.class);
+      theUnsafe.putObject(result, STRING_VALUE_OFFSET, maybeDecoded);
+      return result;
+    } catch (InstantiationException e) {
+      throw new IllegalStateException("Could not create string", e);
+    }
+  }
+
+  /**
+   * Returns the offset of {@link String}'s {@code value} field, or {@link #UNAVAILABLE_OFFSET}
+   * if the field is missing, is not a {@code char[]}, or cannot be inspected.
+   */
+  private static long findStringValueOffset(Unsafe unsafe) {
+    try {
+      // String's 'value' field stores its char[] on java 8. If the field changes name or type
+      // the trick used by deserialize() is not applicable.
+      Field valueField = String.class.getDeclaredField("value");
+      if (!valueField.getType().equals(char[].class)) {
+        return UNAVAILABLE_OFFSET;
+      }
+      return unsafe.objectFieldOffset(valueField);
+    } catch (NoSuchFieldException | RuntimeException e) {
+      // This is a capability probe: any failure simply means "not supported here".
+      return UNAVAILABLE_OFFSET;
+    }
+  }
+
+  /**
+   * Get a reference to {@link sun.misc.Unsafe}, or {@code null} if failing to do so. Failure is
+   * highly unlikely, but possible if the underlying VM stores unsafe in an unexpected location.
+   */
+  private static Unsafe getUnsafe() {
+    try {
+      // sun.misc.Unsafe is intentionally difficult to get a hold of - it gives us the power to
+      // do things like access raw memory and segfault the JVM.
+      return AccessController.doPrivileged(
+          new PrivilegedExceptionAction<Unsafe>() {
+            @Override
+            public Unsafe run() throws Exception {
+              Class<Unsafe> unsafeClass = Unsafe.class;
+              // Unsafe usually exists in the field 'theUnsafe', however check all fields
+              // in case it's somewhere else in this VM's version of Unsafe.
+              for (Field f : unsafeClass.getDeclaredFields()) {
+                if (!Modifier.isStatic(f.getModifiers())) {
+                  // f.get(null) below is only legal for static fields.
+                  continue;
+                }
+                f.setAccessible(true);
+                Object fieldValue = f.get(null);
+                if (unsafeClass.isInstance(fieldValue)) {
+                  return unsafeClass.cast(fieldValue);
+                }
+              }
+              return null;
+            }
+          });
+    } catch (PrivilegedActionException | RuntimeException e) {
+      // Capability probe, see findStringValueOffset().
+      return null;
+    }
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodec.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodec.java
new file mode 100644
index 0000000000..8aaba608cc
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodec.java
@@ -0,0 +1,39 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.skyframe.serialization.strings;
+
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+import com.google.protobuf.CodedInputStream;
+import com.google.protobuf.CodedOutputStream;
+import java.io.IOException;
+
+/** Dead-simple serialization for {@link String}s. */
+class StringCodec implements ObjectCodec<String> {
+
+  @Override
+  public Class<String> getEncodedClass() {
+    return String.class;
+  }
+
+  @Override
+  public void serialize(String str, CodedOutputStream codedOut) throws IOException {
+    codedOut.writeStringNoTag(str);
+  }
+
+  @Override
+  public String deserialize(CodedInputStream codedIn) throws IOException {
+    return codedIn.readString();
+  }
+}
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
new file mode 100644
index 0000000000..24f36ecb07
--- /dev/null
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/StringCodecs.java
@@ -0,0 +1,50 @@
+// Copyright 2017 The Bazel Authors. All rights reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//    http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+package com.google.devtools.build.lib.skyframe.serialization.strings;
+
+import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
+
+/** Utility for accessing (potentially platform-specific) {@link String} {@link ObjectCodec}s. */
+public final class StringCodecs {
+
+  private static final StringCodec stringCodec = new StringCodec();
+
+  // FastStringCodec depends on JVM internals; use the simple codec where it is unsupported so
+  // that this class (and therefore simple()) stays usable on every JVM.
+  private static final ObjectCodec<String> asciiOptimizedCodec =
+      FastStringCodec.isAvailable() ? new FastStringCodec() : stringCodec;
+
+  private StringCodecs() {}
+
+  /**
+   * Returns singleton instance optimized for almost-always ASCII data. This instance can still
+   * serialize/deserialize UTF-8 data, but with potentially worse performance than
+   * {@link #simple()}.
+   *
+   * <p>On JVMs where the optimized implementation is unavailable this returns the same instance
+   * as {@link #simple()}.
+   */
+  public static ObjectCodec<String> asciiOptimized() {
+    return asciiOptimizedCodec;
+  }
+
+  /**
+   * Returns singleton instance of basic implementation. Should be preferred over
+   * {@link #asciiOptimized()} when a sufficient amount of UTF-8 data is expected.
+   */
+  public static ObjectCodec<String> simple() {
+    return stringCodec;
+  }
+}