// Copyright 2017 The Bazel Authors. All rights reserved. // // Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License. // You may obtain a copy of the License at // // http://www.apache.org/licenses/LICENSE-2.0 // // Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. package com.google.devtools.build.lib.skyframe.serialization.strings; import com.google.common.base.Preconditions; import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec; import com.google.protobuf.CodedInputStream; import com.google.protobuf.CodedOutputStream; import java.io.IOException; import java.lang.reflect.Field; import java.nio.charset.StandardCharsets; import java.security.AccessController; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; import sun.misc.Unsafe; /** * Similar to {@link StringCodec}, except with deserialization optimized for ascii data. It can * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the * majority of the data passing through will be ascii. * *

<p>Users MUST check if this class is usable by checking {@link #isAvailable()}. */
class FastStringCodec implements ObjectCodec<String> {

  /** Sentinel value for missing {@link #STRING_VALUE_OFFSET}. */
  private static final long UNSUPPORTED_STRING_VALUE_OFFSET = -1;

  /** The {@link Unsafe} instance located reflectively by {@link #getUnsafe()}. */
  private static final Unsafe theUnsafe;

  /**
   * Offset of {@code String}'s {@code value} field as reported by {@link Unsafe}, or {@link
   * #UNSUPPORTED_STRING_VALUE_OFFSET} when that field is not a {@code char[]}.
   */
  private static final long STRING_VALUE_OFFSET;

  private static final String EMPTY_STRING = "";

  static {
    // Order matters: getStringValueOffset() uses theUnsafe.
    theUnsafe = getUnsafe();
    STRING_VALUE_OFFSET = getStringValueOffset();
  }

  /** Returns whether or not this implementation is supported. */
  static boolean isAvailable() {
    return STRING_VALUE_OFFSET != UNSUPPORTED_STRING_VALUE_OFFSET;
  }

  /**
   * Creates a codec instance.
   *
   * @throws IllegalStateException if {@link #isAvailable()} is false
   */
  FastStringCodec() {
    Preconditions.checkState(isAvailable(), "FastStringCodec isn't available!");
  }

  @Override
  public Class<String> getEncodedClass() {
    return String.class;
  }

  /** Writes {@code string} to {@code codedOut} using {@code writeStringNoTag}. */
  @Override
  public void serialize(String string, CodedOutputStream codedOut) throws IOException {
    codedOut.writeStringNoTag(string);
  }

  /**
   * Reads a length prefix followed by that many raw bytes and turns them into a {@link String}.
   *
   * <p>While every byte read has its high bit clear, bytes are widened directly into a {@code
   * char[]} that is then installed as the backing array of a freshly allocated {@code String} via
   * {@link Unsafe}, avoiding an extra copy. On the first byte with its high bit set, the bytes
   * seen so far plus the remaining ones are gathered into a {@code byte[]} and decoded as UTF-8.
   *
   * @param codedIn stream positioned at the length prefix of an encoded string
   * @return the decoded string; the shared empty string when the length is zero
   * @throws IOException if the length prefix is negative or the underlying stream fails
   */
  @Override
  public String deserialize(CodedInputStream codedIn) throws IOException {
    int length = codedIn.readInt32();
    if (length < 0) {
      // Malformed input: report it as an I/O problem instead of letting the array allocation
      // below fail with NegativeArraySizeException.
      throw new IOException("Invalid negative string length: " + length);
    }
    if (length == 0) {
      return EMPTY_STRING;
    }

    char[] maybeDecoded = new char[length];
    for (int i = 0; i < length; i++) {
      // Read one byte at a time to avoid creating a new ByteString/copy of the underlying array.
      byte b = codedIn.readRawByte();
      // Check highest order bit, if it's set we've crossed into extended ascii/utf8.
      if ((b & 0x80) == 0) {
        maybeDecoded[i] = (char) b;
      } else {
        // Fail, we encountered a non-ascii byte. Copy what we have so far, then the rest of the
        // data, into a buffer and let String's constructor do the UTF-8 decoding work.
        byte[] decodeFrom = new byte[length];
        for (int j = 0; j < i; j++) {
          // Safe narrowing: everything stored so far had its high bit clear.
          decodeFrom[j] = (byte) maybeDecoded[j];
        }
        decodeFrom[i] = b;
        for (int j = i + 1; j < length; j++) {
          decodeFrom[j] = codedIn.readRawByte();
        }
        return new String(decodeFrom, StandardCharsets.UTF_8);
      }
    }

    try {
      // Allocate a String without running a constructor, then point its 'value' field at our
      // array so the chars are not copied again.
      String result = (String) theUnsafe.allocateInstance(String.class);
      theUnsafe.putObject(result, STRING_VALUE_OFFSET, maybeDecoded);
      return result;
    } catch (Exception e) {
      // This should only catch InstantiationException, but that makes IntelliJ unhappy for
      // some reason; it insists that that exception cannot be thrown from here, even though it
      // is set to JDK 8
      throw new IllegalStateException("Could not create string", e);
    }
  }

  /**
   * Get a reference to {@link sun.misc.Unsafe} or throw an {@link AssertionError} if failing to do
   * so. Failure is highly unlikely, but possible if the underlying VM stores unsafe in an
   * unexpected location.
   */
  private static Unsafe getUnsafe() {
    try {
      // sun.misc.Unsafe is intentionally difficult to get a hold of - it gives us the power to
      // do things like access raw memory and segfault the JVM.
      return AccessController.doPrivileged(
          new PrivilegedExceptionAction<Unsafe>() {
            @Override
            public Unsafe run() throws Exception {
              Class<Unsafe> unsafeClass = Unsafe.class;
              // Unsafe usually exists in the field 'theUnsafe', however check all fields
              // in case it's somewhere else in this VM's version of Unsafe.
              for (Field f : unsafeClass.getDeclaredFields()) {
                f.setAccessible(true);
                Object fieldValue = f.get(null);
                if (unsafeClass.isInstance(fieldValue)) {
                  return unsafeClass.cast(fieldValue);
                }
              }
              throw new AssertionError("Failed to find sun.misc.Unsafe instance");
            }
          });
    } catch (PrivilegedActionException pae) {
      throw new AssertionError("Unable to get sun.misc.Unsafe", pae);
    }
  }

  /**
   * Looks up the {@link Unsafe} field offset of {@code String}'s {@code value} field.
   *
   * @return the field offset, or {@link #UNSUPPORTED_STRING_VALUE_OFFSET} if the field exists but
   *     is not a {@code char[]}
   * @throws AssertionError if the field cannot be found or accessed
   */
  private static long getStringValueOffset() {
    try {
      // We expect a String's value field to be a char[] - if that's not the case then we're
      // probably on a more modern JDK and this optimization isn't available.
      Field valueField = String.class.getDeclaredField("value");
      Class<?> valueFieldType = valueField.getType();
      if (valueFieldType.equals(char[].class)) {
        return theUnsafe.objectFieldOffset(valueField);
      } else {
        // value was of a different type, bail.
        return UNSUPPORTED_STRING_VALUE_OFFSET;
      }
    } catch (NoSuchFieldException | SecurityException e) {
      throw new AssertionError("Failed to find String's 'value' field/offset", e);
    }
  }
}