aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/skyframe/serialization/strings/FastStringCodec.java
blob: f4b44c15687a07ffb0fa6ae2da90ce76be0993e2 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
// Copyright 2017 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package com.google.devtools.build.lib.skyframe.serialization.strings;

import com.google.common.base.Preconditions;
import com.google.devtools.build.lib.skyframe.serialization.ObjectCodec;
import com.google.protobuf.CodedInputStream;
import com.google.protobuf.CodedOutputStream;
import java.io.IOException;
import java.lang.reflect.Field;
import java.lang.reflect.Modifier;
import java.nio.charset.StandardCharsets;
import java.security.AccessController;
import java.security.PrivilegedActionException;
import java.security.PrivilegedExceptionAction;
import sun.misc.Unsafe;

/**
 * Similar to {@link StringCodec}, except with deserialization optimized for ascii data. It can
 * still handle UTF-8, though less efficiently than {@link StringCodec}. Should be used when the
 * majority of the data passing through will be ascii.
 *
 * <p>Users <b>MUST</b> check if this class is usable by checking {@link #isAvailable()}; the
 * constructor throws {@link IllegalStateException} otherwise.
 */
class FastStringCodec implements ObjectCodec<String> {

  /** Sentinel value for missing {@link #STRING_VALUE_OFFSET}. */
  private static final long UNSUPPORTED_STRING_VALUE_OFFSET = -1;

  private static final Unsafe theUnsafe;

  /**
   * Offset of {@link String}'s {@code value} field as reported by {@link
   * Unsafe#objectFieldOffset}, or {@link #UNSUPPORTED_STRING_VALUE_OFFSET} if that field is not a
   * {@code char[]}.
   */
  private static final long STRING_VALUE_OFFSET;

  private static final String EMPTY_STRING = "";

  static {
    // Order matters: getStringValueOffset() uses theUnsafe.
    theUnsafe = getUnsafe();
    STRING_VALUE_OFFSET = getStringValueOffset();
  }

  /** Returns whether or not this implementation is supported. */
  static boolean isAvailable() {
    return STRING_VALUE_OFFSET != UNSUPPORTED_STRING_VALUE_OFFSET;
  }

  /**
   * Creates a codec instance.
   *
   * @throws IllegalStateException if {@link #isAvailable()} is false
   */
  FastStringCodec() {
    Preconditions.checkState(isAvailable(), "FastStringCodec isn't available!");
  }

  @Override
  public Class<String> getEncodedClass() {
    return String.class;
  }

  /** Writes {@code string} to {@code codedOut} using {@link CodedOutputStream#writeStringNoTag}. */
  @Override
  public void serialize(String string, CodedOutputStream codedOut) throws IOException {
    codedOut.writeStringNoTag(string);
  }

  /**
   * Reads a length prefix followed by that many bytes and turns them into a {@link String}.
   *
   * <p>While every byte is 7-bit ascii, bytes are widened directly into a {@code char[]} that is
   * then installed as the backing array of a freshly allocated {@link String}. On the first
   * non-ascii byte, the data is instead collected into a {@code byte[]} and decoded as UTF-8.
   *
   * @throws IOException if the stream cannot be read or the encoded length is negative
   */
  @Override
  public String deserialize(CodedInputStream codedIn) throws IOException {
    int length = codedIn.readInt32();
    if (length < 0) {
      // A negative length can only come from corrupt input. Report it as an IOException instead
      // of letting the array allocation below fail with NegativeArraySizeException.
      throw new IOException("Invalid negative string length: " + length);
    }
    if (length == 0) {
      return EMPTY_STRING;
    }

    char[] maybeDecoded = new char[length];
    for (int i = 0; i < length; i++) {
      // Read one byte at a time to avoid creating a new ByteString/copy of the underlying array.
      byte b = codedIn.readRawByte();
      // Check highest order bit, if it's set we've crossed into extended ascii/utf8.
      if ((b & 0x80) == 0) {
        maybeDecoded[i] = (char) b;
      } else {
        // Fail, we encountered a non-ascii byte. Copy what we have decoded so far, then the rest
        // of the data, into a byte buffer and let String's constructor do the UTF-8 decoding
        // work.
        byte[] decodeFrom = new byte[length];
        for (int j = 0; j < i; j++) {
          // Everything before index i was ascii, so narrowing back to a byte is lossless.
          decodeFrom[j] = (byte) maybeDecoded[j];
        }
        decodeFrom[i] = b;
        for (int j = i + 1; j < length; j++) {
          decodeFrom[j] = codedIn.readRawByte();
        }
        return new String(decodeFrom, StandardCharsets.UTF_8);
      }
    }

    try {
      // Allocate a String without invoking a constructor and point its 'value' field at our
      // array directly, so the chars are not copied a second time.
      String result = (String) theUnsafe.allocateInstance(String.class);
      theUnsafe.putObject(result, STRING_VALUE_OFFSET, maybeDecoded);
      return result;
    } catch (Exception e) {
      // This should only catch InstantiationException, but that makes IntelliJ unhappy for
      // some reason; it insists that that exception cannot be thrown from here, even though it
      // is set to JDK 8
      throw new IllegalStateException("Could not create string", e);
    }
  }

  /**
   * Get a reference to {@link sun.misc.Unsafe} or throw an {@link AssertionError} if failing to do
   * so. Failure is highly unlikely, but possible if the underlying VM stores unsafe in an
   * unexpected location.
   */
  private static Unsafe getUnsafe() {
    try {
      // sun.misc.Unsafe is intentionally difficult to get a hold of - it gives us the power to
      // do things like access raw memory and segfault the JVM.
      return AccessController.doPrivileged(
          new PrivilegedExceptionAction<Unsafe>() {
            @Override
            public Unsafe run() throws Exception {
              Class<Unsafe> unsafeClass = Unsafe.class;
              // Unsafe usually exists in the field 'theUnsafe', however check all fields
              // in case it's somewhere else in this VM's version of Unsafe.
              for (Field f : unsafeClass.getDeclaredFields()) {
                // Only static fields can be read with a null receiver; Field#get(null) on an
                // instance field throws NullPointerException, which would abort the search.
                if (!Modifier.isStatic(f.getModifiers())) {
                  continue;
                }
                f.setAccessible(true);
                Object fieldValue = f.get(null);
                if (unsafeClass.isInstance(fieldValue)) {
                  return unsafeClass.cast(fieldValue);
                }
              }
              throw new AssertionError("Failed to find sun.misc.Unsafe instance");
            }
          });
    } catch (PrivilegedActionException pae) {
      throw new AssertionError("Unable to get sun.misc.Unsafe", pae);
    }
  }

  /**
   * Looks up the offset of {@link String}'s {@code value} field.
   *
   * @return the field offset, or {@link #UNSUPPORTED_STRING_VALUE_OFFSET} if the field exists but
   *     is not a {@code char[]}
   * @throws AssertionError if the field cannot be looked up at all
   */
  private static long getStringValueOffset() {
    try {
      // We expect a String's value field to be a char[] - if that's not the case then we're
      // probably on a more modern JDK and this optimization isn't available.
      Field valueField = String.class.getDeclaredField("value");
      Class<?> valueFieldType = valueField.getType();
      if (valueFieldType.equals(char[].class)) {
        return theUnsafe.objectFieldOffset(valueField);
      } else {
        // value was of a different type, bail.
        return UNSUPPORTED_STRING_VALUE_OFFSET;
      }
    } catch (NoSuchFieldException | SecurityException e) {
      throw new AssertionError("Failed to find String's 'value' field/offset", e);
    }
  }
}