aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools
diff options
context:
space:
mode:
author: janakr <janakr@google.com> 2018-04-12 13:45:17 -0700
committer: Copybara-Service <copybara-piper@google.com> 2018-04-12 13:46:48 -0700
commit ac9ba406a5bb5dd8663c45986a5413bd5dd3235d (patch)
tree 71ece2dc48c4a6b0cb571743ae59b471ffacc876 /src/main/java/com/google/devtools
parent d60869c57389b521427ab7ffa1d555de7c133d84 (diff)
Prefix-compress strings during memoizing serialization.
PiperOrigin-RevId: 192662977
Diffstat (limited to 'src/main/java/com/google/devtools')
-rw-r--r-- src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java | 84
1 files changed, 82 insertions, 2 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
index c60b62fb5f..234c74f7d7 100644
--- a/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
+++ b/src/main/java/com/google/devtools/build/lib/skyframe/serialization/Memoizer.java
@@ -135,6 +135,8 @@ class Memoizer {
/** A context for serializing; wraps a memo table. Not thread-safe. */
static class Serializer {
private final SerializingMemoTable memo = new SerializingMemoTable();
+ @Nullable private String lastString = null;
+
/**
* Serializes an object using the given codec and current memo table state.
*
@@ -149,18 +151,59 @@ class Memoizer {
throws SerializationException, IOException {
MemoizationStrategy strategy = codec.getStrategy();
if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) {
- codec.serialize(context, obj, codedOut);
+ // TODO(janakr): there is no reason this is limited to the DO_NOT_MEMOIZE case, but we don't
+ // memoize Strings, so putting the code here saves a tiny bit of work in the other cases. If
+ // the StringCodec#getStrategy changes, this block of code will have to move.
+ if (!maybeEmitString(context, obj, codec, codedOut)) {
+ codec.serialize(context, obj, codedOut);
+ }
} else {
// The caller already checked the table, so this is definitely a new value.
serializeMemoContent(context, obj, codec, codedOut, strategy);
}
}
+ /**
+  * Emits {@code obj} prefix-compressed against the previously emitted string, if it is a String.
+  *
+  * <p>For a String, the codec is handed either the whole string (when no earlier string exists or
+  * nothing is shared) or only the part after the prefix shared with the previous string. When an
+  * earlier string exists, the shared prefix length is written right after the codec's output, even
+  * if it is zero.
+  *
+  * @return false, having written nothing, if {@code obj} is not a String; true otherwise
+  */
+ private <T> boolean maybeEmitString(
+     SerializationContext context,
+     T obj,
+     ObjectCodec<? super T> codec,
+     CodedOutputStream codedOut)
+     throws SerializationException, IOException {
+   if (!(obj instanceof String)) {
+     return false;
+   }
+   String current = (String) obj;
+   boolean hadPrevious = lastString != null;
+   // -1 marks "no previous string"; in that case no prefix length is written at all.
+   int sharedLen = hadPrevious ? commonPrefixLen(current, lastString) : -1;
+   T payload = obj;
+   if (sharedLen > 0) {
+     // Only the non-shared tail goes through the codec.
+     @SuppressWarnings("unchecked")
+     T tail = (T) codec.getEncodedClass().cast(current.substring(sharedLen));
+     payload = tail;
+   }
+   // Remember the full string, not the tail, as the reference for the next string.
+   lastString = current;
+   codec.serialize(context, payload, codedOut);
+   if (hadPrevious) {
+     codedOut.writeInt32NoTag(sharedLen);
+   }
+   return true;
+ }
+
/**
 * Returns the result of looking {@code obj} up in the serializing memo table, which may be null
 * (presumably when the object has not been memoized yet; confirm against SerializingMemoTable).
 */
@Nullable
Integer getMemoizedIndex(Object obj) {
  Integer index = memo.lookupNullable(obj);
  return index;
}
+ /**
+  * Returns the length, in UTF-16 code units, of the longest common prefix of {@code first} and
+  * {@code second} that does not end on a high surrogate.
+  *
+  * <p>A prefix ending on a high surrogate would leave the remaining suffix starting with an
+  * unpaired low surrogate. Such a suffix is not well-formed UTF-16 and cannot be round-tripped by
+  * a UTF-8 based string encoding (NOTE(review): the string codec's encoding is not visible here;
+  * confirm). Backing off by one code unit keeps any surrogate pair wholly inside the suffix.
+  *
+  * @param first one of the strings to compare
+  * @param second the other string to compare
+  * @return a value between 0 and {@code min(first.length(), second.length())}, inclusive
+  */
+ private static int commonPrefixLen(String first, String second) {
+   int shared = 0;
+   int max = Math.min(first.length(), second.length());
+   while (shared < max && first.charAt(shared) == second.charAt(shared)) {
+     ++shared;
+   }
+   // The last shared unit is identical in both strings, so checking either one is enough.
+   if (shared > 0 && Character.isHighSurrogate(first.charAt(shared - 1))) {
+     --shared;
+   }
+   return shared;
+ }
+
// Corresponds to MemoContent in the abstract grammar.
private <T> void serializeMemoContent(
SerializationContext context,
@@ -224,6 +267,7 @@ class Memoizer {
*/
static class Deserializer {
private final DeserializingMemoTable memo = new DeserializingMemoTable();
+ @Nullable private String lastString = null;
@Nullable private Integer tagForMemoizedBefore = null;
private final Deque<Object> memoizedBeforeStackForSanityChecking = new ArrayDeque<>();
@@ -243,7 +287,7 @@ class Memoizer {
codec);
MemoizationStrategy strategy = codec.getStrategy();
if (strategy == MemoizationStrategy.DO_NOT_MEMOIZE) {
- return codec.deserialize(context, codedIn);
+ return maybeTransformString(codec.deserialize(context, codedIn), codec, codedIn);
} else {
switch (strategy) {
case MEMOIZE_BEFORE:
@@ -256,6 +300,42 @@ class Memoizer {
}
}
+ /**
+  * Reverses the string prefix compression applied during serialization.
+  *
+  * <p>Non-String values are returned untouched and nothing is read. For a String, if an earlier
+  * string has been seen, a prefix length is read from {@code codedIn} and that many leading chars
+  * of the previous string are prepended to {@code value}. The resulting full string becomes the
+  * reference for the next call.
+  *
+  * @throws IllegalStateException if the prefix length read is negative or exceeds the length of
+  *     the previous string
+  */
+ private <T> T maybeTransformString(
+     T value, ObjectCodec<? extends T> codec, CodedInputStream codedIn) throws IOException {
+   if (!(value instanceof String)) {
+     return value;
+   }
+   String suffix = (String) value;
+   String previous = lastString;
+   if (previous == null) {
+     // First string in the stream: it was written in full and no prefix length follows it.
+     lastString = suffix;
+     return value;
+   }
+   int prefixLen = codedIn.readInt32();
+   Preconditions.checkState(
+       prefixLen > -1, "Bad data for %s and %s (%s)", suffix, previous, prefixLen);
+   if (prefixLen == 0) {
+     // Nothing shared with the previous string; the value is already complete.
+     lastString = suffix;
+     return value;
+   }
+   int previousLen = previous.length();
+   Preconditions.checkState(
+       previousLen >= prefixLen, "Bad data for %s (%s and %s)", suffix, previous, prefixLen);
+   String full;
+   if (!suffix.isEmpty()) {
+     full = previous.substring(0, prefixLen) + suffix;
+   } else if (prefixLen < previousLen) {
+     // The value is a proper prefix of the previous string.
+     full = previous.substring(0, prefixLen);
+   } else {
+     // prefixLen == previousLen: same string as before, so reuse the instance.
+     full = previous;
+   }
+   T restored = codec.getEncodedClass().cast(full);
+   lastString = full;
+   return restored;
+ }
+
/**
 * Returns the entry stored in the deserializing memo table under {@code memoIndex}. Fails the
 * null check, with {@code memoIndex} as the message, if the lookup yields null.
 */
Object getMemoized(int memoIndex) {
  Object memoized = memo.lookup(memoIndex);
  return Preconditions.checkNotNull(memoized, memoIndex);
}