aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/actions/cache/CompactPersistentActionCache.java
blob: 3780b4e55f95407517892cbf42d1f1a0a7db029e (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.actions.cache;

import static java.nio.charset.StandardCharsets.ISO_8859_1;

import com.google.common.base.Preconditions;
import com.google.common.collect.ImmutableList;
import com.google.devtools.build.lib.concurrent.ThreadSafety.ConditionallyThreadSafe;
import com.google.devtools.build.lib.util.Clock;
import com.google.devtools.build.lib.util.CompactStringIndexer;
import com.google.devtools.build.lib.util.PersistentMap;
import com.google.devtools.build.lib.util.StringIndexer;
import com.google.devtools.build.lib.util.VarInt;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.UnixGlob;

import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.PrintStream;
import java.nio.BufferUnderflowException;
import java.nio.ByteBuffer;
import java.util.Collection;
import java.util.HashMap;
import java.util.Map;

/**
 * An implementation of the ActionCache interface that uses
 * {@link CompactStringIndexer} to reduce memory footprint and saves
 * cached actions using the {@link PersistentMap}.
 *
 * <p>This cache is not fully correct: as hashes are xor'd together, a permutation of input
 * file contents will erroneously be considered up to date.
 */
@ConditionallyThreadSafe // condition: each instance must instantiated with
                         // different cache root
public class CompactPersistentActionCache implements ActionCache {
  private static final int SAVE_INTERVAL_SECONDS = 3;
  private static final long NANOS_PER_SECOND = 1000 * 1000 * 1000;

  // Key of the action cache record that holds information used to verify referential integrity
  // between action cache and string indexer. Must be < 0 to avoid conflict with real action
  // cache records.
  private static final int VALIDATION_KEY = -10;

  private static final int VERSION = 10;

  private final class ActionMap extends PersistentMap<Integer, byte[]> {
    private final Clock clock;
    private long nextUpdate;

    public ActionMap(Map<Integer, byte[]> map, Clock clock, Path mapFile, Path journalFile)
        throws IOException {
      super(VERSION, map, mapFile, journalFile);
      this.clock = clock;
      // Using nanoTime. currentTimeMillis may not provide enough granularity.
      nextUpdate = clock.nanoTime() / NANOS_PER_SECOND + SAVE_INTERVAL_SECONDS;
      load();
    }

    @Override
    protected boolean updateJournal() {
      // Using nanoTime. currentTimeMillis may not provide enough granularity.
      long time = clock.nanoTime() / NANOS_PER_SECOND;
      if (SAVE_INTERVAL_SECONDS == 0 || time > nextUpdate) {
        nextUpdate = time + SAVE_INTERVAL_SECONDS;
        // Force flushing of the PersistentStringIndexer instance. This is needed to ensure
        // that filename index data on disk is always up-to-date when we save action cache
        // data.
        indexer.flush();
        return true;
      }
      return false;
    }

    @Override
    protected boolean keepJournal() {
      // We must first flush the journal to get an accurate measure of its size.
      forceFlush();
      try {
        return journalSize() * 100 < cacheSize();
      } catch (IOException e) {
        return false;
      }
    }

    @Override
    protected Integer readKey(DataInputStream in) throws IOException {
      return in.readInt();
    }

    @Override
    protected byte[] readValue(DataInputStream in)
        throws IOException {
      int size = in.readInt();
      if (size < 0) {
        throw new IOException("found negative array size: " + size);
      }
      byte[] data = new byte[size];
      in.readFully(data);
      return data;
    }

    @Override
    protected void writeKey(Integer key, DataOutputStream out)
        throws IOException {
      out.writeInt(key);
    }

    @Override
    // TODO(bazel-team): (2010) This method, writeKey() and related Metadata methods
    // should really use protocol messages. Doing so would allow easy inspection
    // of the action cache content and, more importantly, would cut down on the
    // need to change VERSION to different number every time we touch those
    // methods. Especially when we'll start to add stuff like statistics for
    // each action.
    protected void writeValue(byte[] value, DataOutputStream out)
        throws IOException {
      out.writeInt(value.length);
      out.write(value);
    }
  }

  private final PersistentMap<Integer, byte[]> map;
  private final PersistentStringIndexer indexer;
  static final ActionCache.Entry CORRUPTED = new ActionCache.Entry(null);

  public CompactPersistentActionCache(Path cacheRoot, Clock clock) throws IOException {
    Path cacheFile = cacheFile(cacheRoot);
    Path journalFile = journalFile(cacheRoot);
    Path indexFile = cacheRoot.getChild("filename_index_v" + VERSION + ".blaze");
    // we can now use normal hash map as backing map, since dependency checker
    // will manually purge records from the action cache.
    Map<Integer, byte[]> backingMap = new HashMap<>();

    try {
      indexer = PersistentStringIndexer.newPersistentStringIndexer(indexFile, clock);
    } catch (IOException e) {
      renameCorruptedFiles(cacheRoot);
      throw new IOException("Failed to load filename index data", e);
    }

    try {
      map = new ActionMap(backingMap, clock, cacheFile, journalFile);
    } catch (IOException e) {
      renameCorruptedFiles(cacheRoot);
      throw new IOException("Failed to load action cache data", e);
    }

    // Validate referential integrity between two collections.
    if (!map.isEmpty()) {
      String integrityError = validateIntegrity(indexer.size(), map.get(VALIDATION_KEY));
      if (integrityError != null) {
        renameCorruptedFiles(cacheRoot);
        throw new IOException("Failed action cache referential integrity check: " + integrityError);
      }
    }
  }

  /**
   * Rename corrupted files so they could be analyzed later. This would also ensure
   * that next initialization attempt will create empty cache.
   */
  private static void renameCorruptedFiles(Path cacheRoot) {
    try {
      for (Path path : UnixGlob.forPath(cacheRoot).addPattern("action_*_v" + VERSION + ".*")
          .glob()) {
        path.renameTo(path.getParentDirectory().getChild(path.getBaseName() + ".bad"));
      }
      for (Path path : UnixGlob.forPath(cacheRoot).addPattern("filename_*_v" + VERSION + ".*")
          .glob()) {
        path.renameTo(path.getParentDirectory().getChild(path.getBaseName() + ".bad"));
      }
    } catch (IOException e) {
      // do nothing
    }
  }

  /**
   * @return non-null error description if indexer contains no data or integrity check has failed,
   *     and null otherwise
   */
  private static String validateIntegrity(int indexerSize, byte[] validationRecord) {
    if (indexerSize == 0) {
      return "empty index";
    }
    if (validationRecord == null) {
      return "no validation record";
    }
    try {
      int validationSize = ByteBuffer.wrap(validationRecord).asIntBuffer().get();
      if (validationSize <= indexerSize) {
        return null;
      } else {
        return String.format("Validation mismatch: validation entry %d is too large " +
                             "compared to index size %d", validationSize, indexerSize);
      }
    } catch (BufferUnderflowException e) {
      return e.getMessage();
    }
  }

  public static Path cacheFile(Path cacheRoot) {
    return cacheRoot.getChild("action_cache_v" + VERSION + ".blaze");
  }

  public static Path journalFile(Path cacheRoot) {
    return cacheRoot.getChild("action_journal_v" + VERSION + ".blaze");
  }

  @Override
  public ActionCache.Entry createEntry(String key) {
    return new ActionCache.Entry(key);
  }

  @Override
  public ActionCache.Entry get(String key) {
    int index = indexer.getIndex(key);
    if (index < 0) {
      return null;
    }
    byte[] data;
    synchronized (this) {
      data = map.get(index);
    }
    try {
      return data != null ? CompactPersistentActionCache.decode(indexer, data) : null;
    } catch (IOException e) {
      // return entry marked as corrupted.
      return CORRUPTED;
    }
  }

  @Override
  public void put(String key, ActionCache.Entry entry) {
    // Encode record. Note that both methods may create new mappings in the indexer.
    int index = indexer.getOrCreateIndex(key);
    byte[] content = encode(indexer, entry);

    // Update validation record.
    ByteBuffer buffer = ByteBuffer.allocate(4); // size of int in bytes
    int indexSize = indexer.size();
    buffer.asIntBuffer().put(indexSize);

    // Note the benign race condition here in which two threads might race on
    // updating the VALIDATION_KEY. If the most recent update loses the race,
    // a value lower than the indexer size will remain in the validation record.
    // This will still pass the integrity check.
    synchronized (this) {
      map.put(VALIDATION_KEY, buffer.array());
      // Now update record itself.
      map.put(index, content);
    }
  }

  @Override
  public synchronized void remove(String key) {
    map.remove(indexer.getIndex(key));
  }

  @Override
  public synchronized long save() throws IOException {
    long indexSize = indexer.save();
    long mapSize = map.save();
    return indexSize + mapSize;
  }

  @Override
  public synchronized String toString() {
    StringBuilder builder = new StringBuilder();
    // map.size() - 1 to avoid counting the validation key.
    builder.append("Action cache (" + (map.size() - 1) + " records):\n");
    int size = map.size() > 1000 ? 10 : map.size();
    int ct = 0;
    for (Map.Entry<Integer, byte[]> entry: map.entrySet()) {
      if (entry.getKey() == VALIDATION_KEY) { continue; }
      String content;
      try {
        content = decode(indexer, entry.getValue()).toString();
      } catch (IOException e) {
        content = e + "\n";
      }
      builder.append("-> ").append(indexer.getStringForIndex(entry.getKey())).append("\n")
          .append(content).append("  packed_len = ").append(entry.getValue().length).append("\n");
      if (++ct > size) {
        builder.append("...");
        break;
      }
    }
    return builder.toString();
  }

  /**
   * Dumps action cache content.
   */
  @Override
  public synchronized void dump(PrintStream out) {
    out.println("String indexer content:\n");
    out.println(indexer);
    out.println("Action cache (" + map.size() + " records):\n");
    for (Map.Entry<Integer, byte[]> entry: map.entrySet()) {
      if (entry.getKey() == VALIDATION_KEY) { continue; }
      String content;
      try {
        content = CompactPersistentActionCache.decode(indexer, entry.getValue()).toString();
      } catch (IOException e) {
        content = e + "\n";
      }
      out.println(entry.getKey() + ", " + indexer.getStringForIndex(entry.getKey()) + ":\n"
          +  content + "\n      packed_len = " + entry.getValue().length + "\n");
    }
  }

  /**
   * @return action data encoded as a byte[] array.
   */
  private static byte[] encode(StringIndexer indexer, ActionCache.Entry entry) {
    Preconditions.checkState(!entry.isCorrupted());

    try {
      byte[] actionKeyBytes = entry.getActionKey().getBytes(ISO_8859_1);
      Collection<String> files = entry.getPaths();

      // Estimate the size of the buffer:
      //   5 bytes max for the actionKey length
      // + the actionKey itself
      // + 16 bytes for the digest
      // + 5 bytes max for the file list length
      // + 5 bytes max for each file id
      int maxSize = VarInt.MAX_VARINT_SIZE + actionKeyBytes.length + Digest.MD5_SIZE
          + VarInt.MAX_VARINT_SIZE + files.size() * VarInt.MAX_VARINT_SIZE;
      ByteArrayOutputStream sink = new ByteArrayOutputStream(maxSize);

      VarInt.putVarInt(actionKeyBytes.length, sink);
      sink.write(actionKeyBytes);

      entry.getFileDigest().write(sink);

      VarInt.putVarInt(files.size(), sink);
      for (String file : files) {
        VarInt.putVarInt(indexer.getOrCreateIndex(file), sink);
      }
      return sink.toByteArray();
    } catch (IOException e) {
      // This Exception can never be thrown by ByteArrayOutputStream.
      throw new AssertionError(e);
    }
  }

  /**
   * Creates new action cache entry using given compressed entry data. Data
   * will stay in the compressed format until entry is actually used by the
   * dependency checker.
   */
  private static ActionCache.Entry decode(StringIndexer indexer, byte[] data) throws IOException {
    try {
      ByteBuffer source = ByteBuffer.wrap(data);

      byte[] actionKeyBytes = new byte[VarInt.getVarInt(source)];
      source.get(actionKeyBytes);
      String actionKey = new String(actionKeyBytes, ISO_8859_1);

      Digest digest = Digest.read(source);

      int count = VarInt.getVarInt(source);
      ImmutableList.Builder<String> builder = new ImmutableList.Builder<>();
      for (int i = 0; i < count; i++) {
        int id = VarInt.getVarInt(source);
        String filename = (id >= 0 ? indexer.getStringForIndex(id) : null);
        if (filename == null) {
          throw new IOException("Corrupted file index");
        }
        builder.add(filename);
      }
      if (source.remaining() > 0) {
        throw new IOException("serialized entry data has not been fully decoded");
      }
      return new Entry(actionKey, builder.build(), digest);
    } catch (BufferUnderflowException e) {
      throw new IOException("encoded entry data is incomplete", e);
    }
  }
}