aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/packages/GlobCache.java
blob: 233a199ec5faef71ae468f2c4b04f49e44426caa (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.packages;

import com.google.common.annotations.VisibleForTesting;
import com.google.common.base.Preconditions;
import com.google.common.base.Predicate;
import com.google.common.base.Throwables;
import com.google.common.collect.Iterables;
import com.google.common.collect.Lists;
import com.google.common.collect.Sets;
import com.google.common.util.concurrent.SettableFuture;
import com.google.devtools.build.lib.cmdline.PackageIdentifier;
import com.google.devtools.build.lib.concurrent.ThreadSafety;
import com.google.devtools.build.lib.util.Pair;
import com.google.devtools.build.lib.vfs.Path;
import com.google.devtools.build.lib.vfs.Symlinks;
import com.google.devtools.build.lib.vfs.UnixGlob;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.HashMap;
import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.CancellationException;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.atomic.AtomicReference;

/**
 * Caches the results of glob expansion for a package.
 */
  // Used outside of Bazel!
@ThreadSafety.ThreadCompatible
public class GlobCache {
  public static class BadGlobException extends Exception {
    BadGlobException(String message) {
      super(message);
    }
  }

  /**
   * A mapping from glob expressions (e.g. "*.java") to the list of files it
   * matched (in the order returned by VFS) at the time the package was
   * constructed.  Required for sound dependency analysis.
   *
   * We don't use a Multimap because it provides no way to distinguish "key not
   * present" from (key -> {}).
   */
  private final Map<Pair<String, Boolean>, Future<List<Path>>> globCache = new HashMap<>();

  /**
   * The directory in which our package's BUILD file resides.
   */
  private final Path packageDirectory;

  /**
   * The name of the package we belong to.
   */
  private final PackageIdentifier packageId;

  /**
   * The package locator-based directory traversal predicate.
   */
  private final Predicate<Path> childDirectoryPredicate;

  /**
   * System call caching layer.
   */
  private AtomicReference<? extends UnixGlob.FilesystemCalls> syscalls;

  /**
   * The thread pool for glob evaluation.
   */
  private final ThreadPoolExecutor globExecutor;

  /**
   * Create a glob expansion cache.
   * @param packageDirectory globs will be expanded relatively to this
   *                         directory.
   * @param packageId the name of the package this cache belongs to.
   * @param locator the package locator.
   * @param globExecutor thread pool for glob evaluation.
   */
  public GlobCache(final Path packageDirectory,
                   final PackageIdentifier packageId,
                   final CachingPackageLocator locator,
                   AtomicReference<? extends UnixGlob.FilesystemCalls> syscalls,
                   ThreadPoolExecutor globExecutor) {
    this.packageDirectory = Preconditions.checkNotNull(packageDirectory);
    this.packageId = Preconditions.checkNotNull(packageId);
    this.globExecutor = Preconditions.checkNotNull(globExecutor);
    this.syscalls = syscalls == null ? new AtomicReference<>(UnixGlob.DEFAULT_SYSCALLS) : syscalls;

    Preconditions.checkNotNull(locator);
    childDirectoryPredicate = new Predicate<Path>() {
      @Override
      public boolean apply(Path directory) {
        if (directory.equals(packageDirectory)) {
          return true;
        }

        PackageIdentifier subPackageId = new PackageIdentifier(
            packageId.getRepository(),
            packageId.getPackageFragment().getRelative(directory.relativeTo(packageDirectory)));
        UnixGlob.FilesystemCalls syscalls = GlobCache.this.syscalls.get();
        if (syscalls != UnixGlob.DEFAULT_SYSCALLS) {
          // This is needed because in case the BUILD file exists, we do not call readdir() on its
          // directory. However, the package needs to be re-evaluated if the BUILD file is removed.
          // Therefore, we add this BUILD file to our dependencies by statting it through the
          // recording syscall object so that the BUILD file itself is added to the dependencies of
          // this package.
          //
          // The stat() call issued by locator.getBuildFileForPackage() does not quite cut it
          // because 1. it is cached so there may not be a stat() call at all and 2. even if there
          // is, it does not go through the proxy in GlobCache.this.syscalls.
          //
          // Note that this does not cause any significant slowdown; the BUILD file cache will have
          // already evaluated the very same stat, so we don't pay any I/O cost, only a cache
          // lookup.
          syscalls.statNullable(directory.getChild("BUILD"), Symlinks.FOLLOW);
        }

        return locator.getBuildFileForPackage(subPackageId) == null;
      }
    };
  }

  /**
   * Returns the future result of evaluating glob "pattern" against this
   * package's directory, using the package's cache of previously-started
   * globs if possible.
   *
   * @return the list of paths matching the pattern, relative to the package's
   *   directory.
   * @throws BadGlobException if the glob was syntactically invalid, or
   *  contained uplevel references.
   */
  Future<List<Path>> getGlobAsync(String pattern, boolean excludeDirs)
      throws BadGlobException {
    Future<List<Path>> cached = globCache.get(Pair.of(pattern, excludeDirs));
    if (cached == null) {
      cached = safeGlob(pattern, excludeDirs);
      setGlobPaths(pattern, excludeDirs, cached);
    }
    return cached;
  }

  @VisibleForTesting
  List<String> getGlob(String pattern)
      throws IOException, BadGlobException, InterruptedException {
    return getGlob(pattern, false);
  }

  @VisibleForTesting
  protected List<String> getGlob(String pattern, boolean excludeDirs)
      throws IOException, BadGlobException, InterruptedException {
    Future<List<Path>> futureResult = getGlobAsync(pattern, excludeDirs);
    List<Path> globPaths = fromFuture(futureResult);
    // Replace the UnixGlob.GlobFuture with a completed future object, to allow
    // garbage collection of the GlobFuture and GlobVisitor objects.
    if (!(futureResult instanceof SettableFuture<?>)) {
      SettableFuture<List<Path>> completedFuture = SettableFuture.create();
      completedFuture.set(globPaths);
      globCache.put(Pair.of(pattern, excludeDirs), completedFuture);
    }

    List<String> result = Lists.newArrayListWithCapacity(globPaths.size());
    for (Path path : globPaths) {
      String relative = path.relativeTo(packageDirectory).getPathString();
      // Don't permit "" (meaning ".") in the glob expansion, since it's
      // invalid as a label, plus users should say explicitly if they
      // really want to name the package directory.
      if (!relative.isEmpty()) {
        result.add(relative);
      }
    }
    return result;
  }

  /**
   * Adds glob entries to the cache, making sure they are sorted first.
   */
  @VisibleForTesting
  void setGlobPaths(String pattern, boolean excludeDirectories, Future<List<Path>> result) {
    globCache.put(Pair.of(pattern, excludeDirectories), result);
  }

  /**
   * Actually execute a glob against the filesystem.  Otherwise similar to
   * getGlob().
   */
  @VisibleForTesting
  Future<List<Path>> safeGlob(String pattern, boolean excludeDirs) throws BadGlobException {
    // Forbidden patterns:
    if (pattern.indexOf('?') != -1) {
      throw new BadGlobException("glob pattern '" + pattern + "' contains forbidden '?' wildcard");
    }
    // Patterns forbidden by UnixGlob library:
    String error = UnixGlob.checkPatternForError(pattern);
    if (error != null) {
      throw new BadGlobException(error + " (in glob pattern '" + pattern + "')");
    }
    return UnixGlob.forPath(packageDirectory)
        .addPattern(pattern)
        .setExcludeDirectories(excludeDirs)
        .setDirectoryFilter(childDirectoryPredicate)
        .setThreadPool(globExecutor)
        .setFilesystemCalls(syscalls)
        .globAsync(true);
  }

  /**
   * Sanitize the future exceptions - the only expected checked exception
   * is IOException.
   */
  private static List<Path> fromFuture(Future<List<Path>> future)
      throws IOException, InterruptedException {
    try {
      return future.get();
    } catch (ExecutionException e) {
      Throwable cause = e.getCause();
      Throwables.propagateIfPossible(cause,
          IOException.class, InterruptedException.class);
      throw new RuntimeException(e);
    }
  }

  /**
   * Returns true iff all this package's globs are up-to-date.  That is,
   * re-evaluating the package's BUILD file at this moment would yield an
   * equivalent Package instance.  (This call requires filesystem I/O to
   * re-evaluate the globs.)
   */
  public boolean globsUpToDate() throws InterruptedException {
    // Start all globs in parallel.
    Map<Pair<String, Boolean>, Future<List<Path>>> newGlobs = new HashMap<>();
    try {
      for (Map.Entry<Pair<String, Boolean>, Future<List<Path>>> entry : globCache.entrySet()) {
        Pair<String, Boolean> key = entry.getKey();
        try {
          newGlobs.put(key, safeGlob(key.first, key.second));
        } catch (BadGlobException e) {
          return false;
        }
      }

      for (Map.Entry<Pair<String, Boolean>, Future<List<Path>>> entry : globCache.entrySet()) {
        try {
          Pair<String, Boolean> key = entry.getKey();
          List<Path> newGlob = fromFuture(newGlobs.get(key));
          List<Path> oldGlob = fromFuture(entry.getValue());
          if (!oldGlob.equals(newGlob)) {
            return false;
          }
        } catch (IOException e) {
          return false;
        }
      }

      return true;
    } finally {
      finishBackgroundTasks(newGlobs.values());
    }
  }

  /**
   * Evaluate the build language expression "glob(includes, excludes)" in the
   * context of this package.
   *
   * <p>Called by PackageFactory via Package.
   */
  public List<String> glob(List<String> includes, List<String> excludes, boolean excludeDirs)
      throws IOException, BadGlobException, InterruptedException {
    // Start globbing all patterns in parallel. The getGlob() calls below will
    // block on an individual pattern's results, but the other globs can
    // continue in the background.
    for (String pattern : Iterables.concat(includes, excludes)) {
      getGlobAsync(pattern, excludeDirs);
    }

    LinkedHashSet<String> results = Sets.newLinkedHashSetWithExpectedSize(includes.size());
    for (String pattern : includes) {
      results.addAll(getGlob(pattern, excludeDirs));
    }
    for (String pattern : excludes) {
      results.removeAll(getGlob(pattern, excludeDirs));
    }

    Preconditions.checkState(!results.contains(null), "glob returned null");
    return new ArrayList<>(results);
  }

  public Set<Pair<String, Boolean>> getKeySet() {
    return globCache.keySet();
  }

  /**
   * Block on the completion of all potentially-abandoned background tasks.
   */
  public void finishBackgroundTasks() {
    finishBackgroundTasks(globCache.values());
  }

  public void cancelBackgroundTasks() {
    cancelBackgroundTasks(globCache.values());
  }

  private static void finishBackgroundTasks(Collection<Future<List<Path>>> tasks) {
    for (Future<List<Path>> task : tasks) {
      try {
        fromFuture(task);
      } catch (CancellationException | IOException | InterruptedException e) {
        // Ignore: If this was still going on in the background, some other
        // failure already occurred.
      }
    }
  }

  private static void cancelBackgroundTasks(Collection<Future<List<Path>>> tasks) {
    for (Future<List<Path>> task : tasks) {
      task.cancel(true);
    }

    for (Future<List<Path>> task : tasks) {
      try {
        task.get();
      } catch (CancellationException | ExecutionException | InterruptedException e) {
        // We don't care. Point is, the task does not bother us anymore.
      }
    }
  }

  @Override
  public String toString() {
    return "GlobCache for " + packageId + " in " + packageDirectory;
  }
}