aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
blob: def46d8983d9567b34ca290921f890c5186ee043 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
// Copyright 2014 The Bazel Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package com.google.devtools.build.lib.profiler;

import static com.google.devtools.build.lib.profiler.ProfilerTask.TASK_COUNT;

import com.google.common.base.Preconditions;
import com.google.common.collect.Iterables;
import com.google.devtools.build.lib.util.Clock;
import com.google.devtools.build.lib.util.VarInt;

import java.io.BufferedOutputStream;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.OutputStream;
import java.nio.ByteBuffer;
import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
import java.util.PriorityQueue;
import java.util.Queue;
import java.util.Timer;
import java.util.TimerTask;
import java.util.concurrent.ConcurrentLinkedQueue;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;

/**
 * Blaze internal profiler. Provides facility to report various Blaze tasks and
 * store them (asynchronously) in the file for future analysis.
 * <p>
 * Implemented as singleton so any caller should use Profiler.instance() to
 * obtain reference.
 * <p>
 * Internally, profiler uses two data structures - ThreadLocal task stack to track
 * nested tasks and single ConcurrentLinkedQueue to gather all completed tasks.
 * <p>
 * Also, due to the nature of the provided functionality (instrumentation of all
 * Blaze components), build.lib.profiler package will be used by almost every
 * other Blaze package, so special attention should be paid to avoid any
 * dependencies on the rest of the Blaze code, including build.lib.util and
 * build.lib.vfs. This is important because build.lib.util and build.lib.vfs
 * contain Profiler invocations and any dependency on those two packages would
 * create circular relationship.
 * <p>
 * All gathered instrumentation data will be stored in the file. Please, note,
 * that while file format is described here it is considered internal and can
 * change at any time. For scripting, using blaze analyze-profile --dump=raw
 * would be more robust and stable solution.
 * <p>
 * <pre>
 * Profiler file consists of the deflated stream with following overall structure:
 *   HEADER
 *   TASK_TYPE_TABLE
 *   TASK_RECORD...
 *   EOF_MARKER
 *
 * HEADER:
 *   int32: magic token (Profiler.MAGIC)
 *   int32: version format (Profiler.VERSION)
 *   string: file comment
 *
 * TASK_TYPE_TABLE:
 *   int32: number of type names below
 *   string... : type names. Each of the type names is assigned id according to
 *               their position in this table starting from 0.
 *
 * TASK_RECORD:
 *   int32 size: size of the encoded task record
 *   byte[size] encoded_task_record:
 *     varint64: thread id - as was returned by Thread.getId()
 *     varint32: task id - starting from 1.
 *     varint32: parent task id for subtasks or 0 for root tasks
 *     varint64: start time in ns, relative to the Profiler.start() invocation
 *     varint64: task duration in ns
 *     byte:     task type id (see TASK_TYPE_TABLE)
 *     varint32: description string index incremented by 1 (>0) or 0 this is
 *               a first occurrence of the description string
 *     AGGREGATED_STAT...: remainder of the field (if present) represents
 *                         aggregated stats for that task
 *   string: *optional* description string, will appear only if description
 *           string index above was 0. In that case this string will be
 *           assigned next sequential id so every unique description string
 *           will appear in the file only once - after that it will be
 *           referenced by id.
 *
 * AGGREGATE_STAT:
 *   byte:     stat type
 *   varint32: total number of subtask invocations
 *   varint64: cumulative duration of subtask invocations in ns.
 *
 * EOF_MARKER:
 *   int64: -1 - please note that this corresponds to the thread id in the
 *               TASK_RECORD which is always > 0
 * </pre>
 *
 * @see ProfilerTask enum for recognized task types.
 */
//@ThreadSafe - commented out to avoid cyclic dependency with lib.util package
public final class Profiler {
  static final int MAGIC = 0x11223344;

  // File version number. Note that merely adding new record types in
  // the ProfilerTask does not require bumping version number as long as original
  // enum values are not renamed or deleted.
  static final int VERSION = 0x03;

  // EOF marker. Must be < 0.
  static final int EOF_MARKER = -1;

  // Profiler will check for gathered data and persist all of it in the
  // separate thread every SAVE_DELAY ms.
  private static final int SAVE_DELAY = 2000; // ms

  /**
   * The profiler (a static singleton instance). Inactive by default.
   */
  private static final Profiler instance = new Profiler();

  /**
   * A task that was very slow.
   */
  public final class SlowTask implements Comparable<SlowTask> {
    final long durationNanos;
    final Object object;
    ProfilerTask type;

    private SlowTask(TaskData taskData) {
      this.durationNanos = taskData.duration;
      this.object = taskData.object;
      this.type = taskData.type;
    }

    @Override
    public int compareTo(SlowTask other) {
      long delta = durationNanos - other.durationNanos;
      if (delta < 0) {  // Very clumsy
        return -1;
      } else if (delta > 0) {
        return 1;
      } else {
        return 0;
      }
    }

    public long getDurationNanos() {
      return durationNanos;
    }

    public String getDescription() {
      return toDescription(object);
    }

    public ProfilerTask getType() {
      return type;
    }
  }

  /**
   * Container for the single task record.
   * Should never be instantiated directly - use TaskStack.create() instead.
   *
   * Class itself is not thread safe, but all access to it from Profiler
   * methods is.
   */
  //@ThreadCompatible - commented out to avoid cyclic dependency with lib.util.
  private final class TaskData {
    final long threadId;
    final long startTime;
    long duration = 0L;
    final int id;
    final int parentId;
    int[] counts; // number of invocations per ProfilerTask type
    long[] durations; // time spend in the task per ProfilerTask type
    final ProfilerTask type;
    final Object object;

    TaskData(long startTime, TaskData parent,
             ProfilerTask eventType, Object object) {
      threadId = Thread.currentThread().getId();
      counts = null;
      durations = null;
      id = taskId.incrementAndGet();
      parentId = (parent == null  ? 0 : parent.id);
      this.startTime = startTime;
      this.type = eventType;
      this.object = Preconditions.checkNotNull(object);
    }

    /**
     * Aggregates information about an *immediate* subtask.
     */
    public void aggregateChild(ProfilerTask type, long duration) {
      int index = type.ordinal();
      if (counts == null) {
        // one entry for each ProfilerTask type
        counts = new int[TASK_COUNT];
        durations = new long[TASK_COUNT];
      }
      counts[index]++;
      durations[index] += duration;
    }

    @Override
    public String toString() {
      return "Thread " + threadId + ", task " + id + ", type " + type + ", " + object;
    }
  }

  /**
   * Tracks nested tasks for each thread.
   *
   * java.util.ArrayDeque is the most efficient stack implementation in the
   * Java Collections Framework (java.util.Stack class is older synchronized
   * alternative). It is, however, used here strictly for LIFO operations.
   * However, ArrayDeque is 1.6 only. For 1.5 best approach would be to utilize
   * ArrayList and emulate stack using it.
   */
  //@ThreadSafe - commented out to avoid cyclic dependency with lib.util.
  private final class TaskStack extends ThreadLocal<List<TaskData>> {

    @Override
    public List<TaskData> initialValue() {
      return new ArrayList<>();
    }

    public TaskData peek() {
      List<TaskData> list = get();
      if (list.isEmpty()) {
        return null;
      }
      return list.get(list.size() - 1);
    }

    public TaskData pop() {
      List<TaskData> list = get();
      return list.remove(list.size() - 1);
    }

    public boolean isEmpty() {
      return get().isEmpty();
    }

    public void push(ProfilerTask eventType, Object object) {
      get().add(create(clock.nanoTime(), eventType, object));
    }

    public TaskData create(long startTime, ProfilerTask eventType, Object object) {
      return new TaskData(startTime, peek(), eventType, object);
    }

    @Override
    public String toString() {
      StringBuilder builder = new StringBuilder(
          "Current task stack for thread " + Thread.currentThread().getName() + ":\n");
      List<TaskData> list = get();
      for (int i = list.size() - 1; i >= 0; i--) {
        builder.append(list.get(i));
        builder.append("\n");
      }
      return builder.toString();
    }
  }

  private static String toDescription(Object object) {
    return (object instanceof Describable)
        ? ((Describable) object).describe()
        : object.toString();
  }

  /**
   * Implements datastore for object description indices. Intended to be used
   * only by the Profiler.save() method.
   */
  //@ThreadCompatible - commented out to avoid cyclic dependency with lib.util.
  private final class ObjectDescriber {
    private Map<Object, Integer> descMap = new IdentityHashMap<>(2000);
    private int indexCounter = 0;

    ObjectDescriber() { }

    int getDescriptionIndex(Object object) {
      Integer index = descMap.get(object);
      return (index != null) ? index : -1;
    }

    String getDescription(Object object) {
      String description = toDescription(object);

      Integer oldIndex = descMap.put(object, indexCounter++);
      // Do not use Preconditions class below due to the rather expensive
      // toString() calls used in the message.
      if (oldIndex != null) {
        throw new IllegalStateException(" Object '" + description + "' @ "
            + System.identityHashCode(object) + " already had description index "
            + oldIndex + " while assigning index " + descMap.get(object));
      } else if (description.length() > 20000) {
        // Note size 64k byte limitation in DataOutputStream#writeUTF().
        description = description.substring(0, 20000);
      }
      return description;
    }

    boolean isUnassigned(int index) {
      return (index < 0);
    }
  }

  /**
   * Aggregator class that keeps track of the slowest tasks of the specified type.
   *
   * <p><code>priorityQueues</p> is sharded so that all threads need not compete for the same
   * lock if they do the same operation at the same time. Access to the individual queues is
   * synchronized on the queue objects themselves.
   */
  private final class SlowestTaskAggregator {
    private static final int SHARDS = 16;
    private final int size;

    @SuppressWarnings({"unchecked", "rawtypes"})
    private final PriorityQueue<SlowTask>[] priorityQueues = new PriorityQueue[SHARDS];

    SlowestTaskAggregator(int size) {
      this.size = size;

      for (int i = 0; i < SHARDS; i++) {
        priorityQueues[i] = new PriorityQueue<>(size + 1);
      }
    }

    // @ThreadSafe
    void add(TaskData taskData) {
      PriorityQueue<SlowTask> queue =
          priorityQueues[(int) (Thread.currentThread().getId() % SHARDS)];
      synchronized (queue) {
        if (queue.size() == size) {
          // Optimization: check if we are faster than the fastest element. If we are, we would
          // be the ones to fall off the end of the queue, therefore, we can safely return early.
          if (queue.peek().getDurationNanos() > taskData.duration) {
            return;
          }

          queue.add(new SlowTask(taskData));
          queue.remove();
        } else {
          queue.add(new SlowTask(taskData));
        }
      }
    }

    // @ThreadSafe
    void clear() {
      for (int i = 0; i < SHARDS; i++) {
        PriorityQueue<SlowTask> queue = priorityQueues[i];
        synchronized (queue) {
          queue.clear();
        }
      }
    }

    // @ThreadSafe
    Iterable<SlowTask> getSlowestTasks() {
      // This is slow, but since it only happens during the end of the invocation, it's OK
      PriorityQueue<SlowTask> merged = new PriorityQueue<>(size * SHARDS);
      for (int i = 0; i < SHARDS; i++) {
        PriorityQueue<SlowTask> queue = priorityQueues[i];
        synchronized (queue) {
          merged.addAll(queue);
        }
      }

      while (merged.size() > size) {
        merged.remove();
      }

      return merged;
    }
  }

  /**
   * Which {@link ProfilerTask}s are profiled.
   */
  public enum ProfiledTaskKinds {
    /**
     * Do not profile anything.
     *
     * <p>Performance is best with this case, but we lose critical path analysis and slowest
     * operation tracking.
     */
    NONE {
      @Override
      boolean isProfiling(ProfilerTask type) {
        return false;
      }
    },

    /**
     * Profile on a few, known-to-be-slow tasks.
     *
     * <p>Performance is somewhat decreased in comparison to {@link #NONE}, but we still track the
     * slowest operations (VFS).
     */
    SLOWEST {
      @Override
      boolean isProfiling(ProfilerTask type) {
        return type.collectsSlowestInstances();
      }
    },

    /**
     * Profile all tasks.
     *
     * <p>This is in use when {@code --profile} is specified.
     */
    ALL {
      @Override
      boolean isProfiling(ProfilerTask type) {
        return true;
      }
    };

    /** Whether the Profiler collects data for the given task type. */
    abstract boolean isProfiling(ProfilerTask type);
  }

  private Clock clock;
  private ProfiledTaskKinds profiledTaskKinds;
  private volatile long profileStartTime = 0L;
  private volatile boolean recordAllDurations = false;
  private AtomicInteger taskId = new AtomicInteger();

  private TaskStack taskStack;
  private Queue<TaskData> taskQueue;
  private DataOutputStream out;
  private Timer timer;
  private IOException saveException;
  private ObjectDescriber describer;
  @SuppressWarnings("unchecked")
  private final SlowestTaskAggregator[] slowestTasks =
  new SlowestTaskAggregator[ProfilerTask.values().length];

  private Profiler() {
    for (ProfilerTask task : ProfilerTask.values()) {
      if (task.slowestInstancesCount != 0) {
        slowestTasks[task.ordinal()] = new SlowestTaskAggregator(task.slowestInstancesCount);
      }
    }
  }

  public static Profiler instance() {
    return instance;
  }

  /**
   * Returns the nanoTime of the current profiler instance, or an arbitrary
   * constant if not active.
   */
  public static long nanoTimeMaybe() {
    if (instance.isActive()) {
      return instance.clock.nanoTime();
    }
    return -1;
  }

  /**
   * Enable profiling.
   *
   * <p>Subsequent calls to beginTask/endTask will be recorded
   * in the provided output stream. Please note that stream performance is
   * extremely important and buffered streams should be utilized.
   *
   * @param profiledTaskKinds which kinds of {@link ProfilerTask}s to track
   * @param stream output stream to store profile data. Note: passing unbuffered stream object
   *     reference may result in significant performance penalties
   * @param comment a comment to insert in the profile data
   * @param recordAllDurations iff true, record all tasks regardless of their duration; otherwise
   *     some tasks may get aggregated if they finished quick enough
   * @param clock a {@code BlazeClock.instance()}
   * @param execStartTimeNanos execution start time in nanos obtained from {@code clock.nanoTime()}
   */
  public synchronized void start(ProfiledTaskKinds profiledTaskKinds, OutputStream stream,
      String comment, boolean recordAllDurations, Clock clock, long execStartTimeNanos)
      throws IOException {
    Preconditions.checkState(!isActive(), "Profiler already active");
    taskStack = new TaskStack();
    taskQueue = new ConcurrentLinkedQueue<>();
    describer = new ObjectDescriber();

    this.profiledTaskKinds = profiledTaskKinds;
    this.clock = clock;

    // sanity check for current limitation on the number of supported types due
    // to using enum.ordinal() to store them instead of EnumSet for performance reasons.
    Preconditions.checkState(TASK_COUNT < 256,
        "The profiler implementation supports only up to 255 different ProfilerTask values.");

    // reset state for the new profiling session
    taskId.set(0);
    this.recordAllDurations = recordAllDurations;
    this.saveException = null;
    if (stream != null) {
      this.timer = new Timer("ProfilerTimer", true);
      // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by
      // the save() method. Values for buffer sizes were chosen by running small amount of tests
      // and identifying point of diminishing returns - but I have not really tried to optimize
      // them.
      this.out = new DataOutputStream(new BufferedOutputStream(new DeflaterOutputStream(
          stream, new Deflater(Deflater.BEST_SPEED, false), 65536), 262144));

      this.out.writeInt(MAGIC); // magic
      this.out.writeInt(VERSION); // protocol_version
      this.out.writeUTF(comment);
      // ProfileTask.values() method sorts enums using their ordinal() value, so
      // there there is no need to store ordinal() value for each entry.
      this.out.writeInt(TASK_COUNT);
      for (ProfilerTask type : ProfilerTask.values()) {
        this.out.writeUTF(type.toString());
      }

      // Start save thread
      timer.schedule(new TimerTask() {
        @Override public void run() { save(); }
      }, SAVE_DELAY, SAVE_DELAY);
    } else {
      this.out = null;
    }

    // activate profiler
    profileStartTime = execStartTimeNanos;
  }

  public synchronized Iterable<SlowTask> getSlowestTasks() {
    List<Iterable<SlowTask>> slowestTasksByType = new ArrayList<>();

    for (SlowestTaskAggregator aggregator : slowestTasks) {
      if (aggregator != null) {
        slowestTasksByType.add(aggregator.getSlowestTasks());
      }
    }

    return Iterables.concat(slowestTasksByType);
  }

  /**
   * Disable profiling and complete profile file creation.
   * Subsequent calls to beginTask/endTask will no longer
   * be recorded in the profile.
   */
  public synchronized void stop() throws IOException {
    if (saveException != null) {
      throw saveException;
    }
    if (!isActive()) {
      return;
    }
    // Log a final event to update the duration of ProfilePhase.FINISH.
    logEvent(ProfilerTask.INFO, "Finishing");
    save();
    clear();

    for (SlowestTaskAggregator aggregator : slowestTasks) {
      if (aggregator != null) {
        aggregator.clear();
      }
    }

    if (saveException != null) {
      throw saveException;
    }
    if (out != null) {
      out.writeInt(EOF_MARKER);
      out.close();
      out = null;
    }
  }

  /**
   *  Returns true iff profiling is currently enabled.
   */
  public boolean isActive() {
    return profileStartTime != 0L;
  }

  public boolean isProfiling(ProfilerTask type) {
    return profiledTaskKinds.isProfiling(type);
  }

  /**
   * Saves all gathered information from taskQueue queue to the file.
   * Method is invoked internally by the Timer-based thread and at the end of
   * profiling session.
   */
  private synchronized void save() {
    if (out == null) {
      return;
    }
    try {
      // Allocate the sink once to avoid GC
      ByteBuffer sink = ByteBuffer.allocate(1024);
      while (!taskQueue.isEmpty()) {
        sink.clear();
        TaskData data = taskQueue.poll();

        VarInt.putVarLong(data.threadId, sink);
        VarInt.putVarInt(data.id, sink);
        VarInt.putVarInt(data.parentId, sink);
        VarInt.putVarLong(data.startTime - profileStartTime, sink);
        VarInt.putVarLong(data.duration, sink);

        // To save space (and improve performance), convert all description
        // strings to the canonical object and use IdentityHashMap to assign
        // unique numbers for each string.
        int descIndex = describer.getDescriptionIndex(data.object);
        VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values.

        // Save types using their ordinal() value
        sink.put((byte) data.type.ordinal());

        // Save aggregated data stats.
        if (data.counts != null) {
          for (int i = 0; i < TASK_COUNT; i++) {
            if (data.counts[i] > 0) {
              sink.put((byte) i); // aggregated type ordinal value
              VarInt.putVarInt(data.counts[i], sink);
              VarInt.putVarLong(data.durations[i], sink);
            }
          }
        }

        this.out.writeInt(sink.position());
        this.out.write(sink.array(), 0, sink.position());
        if (describer.isUnassigned(descIndex)) {
          this.out.writeUTF(describer.getDescription(data.object));
        }
      }
      this.out.flush();
    } catch (IOException e) {
      saveException = e;
      clear();
      try {
        out.close();
      } catch (IOException e2) {
        // ignore it
      }
    }
  }

  private synchronized void clear() {
    profileStartTime = 0L;
    if (timer != null) {
      timer.cancel();
      timer = null;
    }
    taskStack = null;
    taskQueue = null;
    describer = null;

    // Note that slowest task aggregator are not cleared here because clearing happens
    // periodically over the course of a command invocation.
  }

  /**
   * Unless --record_full_profiler_data is given we drop small tasks and add their time to the
   * parents duration.
   */
  private boolean wasTaskSlowEnoughToRecord(ProfilerTask type, long duration) {
    return (recordAllDurations || duration >= type.minDuration);
  }

  /**
   * Adds task directly to the main queue bypassing task stack. Used for simple
   * tasks that are known to not have any subtasks.
   *
   * @param startTime task start time (obtained through {@link Profiler#nanoTimeMaybe()})
   * @param duration task duration
   * @param type task type
   * @param object object associated with that task. Can be String object that
   *               describes it.
   */
  private void logTask(long startTime, long duration, ProfilerTask type, Object object) {
    Preconditions.checkNotNull(object);
    Preconditions.checkState(startTime > 0, "startTime was %s", startTime);
    if (duration < 0) {
      // See note in Clock#nanoTime, which is used by Profiler#nanoTimeMaybe.
      duration = 0;
    }

    TaskData parent = taskStack.peek();
    if (parent != null) {
      parent.aggregateChild(type, duration);
    }
    if (wasTaskSlowEnoughToRecord(type, duration)) {
      TaskData data = taskStack.create(startTime, type, object);
      data.duration = duration;
      if (out != null) {
        taskQueue.add(data);
      }

      SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];

      if (aggregator != null) {
        aggregator.add(data);
      }
    }
  }

  /**
   * Used externally to submit simple task (one that does not have any subtasks).
   * Depending on the minDuration attribute of the task type, task may be
   * just aggregated into the parent task and not stored directly.
   *
   * @param startTime task start time (obtained through {@link
   *        Profiler#nanoTimeMaybe()})
   * @param type task type
   * @param object object associated with that task. Can be String object that
   *               describes it.
   */
  public void logSimpleTask(long startTime, ProfilerTask type, Object object) {
    if (isActive() && isProfiling(type)) {
      logTask(startTime, clock.nanoTime() - startTime, type, object);
    }
  }

  /**
   * Used externally to submit simple task (one that does not have any
   * subtasks). Depending on the minDuration attribute of the task type, task
   * may be just aggregated into the parent task and not stored directly.
   *
   * <p>Note that start and stop time must both be acquired from the same clock
   * instance.
   *
   * @param startTime task start time
   * @param stopTime task stop time
   * @param type task type
   * @param object object associated with that task. Can be String object that
   *               describes it.
   */
  public void logSimpleTask(long startTime, long stopTime, ProfilerTask type, Object object) {
    if (isActive() && isProfiling(type)) {
      logTask(startTime, stopTime - startTime, type, object);
    }
  }

  /**
   * Used externally to submit simple task (one that does not have any
   * subtasks). Depending on the minDuration attribute of the task type, task
   * may be just aggregated into the parent task and not stored directly.
   *
   * @param startTime task start time (obtained through {@link
   *        Profiler#nanoTimeMaybe()})
   * @param duration the duration of the task
   * @param type task type
   * @param object object associated with that task. Can be String object that
   *               describes it.
   */
  public void logSimpleTaskDuration(long startTime, long duration, ProfilerTask type,
                                    Object object) {
    if (isActive() && isProfiling(type)) {
      logTask(startTime, duration, type, object);
    }
  }

  /**
   * Used to log "events" - tasks with zero duration.
   */
  public void logEvent(ProfilerTask type, Object object) {
    if (isActive() && isProfiling(type)) {
      logTask(clock.nanoTime(), 0, type, object);
    }
  }

  /**
   * Records the beginning of the task specified by the parameters. This method
   * should always be followed by completeTask() invocation to mark the end of
   * task execution (usually ensured by try {} finally {} block). Failure to do
   * so will result in task stack corruption.
   *
   * Use of this method allows to support nested task monitoring. For tasks that
   * are known to not have any subtasks, logSimpleTask() should be used instead.
   *
   * @param type predefined task type - see ProfilerTask for available types.
   * @param object object associated with that task. Can be String object that
   *               describes it.
   */
  public void startTask(ProfilerTask type, Object object) {
    // ProfilerInfo.allTasksById is supposed to be an id -> Task map, but it is in fact a List,
    // which means that we cannot drop tasks to which we had already assigned ids. Therefore,
    // non-leaf tasks must not have a minimum duration. However, we don't quite consistently
    // enforce this, and Blaze only works because we happen not to add child tasks to those parent
    // tasks that have a minimum duration.
    Preconditions.checkNotNull(object);
    if (isActive() && isProfiling(type)) {
      taskStack.push(type, object);
    }
  }

  /**
   * Records the end of the task and moves tasks from the thread-local stack to
   * the main queue. Will validate that given task type matches task at the top
   * of the stack.
   *
   * @param type task type.
   */
  public void completeTask(ProfilerTask type) {
    if (isActive() && isProfiling(type)) {
      long endTime = clock.nanoTime();
      TaskData data = taskStack.pop();
      // Do not use Preconditions class below due to the very expensive
      // toString() calls used in the message.
      if (data.type != type) {
        throw new IllegalStateException("Inconsistent Profiler.completeTask() call for the "
            + type + " task.\n " + taskStack);
      }
      data.duration = endTime - data.startTime;
      if (data.parentId > 0) {
        taskStack.peek().aggregateChild(data.type, data.duration);
      }
      boolean shouldRecordTask = wasTaskSlowEnoughToRecord(type, data.duration);
      if (out != null && (shouldRecordTask || data.counts != null)) {
        taskQueue.add(data);
      }

      if (shouldRecordTask) {
        SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];

        if (aggregator != null) {
          aggregator.add(data);
        }
      }
    }
  }

  /**
   * Convenience method to log phase marker tasks.
   */
  public void markPhase(ProfilePhase phase) {
    MemoryProfiler.instance().markPhase(phase);
    if (isActive() && isProfiling(ProfilerTask.PHASE)) {
      Preconditions.checkState(taskStack.isEmpty(), "Phase tasks must not be nested");
      logEvent(ProfilerTask.PHASE, phase.description);
    }
  }

  /**
   * Convenience method to log spawn tasks.
   *
   * TODO(bazel-team): Right now method expects single string of the spawn action
   * as task description (usually either argv[0] or a name of the main executable
   * in case of complex shell commands). Maybe it should accept Command object
   * and create more user friendly description.
   */
  public void logSpawn(long startTime, String arg0) {
    if (isActive() && isProfiling(ProfilerTask.SPAWN)) {
      logTask(startTime, clock.nanoTime() - startTime, ProfilerTask.SPAWN, arg0);
    }
  }

}