aboutsummaryrefslogtreecommitdiffhomepage
path: root/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
diff options
context:
space:
mode:
authorGravatar ulfjack <ulfjack@google.com>2018-06-12 07:38:51 -0700
committerGravatar Copybara-Service <copybara-piper@google.com>2018-06-12 07:40:05 -0700
commit21d60de9af1770b6ef08702d5dc9e23ed928df10 (patch)
tree28b640381cdb05afbf1fce6c4d14f948e1a29837 /src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
parent71404a77556d564beddc8ec53c17ddbf6c8b8ab5 (diff)
Refactor profiler
- move the save method to an inner class - don't use a timer, use a blocking queue instead - add a format enum (in anticipation of adding a json output format) - update the test to use an in memory buffer, and avoid FoundationTestCase Compared to the original https://github.com/bazelbuild/bazel/commit/15b8c259db111012b4642287172cb4d1d82151f3, it contains these changes: - Make it so we don't create a queue if we are not going to write any data! The queue is now owned by the writer, and if there is no writer, there is no queue. This was causing a memory regression because slowest task profiling is enabled by default, in which case the profiler is started with no output file. In that case, there's no thread that is emptying the queue, but the queue was still created by default. - add additional tests for slowest task and histogram handling; these also provide coverage for the case where the profiler is started without an output stream - move all the writer thread handling into the inner class - make writer access thread-safe - add a bunch of documentation PiperOrigin-RevId: 200212978
Diffstat (limited to 'src/main/java/com/google/devtools/build/lib/profiler/Profiler.java')
-rw-r--r--src/main/java/com/google/devtools/build/lib/profiler/Profiler.java372
1 files changed, 218 insertions, 154 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
index 0bf0e4117b..80f04cedd2 100644
--- a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
+++ b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java
@@ -36,12 +36,11 @@ import java.util.ArrayList;
import java.util.IdentityHashMap;
import java.util.List;
import java.util.Map;
-import java.util.Queue;
-import java.util.Timer;
-import java.util.TimerTask;
-import java.util.concurrent.ConcurrentLinkedQueue;
+import java.util.concurrent.BlockingQueue;
+import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.TimeUnit;
import java.util.concurrent.atomic.AtomicInteger;
+import java.util.concurrent.atomic.AtomicReference;
import java.util.logging.Logger;
import java.util.zip.Deflater;
import java.util.zip.DeflaterOutputStream;
@@ -130,22 +129,23 @@ public final class Profiler {
// EOF marker. Must be < 0.
public static final int EOF_MARKER = -1;
- // Profiler will check for gathered data and persist all of it in the
- // separate thread every SAVE_DELAY ms.
- private static final int SAVE_DELAY = 2000; // ms
-
- /**
- * The profiler (a static singleton instance). Inactive by default.
- */
+ /** The profiler (a static singleton instance). Inactive by default. */
private static final Profiler instance = new Profiler();
private static final int HISTOGRAM_BUCKETS = 20;
+ private static final TaskData POISON_PILL = new TaskData(0, 0, null, null, "poison pill");
+
+ /** File format enum. */
+ public static enum Format {
+ BINARY_BAZEL_FORMAT;
+ }
+
/** A task that was very slow. */
public static final class SlowTask implements Comparable<SlowTask> {
final long durationNanos;
final String description;
- ProfilerTask type;
+ final ProfilerTask type;
private SlowTask(TaskData taskData) {
this.durationNanos = taskData.duration;
@@ -186,7 +186,7 @@ public final class Profiler {
* methods is.
*/
@ThreadCompatible
- private final class TaskData {
+ private static final class TaskData {
final long threadId;
final long startTimeNanos;
final int id;
@@ -198,12 +198,11 @@ public final class Profiler {
int[] counts; // number of invocations per ProfilerTask type
long[] durations; // time spend in the task per ProfilerTask type
- TaskData(long startTimeNanos, TaskData parent, ProfilerTask eventType, String description) {
- threadId = Thread.currentThread().getId();
- counts = null;
- durations = null;
- id = taskId.incrementAndGet();
- parentId = (parent == null ? 0 : parent.id);
+ TaskData(
+ int id, long startTimeNanos, TaskData parent, ProfilerTask eventType, String description) {
+ this.id = id;
+ this.threadId = Thread.currentThread().getId();
+ this.parentId = (parent == null ? 0 : parent.id);
this.startTimeNanos = startTimeNanos;
this.type = eventType;
this.description = Preconditions.checkNotNull(description);
@@ -238,7 +237,6 @@ public final class Profiler {
*/
@ThreadSafe
private final class TaskStack extends ThreadLocal<List<TaskData>> {
-
@Override
public List<TaskData> initialValue() {
return new ArrayList<>();
@@ -266,7 +264,7 @@ public final class Profiler {
}
public TaskData create(long startTimeNanos, ProfilerTask eventType, String description) {
- return new TaskData(startTimeNanos, peek(), eventType, description);
+ return new TaskData(taskId.incrementAndGet(), startTimeNanos, peek(), eventType, description);
}
@Override
@@ -428,19 +426,29 @@ public final class Profiler {
private Clock clock;
private ProfiledTaskKinds profiledTaskKinds;
- private volatile long profileStartTime = 0L;
+ private volatile long profileStartTime;
private volatile boolean recordAllDurations = false;
+
+ /** This counter provides a unique id for every task, used to provide a parent/child relation. */
private AtomicInteger taskId = new AtomicInteger();
+ /**
+ * The reference to the current writer, if any. If the referenced writer is null, then disk writes
+ * are disabled. This can happen when slowest task recording is enabled.
+ */
+ private AtomicReference<FileWriter> writerRef = new AtomicReference<>();
+
+ /**
+ * This is a per-thread data structure that's used to track the current stack of open tasks, the
+ * purpose of which is to track the parent id of every task. This is also used to ensure that
+ * {@link #profile} and {@link #completeTask} calls always occur in pairs.
+ */
+ // TODO(ulfjack): We can infer the parent/child relationship after the fact instead of tracking it
+ // at runtime. That would allow us to remove this data structure entirely.
private TaskStack taskStack;
- private Queue<TaskData> taskQueue;
- private DataOutputStream out;
- private Timer timer;
- private IOException saveException;
- private ObjectDescriber describer;
- @SuppressWarnings("unchecked")
+
private final SlowestTaskAggregator[] slowestTasks =
- new SlowestTaskAggregator[ProfilerTask.values().length];
+ new SlowestTaskAggregator[ProfilerTask.values().length];
private final StatRecorder[] tasksHistograms = new StatRecorder[ProfilerTask.values().length];
@@ -470,6 +478,16 @@ public final class Profiler {
}
}
+ /**
+ * Returns task histograms. This must be called between calls to {@link #start} and {@link #stop},
+ * or the returned recorders are all empty. Note that the returned recorders may still be modified
+ * concurrently (but at least they are thread-safe, so that's good).
+ *
+ * <p>The stat recorders are indexed by {@code ProfilerTask#ordinal}.
+ */
+ // TODO(ulfjack): This returns incomplete data by design. Maybe we should return the histograms on
+ // stop instead? However, this is currently only called from one location in a module, and that
+ // can't call stop itself. What to do?
public ImmutableList<StatRecorder> getTasksHistograms() {
return ImmutableList.copyOf(tasksHistograms);
}
@@ -508,15 +526,13 @@ public final class Profiler {
public synchronized void start(
ProfiledTaskKinds profiledTaskKinds,
OutputStream stream,
+ Format format,
String comment,
boolean recordAllDurations,
Clock clock,
- long execStartTimeNanos)
- throws IOException {
+ long execStartTimeNanos) {
Preconditions.checkState(!isActive(), "Profiler already active");
- taskStack = new TaskStack();
- taskQueue = new ConcurrentLinkedQueue<>();
- describer = new ObjectDescriber();
+ initHistograms();
this.profiledTaskKinds = profiledTaskKinds;
this.clock = clock;
@@ -529,38 +545,25 @@ public final class Profiler {
// reset state for the new profiling session
taskId.set(0);
this.recordAllDurations = recordAllDurations;
- this.saveException = null;
+ this.taskStack = new TaskStack();
+ FileWriter writer = null;
if (stream != null) {
- this.timer = new Timer("ProfilerTimer", true);
- // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by
- // the save() method. Values for buffer sizes were chosen by running small amount of tests
- // and identifying point of diminishing returns - but I have not really tried to optimize
- // them.
- this.out = new DataOutputStream(new BufferedOutputStream(new DeflaterOutputStream(
- stream, new Deflater(Deflater.BEST_SPEED, false), 65536), 262144));
-
- this.out.writeInt(MAGIC); // magic
- this.out.writeInt(VERSION); // protocol_version
- this.out.writeUTF(comment);
- // ProfileTask.values() method sorts enums using their ordinal() value, so
- // there there is no need to store ordinal() value for each entry.
- this.out.writeInt(TASK_COUNT);
- for (ProfilerTask type : ProfilerTask.values()) {
- this.out.writeUTF(type.toString());
+ if (format == Format.BINARY_BAZEL_FORMAT) {
+ writer = new BinaryFormatWriter(stream, profileStartTime, comment);
+ writer.start();
}
-
- // Start save thread
- timer.schedule(new TimerTask() {
- @Override public void run() { save(); }
- }, SAVE_DELAY, SAVE_DELAY);
- } else {
- this.out = null;
}
+ this.writerRef.set(writer);
// activate profiler
profileStartTime = execStartTimeNanos;
}
+ /**
+ * Returns task histograms. This must be called between calls to {@link #start} and {@link #stop},
+ * or the returned list is empty.
+ */
+ // TODO(ulfjack): This returns incomplete data by design. Also see getTasksHistograms.
public synchronized Iterable<SlowTask> getSlowestTasks() {
List<Iterable<SlowTask>> slowestTasksByType = new ArrayList<>();
@@ -579,31 +582,25 @@ public final class Profiler {
* be recorded in the profile.
*/
public synchronized void stop() throws IOException {
- if (saveException != null) {
- throw saveException;
- }
if (!isActive()) {
return;
}
// Log a final event to update the duration of ProfilePhase.FINISH.
logEvent(ProfilerTask.INFO, "Finishing");
- save();
- clear();
+ FileWriter writer = writerRef.getAndSet(null);
+ if (writer != null) {
+ writer.shutdown();
+ writer = null;
+ }
+ taskStack = null;
+ initHistograms();
+ profileStartTime = 0L;
for (SlowestTaskAggregator aggregator : slowestTasks) {
if (aggregator != null) {
aggregator.clear();
}
}
-
- if (saveException != null) {
- throw saveException;
- }
- if (out != null) {
- out.writeInt(EOF_MARKER);
- out.close();
- out = null;
- }
}
/**
@@ -618,81 +615,6 @@ public final class Profiler {
}
/**
- * Saves all gathered information from taskQueue queue to the file.
- * Method is invoked internally by the Timer-based thread and at the end of
- * profiling session.
- */
- private synchronized void save() {
- if (out == null) {
- return;
- }
- try {
- // Allocate the sink once to avoid GC
- ByteBuffer sink = ByteBuffer.allocate(1024);
- TaskData data;
- while ((data = taskQueue.poll()) != null) {
- sink.clear();
-
- VarInt.putVarLong(data.threadId, sink);
- VarInt.putVarInt(data.id, sink);
- VarInt.putVarInt(data.parentId, sink);
- VarInt.putVarLong(data.startTimeNanos - profileStartTime, sink);
- VarInt.putVarLong(data.duration, sink);
-
- // To save space (and improve performance), convert all description
- // strings to the canonical object and use IdentityHashMap to assign
- // unique numbers for each string.
- int descIndex = describer.getDescriptionIndex(data.description);
- VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values.
-
- // Save types using their ordinal() value
- sink.put((byte) data.type.ordinal());
-
- // Save aggregated data stats.
- if (data.counts != null) {
- for (int i = 0; i < TASK_COUNT; i++) {
- if (data.counts[i] > 0) {
- sink.put((byte) i); // aggregated type ordinal value
- VarInt.putVarInt(data.counts[i], sink);
- VarInt.putVarLong(data.durations[i], sink);
- }
- }
- }
-
- this.out.writeInt(sink.position());
- this.out.write(sink.array(), 0, sink.position());
- if (describer.isUnassigned(descIndex)) {
- this.out.writeUTF(describer.memoizeDescription(data.description));
- }
- }
- this.out.flush();
- } catch (IOException e) {
- saveException = e;
- clear();
- try {
- out.close();
- } catch (IOException e2) {
- // ignore it
- }
- }
- }
-
- private synchronized void clear() {
- initHistograms();
- profileStartTime = 0L;
- if (timer != null) {
- timer.cancel();
- timer = null;
- }
- taskStack = null;
- taskQueue = null;
- describer = null;
-
- // Note that slowest task aggregator are not cleared here because clearing happens
- // periodically over the course of a command invocation.
- }
-
- /**
* Unless --record_full_profiler_data is given we drop small tasks and add their time to the
* parents duration.
*/
@@ -721,8 +643,8 @@ public final class Profiler {
(int) TimeUnit.NANOSECONDS.toMillis(duration), description);
// Store instance fields as local variables so they are not nulled out from under us by #clear.
TaskStack localStack = taskStack;
- Queue<TaskData> localQueue = taskQueue;
- if (localStack == null || localQueue == null) {
+ FileWriter currentWriter = writerRef.get();
+ if (localStack == null) {
// Variables have been nulled out by #clear in between the check the caller made and this
// point in the code. Probably due to an asynchronous crash.
logger.severe("Variables null in profiler for " + type + ", probably due to async crash");
@@ -735,8 +657,8 @@ public final class Profiler {
if (wasTaskSlowEnoughToRecord(type, duration)) {
TaskData data = localStack.create(startTimeNanos, type, description);
data.duration = duration;
- if (out != null) {
- localQueue.add(data);
+ if (currentWriter != null) {
+ currentWriter.enqueue(data);
}
SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];
@@ -863,13 +785,13 @@ public final class Profiler {
taskStack.peek().aggregateChild(data.type, data.duration);
}
boolean shouldRecordTask = wasTaskSlowEnoughToRecord(type, data.duration);
- if (out != null && (shouldRecordTask || data.counts != null)) {
- taskQueue.add(data);
+ FileWriter writer = writerRef.get();
+ if ((shouldRecordTask || data.counts != null) && writer != null) {
+ writer.enqueue(data);
}
if (shouldRecordTask) {
SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()];
-
if (aggregator != null) {
aggregator.add(data);
}
@@ -885,4 +807,146 @@ public final class Profiler {
logEvent(ProfilerTask.PHASE, phase.description);
}
}
+
+ private abstract static class FileWriter implements Runnable {
+ protected final BlockingQueue<TaskData> queue;
+ protected final Thread thread;
+ protected IOException savedException;
+
+ FileWriter() {
+ this.queue = new LinkedBlockingDeque<>();
+ this.thread = new Thread(this);
+ }
+
+ public void shutdown() throws IOException {
+ // Add poison pill to queue and then wait for writer thread to shut down.
+ queue.add(POISON_PILL);
+ try {
+ thread.join();
+ } catch (InterruptedException e) {
+ thread.interrupt();
+ Thread.currentThread().interrupt();
+ }
+ if (savedException != null) {
+ throw savedException;
+ }
+ }
+
+ public void start() {
+ thread.start();
+ }
+
+ public void enqueue(TaskData data) {
+ queue.add(data);
+ }
+ }
+
+ /** Writes the profile in the binary Bazel profile format. */
+ private static class BinaryFormatWriter extends FileWriter {
+ private final DataOutputStream out;
+ private final long profileStartTime;
+ private final String comment;
+
+ BinaryFormatWriter(
+ OutputStream out,
+ long profileStartTime,
+ String comment) {
+ // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by
+ // the write() method. Values for buffer sizes were chosen by running small amount of tests
+ // and identifying point of diminishing returns - but I have not really tried to optimize
+ // them.
+ this.out =
+ new DataOutputStream(
+ new BufferedOutputStream(
+ new DeflaterOutputStream(
+ // the DeflaterOutputStream has its own output buffer of 65k, chosen at random
+ out, new Deflater(Deflater.BEST_SPEED, false), 65536),
+ 262144)); // buffer size, basically chosen at random
+ this.profileStartTime = profileStartTime;
+ this.comment = comment;
+ }
+
+ private void writeHeader() throws IOException {
+ out.writeInt(MAGIC); // magic
+ out.writeInt(VERSION); // protocol_version
+ out.writeUTF(comment);
+ // ProfileTask.values() method sorts enums using their ordinal() value, so
+ // there there is no need to store ordinal() value for each entry.
+ out.writeInt(TASK_COUNT);
+ for (ProfilerTask type : ProfilerTask.values()) {
+ out.writeUTF(type.toString());
+ }
+ }
+
+ /**
+ * Saves all gathered information from taskQueue queue to the file.
+ * Method is invoked internally by the Timer-based thread and at the end of
+ * profiling session.
+ */
+ @Override
+ public void run() {
+ try {
+ boolean receivedPoisonPill = false;
+ try {
+ writeHeader();
+ // Allocate the sink once to avoid GC
+ ByteBuffer sink = ByteBuffer.allocate(1024);
+ ObjectDescriber describer = new ObjectDescriber();
+ TaskData data;
+ while ((data = queue.take()) != POISON_PILL) {
+ sink.clear();
+
+ VarInt.putVarLong(data.threadId, sink);
+ VarInt.putVarInt(data.id, sink);
+ VarInt.putVarInt(data.parentId, sink);
+ VarInt.putVarLong(data.startTimeNanos - profileStartTime, sink);
+ VarInt.putVarLong(data.duration, sink);
+
+ // To save space (and improve performance), convert all description
+ // strings to the canonical object and use IdentityHashMap to assign
+ // unique numbers for each string.
+ int descIndex = describer.getDescriptionIndex(data.description);
+ VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values.
+
+ // Save types using their ordinal() value
+ sink.put((byte) data.type.ordinal());
+
+ // Save aggregated data stats.
+ if (data.counts != null) {
+ for (int i = 0; i < TASK_COUNT; i++) {
+ if (data.counts[i] > 0) {
+ sink.put((byte) i); // aggregated type ordinal value
+ VarInt.putVarInt(data.counts[i], sink);
+ VarInt.putVarLong(data.durations[i], sink);
+ }
+ }
+ }
+
+ out.writeInt(sink.position());
+ out.write(sink.array(), 0, sink.position());
+ if (describer.isUnassigned(descIndex)) {
+ out.writeUTF(describer.memoizeDescription(data.description));
+ }
+ }
+ receivedPoisonPill = true;
+ out.writeInt(EOF_MARKER);
+ out.close();
+ } catch (IOException e) {
+ this.savedException = e;
+ try {
+ out.close();
+ } catch (IOException e2) {
+ // ignore it
+ }
+ if (!receivedPoisonPill) {
+ while (queue.take() != POISON_PILL) {
+ // We keep emptying the queue, but we can't write anything.
+ }
+ }
+ }
+ } catch (InterruptedException e) {
+ // Exit silently.
+ }
+ }
+ }
}