diff options
Diffstat (limited to 'src/main')
4 files changed, 206 insertions, 172 deletions
diff --git a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java index 0bf0e4117b..652756b58c 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/Profiler.java @@ -37,11 +37,11 @@ import java.util.IdentityHashMap; import java.util.List; import java.util.Map; import java.util.Queue; -import java.util.Timer; -import java.util.TimerTask; -import java.util.concurrent.ConcurrentLinkedQueue; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.LinkedBlockingDeque; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicInteger; +import java.util.function.Consumer; import java.util.logging.Logger; import java.util.zip.Deflater; import java.util.zip.DeflaterOutputStream; @@ -130,10 +130,6 @@ public final class Profiler { // EOF marker. Must be < 0. public static final int EOF_MARKER = -1; - // Profiler will check for gathered data and persist all of it in the - // separate thread every SAVE_DELAY ms. - private static final int SAVE_DELAY = 2000; // ms - /** * The profiler (a static singleton instance). Inactive by default. */ @@ -141,6 +137,13 @@ public final class Profiler { private static final int HISTOGRAM_BUCKETS = 20; + private static final TaskData POISON_PILL = new TaskData(0, 0, null, null, "poison pill"); + + /** File format enum. */ + public static enum Format { + BINARY_BAZEL_FORMAT; + } + /** A task that was very slow. */ public static final class SlowTask implements Comparable<SlowTask> { final long durationNanos; @@ -186,7 +189,7 @@ public final class Profiler { * methods is. */ @ThreadCompatible - private final class TaskData { + private static final class TaskData { final long threadId; final long startTimeNanos; final int id; @@ -198,12 +201,11 @@ public final class Profiler { int[] counts; // number of invocations per ProfilerTask type long[] durations; // time spend in the task per ProfilerTask type - TaskData(long startTimeNanos, TaskData parent, ProfilerTask eventType, String description) { - threadId = Thread.currentThread().getId(); - counts = null; - durations = null; - id = taskId.incrementAndGet(); - parentId = (parent == null ? 0 : parent.id); + TaskData( + int id, long startTimeNanos, TaskData parent, ProfilerTask eventType, String description) { + this.id = id; + this.threadId = Thread.currentThread().getId(); + this.parentId = (parent == null ? 0 : parent.id); this.startTimeNanos = startTimeNanos; this.type = eventType; this.description = Preconditions.checkNotNull(description); @@ -238,7 +240,6 @@ public final class Profiler { */ @ThreadSafe private final class TaskStack extends ThreadLocal<List<TaskData>> { - @Override public List<TaskData> initialValue() { return new ArrayList<>(); @@ -266,7 +267,7 @@ public final class Profiler { } public TaskData create(long startTimeNanos, ProfilerTask eventType, String description) { - return new TaskData(startTimeNanos, peek(), eventType, description); + return new TaskData(taskId.incrementAndGet(), startTimeNanos, peek(), eventType, description); } @Override @@ -432,15 +433,13 @@ public final class Profiler { private volatile boolean recordAllDurations = false; private AtomicInteger taskId = new AtomicInteger(); + private Thread writerThread; private TaskStack taskStack; - private Queue<TaskData> taskQueue; - private DataOutputStream out; - private Timer timer; + private BlockingQueue<TaskData> taskQueue; private IOException saveException; - private ObjectDescriber describer; @SuppressWarnings("unchecked") private final SlowestTaskAggregator[] slowestTasks = - new SlowestTaskAggregator[ProfilerTask.values().length]; + new SlowestTaskAggregator[ProfilerTask.values().length]; private final StatRecorder[] tasksHistograms = new StatRecorder[ProfilerTask.values().length]; @@ -508,15 +507,15 @@ public final class Profiler { public synchronized void start( ProfiledTaskKinds profiledTaskKinds, OutputStream stream, + Format format, String comment, boolean recordAllDurations, Clock clock, - long execStartTimeNanos) - throws IOException { + long execStartTimeNanos) { Preconditions.checkState(!isActive(), "Profiler already active"); taskStack = new TaskStack(); - taskQueue = new ConcurrentLinkedQueue<>(); - describer = new ObjectDescriber(); + taskQueue = new LinkedBlockingDeque<>(); + initHistograms(); this.profiledTaskKinds = profiledTaskKinds; this.clock = clock; @@ -530,37 +529,27 @@ public final class Profiler { taskId.set(0); this.recordAllDurations = recordAllDurations; this.saveException = null; + this.writerThread = null; if (stream != null) { - this.timer = new Timer("ProfilerTimer", true); - // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by - // the save() method. Values for buffer sizes were chosen by running small amount of tests - // and identifying point of diminishing returns - but I have not really tried to optimize - // them. - this.out = new DataOutputStream(new BufferedOutputStream(new DeflaterOutputStream( - stream, new Deflater(Deflater.BEST_SPEED, false), 65536), 262144)); - - this.out.writeInt(MAGIC); // magic - this.out.writeInt(VERSION); // protocol_version - this.out.writeUTF(comment); - // ProfileTask.values() method sorts enums using their ordinal() value, so - // there there is no need to store ordinal() value for each entry. - this.out.writeInt(TASK_COUNT); - for (ProfilerTask type : ProfilerTask.values()) { - this.out.writeUTF(type.toString()); + if (format == Format.BINARY_BAZEL_FORMAT) { + BinaryFormatWriter writer = + new BinaryFormatWriter( + stream, taskQueue, profileStartTime, comment, (IOException e) -> abortWriting(e)); + // Start save thread + writerThread = new Thread(writer); + writerThread.start(); } - - // Start save thread - timer.schedule(new TimerTask() { - @Override public void run() { save(); } - }, SAVE_DELAY, SAVE_DELAY); - } else { - this.out = null; } // activate profiler profileStartTime = execStartTimeNanos; } + private synchronized void abortWriting(IOException e) { + saveException = e; + writerThread = null; + } + public synchronized Iterable<SlowTask> getSlowestTasks() { List<Iterable<SlowTask>> slowestTasksByType = new ArrayList<>(); @@ -587,8 +576,21 @@ public final class Profiler { } // Log a final event to update the duration of ProfilePhase.FINISH. logEvent(ProfilerTask.INFO, "Finishing"); - save(); - clear(); + if (writerThread != null) { + // Add poison pill to queue and then wait for writer thread to shut down. + taskQueue.add(POISON_PILL); + try { + writerThread.join(); + } catch (InterruptedException e) { + writerThread.interrupt(); + Thread.currentThread().interrupt(); + } + writerThread = null; + } + initHistograms(); + profileStartTime = 0L; + taskStack = null; + taskQueue = null; for (SlowestTaskAggregator aggregator : slowestTasks) { if (aggregator != null) { @@ -599,11 +601,6 @@ public final class Profiler { if (saveException != null) { throw saveException; } - if (out != null) { - out.writeInt(EOF_MARKER); - out.close(); - out = null; - } } /** @@ -618,81 +615,6 @@ public final class Profiler { } /** - * Saves all gathered information from taskQueue queue to the file. - * Method is invoked internally by the Timer-based thread and at the end of - * profiling session. - */ - private synchronized void save() { - if (out == null) { - return; - } - try { - // Allocate the sink once to avoid GC - ByteBuffer sink = ByteBuffer.allocate(1024); - TaskData data; - while ((data = taskQueue.poll()) != null) { - sink.clear(); - - VarInt.putVarLong(data.threadId, sink); - VarInt.putVarInt(data.id, sink); - VarInt.putVarInt(data.parentId, sink); - VarInt.putVarLong(data.startTimeNanos - profileStartTime, sink); - VarInt.putVarLong(data.duration, sink); - - // To save space (and improve performance), convert all description - // strings to the canonical object and use IdentityHashMap to assign - // unique numbers for each string. - int descIndex = describer.getDescriptionIndex(data.description); - VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values. - - // Save types using their ordinal() value - sink.put((byte) data.type.ordinal()); - - // Save aggregated data stats. - if (data.counts != null) { - for (int i = 0; i < TASK_COUNT; i++) { - if (data.counts[i] > 0) { - sink.put((byte) i); // aggregated type ordinal value - VarInt.putVarInt(data.counts[i], sink); - VarInt.putVarLong(data.durations[i], sink); - } - } - } - - this.out.writeInt(sink.position()); - this.out.write(sink.array(), 0, sink.position()); - if (describer.isUnassigned(descIndex)) { - this.out.writeUTF(describer.memoizeDescription(data.description)); - } - } - this.out.flush(); - } catch (IOException e) { - saveException = e; - clear(); - try { - out.close(); - } catch (IOException e2) { - // ignore it - } - } - } - - private synchronized void clear() { - initHistograms(); - profileStartTime = 0L; - if (timer != null) { - timer.cancel(); - timer = null; - } - taskStack = null; - taskQueue = null; - describer = null; - - // Note that slowest task aggregator are not cleared here because clearing happens - // periodically over the course of a command invocation. - } - - /** * Unless --record_full_profiler_data is given we drop small tasks and add their time to the * parents duration. */ @@ -735,9 +657,7 @@ public final class Profiler { if (wasTaskSlowEnoughToRecord(type, duration)) { TaskData data = localStack.create(startTimeNanos, type, description); data.duration = duration; - if (out != null) { - localQueue.add(data); - } + localQueue.add(data); SlowestTaskAggregator aggregator = slowestTasks[type.ordinal()]; @@ -863,7 +783,7 @@ public final class Profiler { taskStack.peek().aggregateChild(data.type, data.duration); } boolean shouldRecordTask = wasTaskSlowEnoughToRecord(type, data.duration); - if (out != null && (shouldRecordTask || data.counts != null)) { + if (shouldRecordTask || data.counts != null) { taskQueue.add(data); } @@ -885,4 +805,110 @@ public final class Profiler { logEvent(ProfilerTask.PHASE, phase.description); } } + + /** Writes the profile in the binary Bazel profile format. */ + private static class BinaryFormatWriter implements Runnable { + private final DataOutputStream out; + private final BlockingQueue<TaskData> queue; + private final long profileStartTime; + private final String comment; + private final Consumer<? super IOException> failureHandler; + + BinaryFormatWriter( + OutputStream out, + BlockingQueue<TaskData> queue, + long profileStartTime, + String comment, + Consumer<? super IOException> failureHandler) { + // Wrapping deflater stream in the buffered stream proved to reduce CPU consumption caused by + // the write() method. Values for buffer sizes were chosen by running small amount of tests + // and identifying point of diminishing returns - but I have not really tried to optimize + // them. + this.out = + new DataOutputStream( + new BufferedOutputStream( + new DeflaterOutputStream( + // the DeflaterOutputStream has its own output buffer of 65k, chosen at random + out, new Deflater(Deflater.BEST_SPEED, false), 65536), + 262144)); // buffer size, basically chosen at random + this.queue = queue; + this.profileStartTime = profileStartTime; + this.comment = comment; + this.failureHandler = failureHandler; + } + + private void writeHeader() throws IOException { + out.writeInt(MAGIC); // magic + out.writeInt(VERSION); // protocol_version + out.writeUTF(comment); + // ProfileTask.values() method sorts enums using their ordinal() value, so + // there there is no need to store ordinal() value for each entry. + out.writeInt(TASK_COUNT); + for (ProfilerTask type : ProfilerTask.values()) { + out.writeUTF(type.toString()); + } + } + + /** + * Saves all gathered information from taskQueue queue to the file. + * Method is invoked internally by the Timer-based thread and at the end of + * profiling session. + */ + @Override + public void run() { + try { + writeHeader(); + // Allocate the sink once to avoid GC + ByteBuffer sink = ByteBuffer.allocate(1024); + ObjectDescriber describer = new ObjectDescriber(); + TaskData data; + while ((data = queue.take()) != POISON_PILL) { + sink.clear(); + + VarInt.putVarLong(data.threadId, sink); + VarInt.putVarInt(data.id, sink); + VarInt.putVarInt(data.parentId, sink); + VarInt.putVarLong(data.startTimeNanos - profileStartTime, sink); + VarInt.putVarLong(data.duration, sink); + + // To save space (and improve performance), convert all description + // strings to the canonical object and use IdentityHashMap to assign + // unique numbers for each string. + int descIndex = describer.getDescriptionIndex(data.description); + VarInt.putVarInt(descIndex + 1, sink); // Add 1 to avoid encoding negative values. + + // Save types using their ordinal() value + sink.put((byte) data.type.ordinal()); + + // Save aggregated data stats. + if (data.counts != null) { + for (int i = 0; i < TASK_COUNT; i++) { + if (data.counts[i] > 0) { + sink.put((byte) i); // aggregated type ordinal value + VarInt.putVarInt(data.counts[i], sink); + VarInt.putVarLong(data.durations[i], sink); + } + } + } + + out.writeInt(sink.position()); + out.write(sink.array(), 0, sink.position()); + if (describer.isUnassigned(descIndex)) { + out.writeUTF(describer.memoizeDescription(data.description)); + } + } + out.writeInt(EOF_MARKER); + out.close(); + } catch (IOException e) { + failureHandler.accept(e); + try { + out.close(); + } catch (IOException e2) { + // ignore it + } + } catch (InterruptedException e) { + // Exit silently. + } + } + } } diff --git a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java index 39c9c866dc..2666930f2f 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/ProfilerTask.java @@ -30,12 +30,12 @@ public enum ProfilerTask { ACTION("action processing", 0x666699), __ACTION_BUILDER("parallel builder completion queue", 0xCC3399), // unused __ACTION_SUBMIT("execution queue submission", 0xCC3399), // unused - ACTION_CHECK("action dependency checking", 10000000, 0x999933, 0), + ACTION_CHECK("action dependency checking", 10000000, 0x999933, 0, false), ACTION_EXECUTE("action execution", 0x99CCFF), - ACTION_LOCK("action resource lock", 10000000, 0xCC9933, 0), - ACTION_RELEASE("action resource release", 10000000, 0x006666, 0), + ACTION_LOCK("action resource lock", 10000000, 0xCC9933, 0, false), + ACTION_RELEASE("action resource release", 10000000, 0x006666, 0, false), __ACTION_GRAPH("action graph dependency", 0x3399FF), // unused - ACTION_UPDATE("update action information", 10000000, 0x993300, 0), + ACTION_UPDATE("update action information", 10000000, 0x993300, 0, false), ACTION_COMPLETE("complete action execution", 0xCCCC99), INFO("general information", 0x000066), __EXCEPTION("exception", 0xFFCC66), // unused @@ -47,12 +47,12 @@ public enum ProfilerTask { SCANNER("include scanner", 0x669999), // 30 is a good number because the slowest items are stored in a heap, with temporarily // one more element, and with 31 items, a heap becomes a complete binary tree - LOCAL_PARSE("Local parse to prepare for remote execution", 50000000, 0x6699CC, 30), - UPLOAD_TIME("Remote execution upload time", 50000000, 0x6699CC, 0), - PROCESS_TIME("Remote execution process wall time", 50000000, 0xF999CC, 0), - REMOTE_QUEUE("Remote execution queuing time", 50000000, 0xCC6600, 0), - REMOTE_SETUP("Remote execution setup", 50000000, 0xA999CC, 0), - FETCH("Remote execution file fetching", 50000000, 0xBB99CC, 0), + LOCAL_PARSE("Local parse to prepare for remote execution", 50000000, 0x6699CC, 30, false), + UPLOAD_TIME("Remote execution upload time", 50000000, 0x6699CC, 0, false), + PROCESS_TIME("Remote execution process wall time", 50000000, 0xF999CC, 0, false), + REMOTE_QUEUE("Remote execution queuing time", 50000000, 0xCC6600, 0, false), + REMOTE_SETUP("Remote execution setup", 50000000, 0xA999CC, 0, false), + FETCH("Remote execution file fetching", 50000000, 0xBB99CC, 0, false), VFS_STAT("VFS stat", 10000000, 0x9999FF, 30, true), VFS_DIR("VFS readdir", 10000000, 0x0066CC, 30, true), VFS_READLINK("VFS readlink", 10000000, 0x99CCCC, 30, true), @@ -67,7 +67,7 @@ public enum ProfilerTask { VFS_VMFS_STAT("VMFS stat", 10000000, 0x9999FF, 0, true), VFS_VMFS_DIR("VMFS readdir", 10000000, 0x0066CC, 0, true), VFS_VMFS_READ("VMFS read", 10000000, 0x99CC33, 0, true), - WAIT("thread wait", 5000000, 0x66CCCC, 0), + WAIT("thread wait", 5000000, 0x66CCCC, 0, false), __CONFIGURED_TARGET("configured target creation", 0x663300), // unused __TRANSITIVE_CLOSURE("transitive closure creation", 0x996600), // unused __TEST("for testing only", 0x000000), // unused @@ -108,15 +108,7 @@ public enum ProfilerTask { /** True if the metric records VFS operations */ private final boolean vfs; - ProfilerTask(String description, int color) { - this(description, /* minDuration= */ -1, color, /* slowestInstanceCount= */ 0, /*vfs=*/ false); - } - - ProfilerTask(String description, long minDuration, int color, int slowestInstanceCount) { - this(description, minDuration, color, slowestInstanceCount, /*vfs=*/ false); - } - - ProfilerTask(String description, long minDuration, int color, int slowestInstanceCount, + private ProfilerTask(String description, long minDuration, int color, int slowestInstanceCount, boolean vfs) { this.description = description; this.minDuration = minDuration; @@ -125,8 +117,17 @@ public enum ProfilerTask { this.vfs = vfs; } - ProfilerTask(String description) { - this(description, -1, 0x000000, 0); + private ProfilerTask(String description, int color) { + this( + description, + /* minDuration= */ -1, + color, + /* slowestInstanceCount= */ 0, + /* vfs= */ false); + } + + private ProfilerTask(String description) { + this(description, -1, 0x000000, 0, /* vfs= */ false); } /** Whether the Profiler collects the slowest instances of this task. */ diff --git a/src/main/java/com/google/devtools/build/lib/profiler/analysis/ProfileInfo.java b/src/main/java/com/google/devtools/build/lib/profiler/analysis/ProfileInfo.java index 6104674086..5050eb4436 100644 --- a/src/main/java/com/google/devtools/build/lib/profiler/analysis/ProfileInfo.java +++ b/src/main/java/com/google/devtools/build/lib/profiler/analysis/ProfileInfo.java @@ -35,6 +35,7 @@ import com.google.devtools.build.lib.vfs.Path; import java.io.BufferedInputStream; import java.io.DataInputStream; import java.io.IOException; +import java.io.InputStream; import java.io.PrintStream; import java.io.UnsupportedEncodingException; import java.nio.ByteBuffer; @@ -882,23 +883,22 @@ public class ProfileInfo { /** * Loads and parses Blaze profile file. * - * @param profileFile profile file path + * @param profileStream profile file path * * @return ProfileInfo object with some fields populated (call calculateStats() * and analyzeRelationships() to populate the remaining fields) * @throws UnsupportedEncodingException if the file format is invalid * @throws IOException if the file can't be read */ - public static ProfileInfo loadProfile(Path profileFile) - throws IOException { - // It is extremely important to wrap InflaterInputStream using - // BufferedInputStream because majority of reads would be done using - // readInt()/readLong() methods and InflaterInputStream is very inefficient - // in handling small read requests (performance difference with 1MB buffer - // used below is almost 10x). - DataInputStream in = new DataInputStream( - new BufferedInputStream(new InflaterInputStream( - profileFile.getInputStream(), new Inflater(false), 65536), 1024 * 1024)); + public static ProfileInfo loadProfile(InputStream profileStream) throws IOException { + // It is extremely important to wrap InflaterInputStream using BufferedInputStream because + // the majority of reads would be done using readInt()/readLong() methods and + // InflaterInputStream is very inefficient in handling small read requests (performance + // difference with 1MB buffer used below is almost 10x). + DataInputStream in = + new DataInputStream( + new BufferedInputStream( + new InflaterInputStream(profileStream, new Inflater(false), 65536), 1024 * 1024)); if (in.readInt() != Profiler.MAGIC) { in.close(); @@ -995,7 +995,10 @@ public class ProfileInfo { public static ProfileInfo loadProfileVerbosely(Path profileFile, InfoListener reporter) throws IOException { reporter.info("Loading " + profileFile.getPathString()); - ProfileInfo profileInfo = ProfileInfo.loadProfile(profileFile); + ProfileInfo profileInfo; + try (InputStream in = profileFile.getInputStream()) { + profileInfo = ProfileInfo.loadProfile(in); + } if (profileInfo.isCorruptedOrIncomplete()) { reporter.warn("Profile file is incomplete or corrupted - not all records were parsed"); } diff --git a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java index dcdca11575..a7b1a96a0c 100644 --- a/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java +++ b/src/main/java/com/google/devtools/build/lib/runtime/BlazeRuntime.java @@ -86,7 +86,6 @@ import com.google.devtools.common.options.OptionsParser; import com.google.devtools.common.options.OptionsParsingException; import com.google.devtools.common.options.OptionsProvider; import com.google.devtools.common.options.TriState; -import java.io.BufferedOutputStream; import java.io.File; import java.io.IOException; import java.io.OutputStream; @@ -277,7 +276,7 @@ public final class BlazeRuntime { Path profilePath = env.getWorkspace().getRelative(options.profilePath); recordFullProfilerData = options.recordFullProfilerData; - out = new BufferedOutputStream(profilePath.getOutputStream(), 1024 * 1024); + out = profilePath.getOutputStream(); env.getReporter().handle(Event.info("Writing profile data to '" + profilePath + "'")); profiledTasks = ProfiledTaskKinds.ALL; } else if (options.alwaysProfileSlowOperations) { @@ -289,8 +288,13 @@ public final class BlazeRuntime { Profiler.instance().start( profiledTasks, out, - getProductName() + " profile for " + env.getOutputBase() + " at " + new Date() - + ", build ID: " + buildID, + Profiler.Format.BINARY_BAZEL_FORMAT, + String.format( + "%s profile for %s at %s, build ID: %s", + getProductName(), + env.getOutputBase(), + new Date(), + buildID), recordFullProfilerData, clock, execStartTimeNanos); |