aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow
diff options
context:
space:
mode:
authorGravatar A. Unique TensorFlower <gardener@tensorflow.org>2018-04-30 04:21:09 -0700
committerGravatar TensorFlower Gardener <gardener@tensorflow.org>2018-04-30 04:23:46 -0700
commit914796d5e9bc7b0c619b53c7eb24cfe7d6c7fb9b (patch)
tree17a2ec7b1a0902ebaf25451a418825e834e57571 /tensorflow
parentc2186af6c28f8817122b27f0cd29e16daeae68f1 (diff)
Cleaning up tracing code.
PiperOrigin-RevId: 194768567
Diffstat (limited to 'tensorflow')
-rw-r--r--tensorflow/compiler/jit/xla_device.cc13
-rw-r--r--tensorflow/compiler/xla/service/gpu/gpu_compiler.cc10
-rw-r--r--tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc2
-rw-r--r--tensorflow/core/common_runtime/copy_tensor.cc2
-rw-r--r--tensorflow/core/common_runtime/gpu/gpu_device.cc25
-rw-r--r--tensorflow/core/common_runtime/gpu/gpu_util.cc2
-rw-r--r--tensorflow/core/common_runtime/process_util.cc25
-rw-r--r--tensorflow/core/common_runtime/sycl/sycl_device.cc11
-rw-r--r--tensorflow/core/common_runtime/threadpool_device.cc22
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc4
-rw-r--r--tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc8
-rw-r--r--tensorflow/core/framework/dataset.h2
-rw-r--r--tensorflow/core/kernels/data/map_and_batch_dataset_op.cc4
-rw-r--r--tensorflow/core/kernels/function_ops.cc8
-rw-r--r--tensorflow/core/lib/core/threadpool.cc17
-rw-r--r--tensorflow/core/platform/default/device_tracer.cc31
-rw-r--r--tensorflow/core/platform/default/tracing.cc40
-rw-r--r--tensorflow/core/platform/default/tracing_impl.h22
-rw-r--r--tensorflow/core/platform/posix/tracing.cc40
-rw-r--r--tensorflow/core/platform/tracing.cc98
-rw-r--r--tensorflow/core/platform/tracing.h351
21 files changed, 303 insertions, 434 deletions
diff --git a/tensorflow/compiler/jit/xla_device.cc b/tensorflow/compiler/jit/xla_device.cc
index c814b7eb02..70263b1ff9 100644
--- a/tensorflow/compiler/jit/xla_device.cc
+++ b/tensorflow/compiler/jit/xla_device.cc
@@ -260,11 +260,10 @@ Status XlaDevice::FillContextMap(const Graph* graph,
void XlaDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
VLOG(1) << "XlaDevice::Compute " << op_kernel->name() << ":"
<< op_kernel->type_string();
- // When TraceMe profiling is off (which is the default), the
- // following TraceMe constructor is simply a conditional test of
- // false value. Measurements show that its overhead is negligible.
- port::Tracing::TraceMe trace_me(op_kernel->name(), op_kernel->type_string(),
- op_kernel->IsExpensive());
+ // When Xprof profiling is off (which is the default), constructing the
+ // activity is simple enough that its overhead is negligible.
+ tracing::ScopedActivity activity(op_kernel->name(), op_kernel->type_string(),
+ op_kernel->IsExpensive());
op_kernel->Compute(context);
}
@@ -272,8 +271,8 @@ void XlaDevice::ComputeAsync(AsyncOpKernel* op_kernel, OpKernelContext* context,
AsyncOpKernel::DoneCallback done) {
VLOG(1) << "XlaDevice::ComputeAsync " << op_kernel->name() << ":"
<< op_kernel->type_string();
- port::Tracing::TraceMe trace_me(op_kernel->name(), op_kernel->type_string(),
- op_kernel->IsExpensive());
+ tracing::ScopedActivity activity(op_kernel->name(), op_kernel->type_string(),
+ op_kernel->IsExpensive());
op_kernel->ComputeAsync(context, done);
}
diff --git a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
index 30bfc9351a..796c3070f2 100644
--- a/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
+++ b/tensorflow/compiler/xla/service/gpu/gpu_compiler.cc
@@ -100,7 +100,7 @@ namespace gpu {
namespace {
-using tensorflow::port::Tracing;
+namespace tracing = tensorflow::tracing;
// Returns the directory containing nvvm libdevice files. config_cuda_data_dir
// should be equal to config().debug_options().xla_gpu_cuda_data_dir() of the
@@ -410,7 +410,7 @@ void WarnIfBadDriverJITVersion() {
// code (i.e. a cubin) as a byte array.
StatusOr<std::vector<uint8>> CompilePtx(const string& ptx, int cc_major,
int cc_minor) {
- Tracing::TraceMe annotation("Compile PTX", /*is_expensive=*/true);
+ tracing::ScopedActivity activity("Compile PTX", /*is_expensive=*/true);
const string ptxas_path =
tensorflow::io::JoinPath(tensorflow::CudaRoot(), "bin", "ptxas");
VLOG(2) << "Using ptxas at " << ptxas_path;
@@ -481,8 +481,8 @@ StatusOr<std::unique_ptr<HloModule>> GpuCompiler::RunHloPasses(
std::unique_ptr<HloModule> module, se::StreamExecutor* stream_exec,
DeviceMemoryAllocator* device_allocator) {
XLA_SCOPED_LOGGING_TIMER("GpuCompiler::RunHloPasses");
- Tracing::TraceMe annotation("HLO Transforms", module->name(),
- /*is_expensive=*/true);
+ tracing::ScopedActivity activity("HLO Transforms", module->name(),
+ /*is_expensive=*/true);
TF_RETURN_IF_ERROR(
OptimizeHloModule(module.get(), stream_exec, device_allocator));
return std::move(module);
@@ -692,7 +692,7 @@ std::vector<uint8> GpuCompiler::CompilePtxOrGetCachedResult(const string& ptx,
int cc_major,
int cc_minor) {
XLA_SCOPED_LOGGING_TIMER("GpuCompiler::CompilePtxOrGetCachedResult");
- Tracing::TraceMe annotation("PTX->CUBIN", /*is_expensive=*/true);
+ tracing::ScopedActivity activity("PTX->CUBIN", /*is_expensive=*/true);
bool inserted;
decltype(compilation_cache_.begin()) iter;
// Pointers into compilation_cache_ where the ptx and (optional) cubin are
diff --git a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
index df9d9be889..d70cb07c57 100644
--- a/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
+++ b/tensorflow/compiler/xla/service/gpu/llvm_gpu_backend/gpu_backend_lib.cc
@@ -491,7 +491,7 @@ StatusOr<string> CompileToPtx(llvm::Module* module,
string ptx;
{
- tensorflow::port::Tracing::TraceMe annotation(
+ tensorflow::tracing::ScopedActivity activity(
"Compiling IR", llvm_ir::AsString(module->getName()),
/*is_expensive=*/true);
XLA_SCOPED_LOGGING_TIMER("Compile module " +
diff --git a/tensorflow/core/common_runtime/copy_tensor.cc b/tensorflow/core/common_runtime/copy_tensor.cc
index e35548729b..08d120c7a5 100644
--- a/tensorflow/core/common_runtime/copy_tensor.cc
+++ b/tensorflow/core/common_runtime/copy_tensor.cc
@@ -237,7 +237,7 @@ void CopyTensor::ViaDMA(StringPiece edge_name, DeviceContext* send_dev_context,
const AllocatorAttributes dst_alloc_attr,
const Tensor* input, Tensor* output,
StatusCallback done) {
- port::Tracing::ScopedAnnotation annotation(edge_name);
+ tracing::ScopedAnnotation annotation(edge_name);
VLOG(1) << "Copy " << edge_name;
const DeviceType src_device_type(
diff --git a/tensorflow/core/common_runtime/gpu/gpu_device.cc b/tensorflow/core/common_runtime/gpu/gpu_device.cc
index 944f0c82e7..9b434e5e2f 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_device.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_device.cc
@@ -406,12 +406,8 @@ Status BaseGPUDevice::FillContextMap(const Graph* graph,
}
void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
- // ScopedActivity is cheap when tracing is not active, but we
- // can avoid computing the Hash64.
- // TODO(pbar) This would no longer be needed if Ops have a unique id.
- const uint64 id = port::Tracing::IsActive() ? Hash64(op_kernel->name()) : 0;
- port::Tracing::ScopedActivity region(port::Tracing::EventCategory::kCompute,
- id);
+ tracing::ScopedRegion region(tracing::EventCategory::kCompute,
+ op_kernel->name());
// NOTE(tucker): We need to discriminate between Eigen GPU
// operations and all others. If an operation is Eigen
@@ -425,11 +421,9 @@ void BaseGPUDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
if (op_kernel->is_internal() && op_kernel->type_string() == "_Recv") {
context->SetStatus(errors::Internal(
"Invalid synchronous 'Compute' on GPU for '_Recv' op"));
- } else if (port::Tracing::ScopedAnnotation::Enabled()) {
- port::Tracing::ScopedAnnotation annotation(op_kernel->name(),
- op_kernel->type_string());
- ComputeHelper(op_kernel, context);
} else {
+ tracing::ScopedAnnotation annotation(op_kernel->name(),
+ op_kernel->type_string());
ComputeHelper(op_kernel, context);
}
}
@@ -527,11 +521,10 @@ void BaseGPUDevice::ComputeAsync(AsyncOpKernel* op_kernel,
<< op_kernel->type_string() << " on GPU" << tf_gpu_id_ << " stream["
<< stream_id << "]";
- // When TraceMe profiling is off (which is the default), the
- // following TraceMe constructor is simply a conditional test of
- // false value. Measurements show that its overhead is negligible.
- port::Tracing::TraceMe activity(op_kernel->name(), op_kernel->type_string(),
- op_kernel->IsExpensive());
+ // When Xprof profiling is off (which is the default), constructing the
+ // activity is simple enough that its overhead is negligible.
+ tracing::ScopedActivity activity(op_kernel->name(), op_kernel->type_string(),
+ op_kernel->IsExpensive());
se::cuda::ScopedActivateExecutorContext scoped_activation{stream->parent()};
op_kernel->ComputeAsync(context, done);
}
@@ -573,7 +566,7 @@ Status BaseGPUDevice::MaybeCopyTensorToGPU(
},
std::move(done), std::placeholders::_1);
- port::Tracing::ScopedAnnotation annotation("MakeTensorFromProto");
+ tracing::ScopedAnnotation annotation("MakeTensorFromProto");
device_contexts_[0]->CopyCPUTensorToDevice(&from, this, copy,
std::move(wrapped_done));
return Status::OK();
diff --git a/tensorflow/core/common_runtime/gpu/gpu_util.cc b/tensorflow/core/common_runtime/gpu/gpu_util.cc
index 7ba853fa51..d38413d79c 100644
--- a/tensorflow/core/common_runtime/gpu/gpu_util.cc
+++ b/tensorflow/core/common_runtime/gpu/gpu_util.cc
@@ -149,7 +149,7 @@ void GPUUtil::SetProtoFromGPU(const Tensor& tensor, Device* dev,
char* buf = nullptr;
const int64 total_bytes = is_dead ? 0 : tensor.TotalBytes();
if (total_bytes > 0) {
- port::Tracing::ScopedAnnotation annotation("SetProtoFromGPU");
+ tracing::ScopedAnnotation annotation("SetProtoFromGPU");
alloc = ProcessState::singleton()->GetCUDAHostAllocator(0);
buf = alloc->Allocate<char>(total_bytes);
if (LogMemory::IsEnabled()) {
diff --git a/tensorflow/core/common_runtime/process_util.cc b/tensorflow/core/common_runtime/process_util.cc
index f8f3a1ecd7..21912236d0 100644
--- a/tensorflow/core/common_runtime/process_util.cc
+++ b/tensorflow/core/common_runtime/process_util.cc
@@ -79,21 +79,18 @@ thread::ThreadPool* NewThreadPoolFromSessionOptions(
}
void SchedClosure(std::function<void()> closure) {
- if (port::Tracing::IsActive()) {
- const uint64 id = port::Tracing::UniqueId();
- port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure,
- id);
- std::function<void()> wrapper = std::bind(
- [id](std::function<void()> closure) {
- port::Tracing::ScopedActivity region(
- port::Tracing::EventCategory::kRunClosure, id);
- closure();
- },
- std::move(closure));
- Env::Default()->SchedClosure(std::move(wrapper));
- } else {
- Env::Default()->SchedClosure(std::move(closure));
+ if (!tracing::EventCollector::IsEnabled()) {
+ return Env::Default()->SchedClosure(std::move(closure));
}
+ uint64 id = tracing::GetUniqueArg();
+ tracing::RecordEvent(tracing::EventCategory::kScheduleClosure, id);
+
+ Env::Default()->SchedClosure(std::bind(
+ [id](std::function<void()> closure) {
+ tracing::ScopedRegion region(tracing::EventCategory::kRunClosure, id);
+ closure();
+ },
+ std::move(closure)));
}
void SchedNonBlockingClosureAfter(int64 micros, std::function<void()> closure) {
diff --git a/tensorflow/core/common_runtime/sycl/sycl_device.cc b/tensorflow/core/common_runtime/sycl/sycl_device.cc
index 6e1a45b3ef..f3bd72f697 100644
--- a/tensorflow/core/common_runtime/sycl/sycl_device.cc
+++ b/tensorflow/core/common_runtime/sycl/sycl_device.cc
@@ -27,12 +27,11 @@ SYCLDevice::~SYCLDevice() {}
void SYCLDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
assert(context);
- if (port::Tracing::IsActive()) {
- // TODO(pbar) We really need a useful identifier of the graph node.
- const uint64 id = Hash64(op_kernel->name());
- port::Tracing::ScopedActivity region(port::Tracing::EventCategory::kCompute,
- id);
- }
+ // When ThreadScape profiling is off (which is the default), constructing the
+ // following code is simple enough that its overhead is negligible.
+ tracing::ScopedRegion region(tracing::EventCategory::kCompute,
+ op_kernel->name());
+
op_kernel->Compute(context);
}
diff --git a/tensorflow/core/common_runtime/threadpool_device.cc b/tensorflow/core/common_runtime/threadpool_device.cc
index 6d8de6a3c0..f7a07fe503 100644
--- a/tensorflow/core/common_runtime/threadpool_device.cc
+++ b/tensorflow/core/common_runtime/threadpool_device.cc
@@ -48,20 +48,14 @@ ThreadPoolDevice::ThreadPoolDevice(const SessionOptions& options,
ThreadPoolDevice::~ThreadPoolDevice() {}
void ThreadPoolDevice::Compute(OpKernel* op_kernel, OpKernelContext* context) {
- // When TraceMe profiling is off (which is the default), the
- // following TraceMe constructor is simply a conditional test of
- // false value. Measurements show that its overhead is negligible.
- port::Tracing::TraceMe trace_me(op_kernel->name(), op_kernel->type_string(),
- op_kernel->IsExpensive());
- if (port::Tracing::IsActive()) {
- // TODO(pbar) We really need a useful identifier of the graph node.
- const uint64 id = Hash64(op_kernel->name());
- port::Tracing::ScopedActivity region(port::Tracing::EventCategory::kCompute,
- id);
- op_kernel->Compute(context);
- } else {
- op_kernel->Compute(context);
- }
+ // When Xprof/ThreadScape profiling is off (which is the default), the
+ // following code is simple enough that its overhead is negligible.
+ tracing::ScopedActivity activity(op_kernel->name(), op_kernel->type_string(),
+ op_kernel->IsExpensive());
+ tracing::ScopedRegion region(tracing::EventCategory::kCompute,
+ op_kernel->name());
+
+ op_kernel->Compute(context);
}
Allocator* ThreadPoolDevice::GetAllocator(AllocatorAttributes attr) {
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
index 23968e24c8..e025e555dd 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_master_service.cc
@@ -285,7 +285,7 @@ class GrpcMasterService : public AsyncServiceInterface {
#undef ENQUEUE_REQUEST
// Start tracing, including the ID attached to the RPC.
- port::Tracing::TraceMe* TraceRpc(
+ tracing::ScopedActivity* TraceRpc(
StringPiece name,
const std::multimap<::grpc::string_ref, ::grpc::string_ref>& metadata) {
StringPiece id;
@@ -293,7 +293,7 @@ class GrpcMasterService : public AsyncServiceInterface {
if (it != metadata.end()) {
id = StringPiece(it->second.data(), it->second.size());
}
- return new port::Tracing::TraceMe(name, id);
+ return new tracing::ScopedActivity(name, id);
}
TF_DISALLOW_COPY_AND_ASSIGN(GrpcMasterService);
diff --git a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
index 1b92a79a67..b832a2115c 100644
--- a/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
+++ b/tensorflow/core/distributed_runtime/rpc/grpc_remote_master.cc
@@ -119,11 +119,11 @@ class GrpcRemoteMaster : public MasterInterface {
private:
// Start tracing, attaching a unique ID to both the trace and the RPC.
- port::Tracing::TraceMe TraceRpc(StringPiece name,
- ::grpc::ClientContext* ctx) {
- string trace_id = strings::StrCat(port::Tracing::UniqueId());
+ tracing::ScopedActivity TraceRpc(StringPiece name,
+ ::grpc::ClientContext* ctx) {
+ string trace_id = strings::StrCat(tracing::GetUniqueArg());
ctx->AddMetadata(GrpcIdKey(), trace_id);
- return port::Tracing::TraceMe(name, trace_id);
+ return tracing::ScopedActivity(name, trace_id);
}
void SetDeadline(::grpc::ClientContext* ctx, int64 time_in_ms) {
diff --git a/tensorflow/core/framework/dataset.h b/tensorflow/core/framework/dataset.h
index 8d127baac4..775d9f6eb6 100644
--- a/tensorflow/core/framework/dataset.h
+++ b/tensorflow/core/framework/dataset.h
@@ -521,7 +521,7 @@ class DatasetIterator : public IteratorBase {
Status GetNext(IteratorContext* ctx, std::vector<Tensor>* out_tensors,
bool* end_of_sequence) final {
- port::Tracing::TraceMe activity(params_.prefix);
+ tracing::ScopedActivity activity(params_.prefix);
Status s = GetNextInternal(ctx, out_tensors, end_of_sequence);
if (TF_PREDICT_FALSE(errors::IsOutOfRange(s) && !*end_of_sequence)) {
s = errors::Internal(
diff --git a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
index 605ef3c0b7..7bc43e2072 100644
--- a/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
+++ b/tensorflow/core/kernels/data/map_and_batch_dataset_op.cc
@@ -468,7 +468,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
void StartInvocationBatch(IteratorContext* ctx, int64 batch_index)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
- port::Tracing::TraceMe activity(strings::StrCat(prefix(), "::Start"));
+ tracing::ScopedActivity activity(strings::StrCat(prefix(), "::Start"));
// Initialize batch result.
{
mutex_lock l(batch_results_[batch_index].mu);
@@ -493,7 +493,7 @@ class MapAndBatchDatasetOp : public UnaryDatasetOpKernel {
Status WaitForBatch(int64 batch_index, int64* num_elements)
EXCLUSIVE_LOCKS_REQUIRED(mu_) {
- port::Tracing::TraceMe activity(strings::StrCat(prefix(), "::Wait"));
+ tracing::ScopedActivity activity(strings::StrCat(prefix(), "::Wait"));
batch_results_[batch_index].counter->Wait();
Status status = Status::OK();
for (size_t i = 0; i < dataset()->batch_size_; ++i, ++*num_elements) {
diff --git a/tensorflow/core/kernels/function_ops.cc b/tensorflow/core/kernels/function_ops.cc
index f8e0267578..8f66f0a7b9 100644
--- a/tensorflow/core/kernels/function_ops.cc
+++ b/tensorflow/core/kernels/function_ops.cc
@@ -324,7 +324,7 @@ class RemoteCallOp : public AsyncOpKernel {
handle = cached_entry->second;
} else {
VLOG(1) << "Instantiating " << func_.name() << " on " << target_device;
- port::Tracing::TraceMe activity(strings::StrCat(
+ tracing::ScopedActivity activity(strings::StrCat(
"RemoteCall: Instantiate: ", func_.name(), " on ", target_device));
OP_REQUIRES_OK_ASYNC(
ctx,
@@ -355,12 +355,12 @@ class RemoteCallOp : public AsyncOpKernel {
args.push_back(argument);
}
auto* rets = new std::vector<Tensor>;
- auto* trace = new port::Tracing::TraceMe(strings::StrCat(
+ auto* activity = new tracing::ScopedActivity(strings::StrCat(
"RemoteCall: Run: ", func_.name(), " on ", target_device));
VLOG(1) << "Running " << func_.name() << " on " << target_device
<< " with handle: " << handle;
lib->Run(opts, handle, args, rets,
- [rets, trace, done, ctx](const Status& status) {
+ [rets, activity, done, ctx](const Status& status) {
if (!status.ok()) {
ctx->SetStatus(status);
} else {
@@ -369,7 +369,7 @@ class RemoteCallOp : public AsyncOpKernel {
}
}
delete rets;
- delete trace;
+ delete activity;
done();
});
}
diff --git a/tensorflow/core/lib/core/threadpool.cc b/tensorflow/core/lib/core/threadpool.cc
index e55ed79d36..99684ae47b 100644
--- a/tensorflow/core/lib/core/threadpool.cc
+++ b/tensorflow/core/lib/core/threadpool.cc
@@ -59,10 +59,9 @@ struct EigenEnvironment {
Task CreateTask(std::function<void()> f) {
uint64 id = 0;
- if (port::Tracing::IsActive()) {
- id = port::Tracing::UniqueId();
- port::Tracing::RecordEvent(port::Tracing::EventCategory::kScheduleClosure,
- id);
+ if (tracing::EventCollector::IsEnabled()) {
+ id = tracing::GetUniqueArg();
+ tracing::RecordEvent(tracing::EventCategory::kScheduleClosure, id);
}
return Task{
std::unique_ptr<TaskImpl>(new TaskImpl{
@@ -75,13 +74,9 @@ struct EigenEnvironment {
void ExecuteTask(const Task& t) {
WithContext wc(t.f->context);
- if (t.f->trace_id != 0) {
- port::Tracing::ScopedActivity region(
- port::Tracing::EventCategory::kRunClosure, t.f->trace_id);
- t.f->f();
- } else {
- t.f->f();
- }
+ tracing::ScopedRegion region(tracing::EventCategory::kRunClosure,
+ t.f->trace_id);
+ t.f->f();
}
};
diff --git a/tensorflow/core/platform/default/device_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc
index 8e60a7f091..ccddf1eafc 100644
--- a/tensorflow/core/platform/default/device_tracer.cc
+++ b/tensorflow/core/platform/default/device_tracer.cc
@@ -23,6 +23,7 @@ limitations under the License.
#include "tensorflow/core/common_runtime/step_stats_collector.h"
#include "tensorflow/core/framework/step_stats.pb.h"
#include "tensorflow/core/lib/core/errors.h"
+#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/lib/strings/stringprintf.h"
#include "tensorflow/core/platform/cupti_wrapper.h"
#include "tensorflow/core/platform/env.h"
@@ -288,7 +289,7 @@ TF_STATIC_THREAD_LOCAL_POD(const char *, tls_current_annotation);
class DeviceTracerImpl : public DeviceTracer,
public CUPTIClient,
- public port::Tracing::Engine {
+ public tracing::TraceCollector {
public:
DeviceTracerImpl();
~DeviceTracerImpl() override;
@@ -298,25 +299,25 @@ class DeviceTracerImpl : public DeviceTracer,
Status Stop() override;
Status Collect(StepStatsCollector *collector) override;
- // port::Tracing::Engine interface:
- bool IsEnabled() const override {
- // We only register the Engine while tracing is enabled.
- return true;
- }
- Annotation *PushAnnotation(StringPiece name) override {
- VLOG(2) << "PushAnnotation " << name;
- struct Impl : public port::Tracing::Engine::Annotation {
+ // tracing::TraceCollector interface:
+ virtual std::unique_ptr<Handle> CreateAnnotationHandle(
+ StringPiece name_part1, StringPiece name_part2) const {
+ struct Impl : public tracing::TraceCollector::Handle {
string annotation;
- explicit Impl(StringPiece n) : annotation(n.ToString()) {
+ explicit Impl(string &&name_scope) : annotation(name_scope) {
+ VLOG(2) << "CreateAnnotationHandle " << annotation;
// Remember the most recent ScopedAnnotation for each thread.
tls_current_annotation.get() = annotation.c_str();
}
~Impl() override { tls_current_annotation.get() = nullptr; }
};
- return new Impl(name);
+ return std::unique_ptr<Handle>(
+ new Impl{ConcatenateNames(name_part1, name_part2)});
}
- Tracer *StartTracing(StringPiece label, bool is_expensive) override {
- // We don't do anything with 'TraceMe' regions yet.
+
+ virtual std::unique_ptr<Handle> CreateActivityHandle(StringPiece, StringPiece,
+ bool) const {
+ // We don't do anything with 'Activities' yet.
return nullptr;
}
@@ -410,7 +411,7 @@ Status DeviceTracerImpl::Start() {
}
// Register as a TraceEngine to receive ScopedAnnotations.
- port::Tracing::RegisterEngine(this);
+ tracing::SetTraceCollector(this);
// Intercept launch and memcpy calls to capture the Op name annotation.
// TODO(pbar) Add callbacks for memcpy variants.
@@ -458,7 +459,7 @@ Status DeviceTracerImpl::Stop() {
return Status::OK();
}
CUPTI_CALL(Unsubscribe(subscriber_));
- port::Tracing::RegisterEngine(nullptr);
+ tracing::SetTraceCollector(nullptr);
TF_RETURN_IF_ERROR(cupti_manager_->DisableTrace());
end_walltime_us_ = NowInUsec();
CUPTI_CALL(GetTimestamp(&end_timestamp_));
diff --git a/tensorflow/core/platform/default/tracing.cc b/tensorflow/core/platform/default/tracing.cc
index 422564fb3e..3efcef09b8 100644
--- a/tensorflow/core/platform/default/tracing.cc
+++ b/tensorflow/core/platform/default/tracing.cc
@@ -15,21 +15,33 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
-namespace tensorflow {
-namespace port {
-
-void Tracing::RegisterEvent(EventCategory id, const char* name) {
- // TODO(opensource): implement
-}
+#include <cstdlib>
-void Tracing::Initialize() {}
+#ifndef PLATFORM_WINDOWS
+#include <unistd.h>
+#endif
-static bool DoInit() {
- Tracing::Initialize();
- return true;
+namespace tensorflow {
+namespace tracing {
+namespace {
+bool TryGetEnv(const char* name, const char** value) {
+ *value = getenv(name);
+ return *value != nullptr && (*value)[0] != '\0';
}
-
-static const bool dummy = DoInit();
-
-} // namespace port
+} // namespace
+
+void EventCollector::SetCurrentThreadName(const char*) {}
+
+const char* GetLogDir() {
+ const char* dir;
+ if (TryGetEnv("TEST_TMPDIR", &dir)) return dir;
+ if (TryGetEnv("TMP", &dir)) return dir;
+ if (TryGetEnv("TMPDIR", &dir)) return dir;
+#ifndef PLATFORM_WINDOWS
+ dir = "/tmp";
+ if (access(dir, R_OK | W_OK | X_OK) == 0) return dir;
+#endif
+ return "."; // Default to current directory.
+}
+} // namespace tracing
} // namespace tensorflow
diff --git a/tensorflow/core/platform/default/tracing_impl.h b/tensorflow/core/platform/default/tracing_impl.h
index 7834548896..b161378405 100644
--- a/tensorflow/core/platform/default/tracing_impl.h
+++ b/tensorflow/core/platform/default/tracing_impl.h
@@ -21,13 +21,8 @@ limitations under the License.
// IWYU pragma: private, include "third_party/tensorflow/core/platform/tracing.h"
// IWYU pragma: friend third_party/tensorflow/core/platform/tracing.h
-#include "tensorflow/core/lib/core/status.h"
-#include "tensorflow/core/lib/random/random.h"
#include "tensorflow/core/platform/tracing.h"
-namespace tensorflow {
-namespace port {
-
// Definitions that do nothing for platforms that don't have underlying thread
// tracing support.
#define TRACELITERAL(a) \
@@ -40,21 +35,12 @@ namespace port {
do { \
} while (0)
-inline uint64 Tracing::UniqueId() { return random::New64(); }
-inline bool Tracing::IsActive() { return false; }
-inline void Tracing::RegisterCurrentThread(const char* name) {}
-
-// Posts an atomic threadscape event with the supplied category and arg.
-inline void Tracing::RecordEvent(EventCategory category, uint64 arg) {
- // TODO(opensource): Implement
-}
-
-inline Tracing::ScopedActivity::ScopedActivity(EventCategory category,
- uint64 arg) {}
+namespace tensorflow {
+namespace tracing {
-inline Tracing::ScopedActivity::~ScopedActivity() {}
+inline bool EventCollector::IsEnabled() { return false; }
-} // namespace port
+} // namespace tracing
} // namespace tensorflow
#endif // TENSORFLOW_PLATFORM_DEFAULT_TRACING_IMPL_H_
diff --git a/tensorflow/core/platform/posix/tracing.cc b/tensorflow/core/platform/posix/tracing.cc
deleted file mode 100644
index 1d1aa53f2c..0000000000
--- a/tensorflow/core/platform/posix/tracing.cc
+++ /dev/null
@@ -1,40 +0,0 @@
-/* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-==============================================================================*/
-
-#include "tensorflow/core/platform/tracing.h"
-
-#include <stdlib.h>
-#include <unistd.h>
-
-namespace tensorflow {
-namespace port {
-
-static bool TryGetEnv(const char* name, const char** value) {
- *value = getenv(name);
- return *value != nullptr && (*value)[0] != '\0';
-}
-
-const char* Tracing::LogDir() {
- const char* dir;
- if (TryGetEnv("TEST_TMPDIR", &dir)) return dir;
- if (TryGetEnv("TMP", &dir)) return dir;
- if (TryGetEnv("TMPDIR", &dir)) return dir;
- dir = "/tmp";
- if (access(dir, R_OK | W_OK | X_OK) == 0) return dir;
- return "."; // Default to current directory.
-}
-
-} // namespace port
-} // namespace tensorflow
diff --git a/tensorflow/core/platform/tracing.cc b/tensorflow/core/platform/tracing.cc
index f7d2a8e282..c0386c0a3f 100644
--- a/tensorflow/core/platform/tracing.cc
+++ b/tensorflow/core/platform/tracing.cc
@@ -15,24 +15,24 @@ limitations under the License.
#include "tensorflow/core/platform/tracing.h"
+#include <array>
#include <atomic>
#include <map>
#include <string>
#include <vector>
+#include "tensorflow/core/lib/hash/hash.h"
#include "tensorflow/core/lib/strings/str_util.h"
#include "tensorflow/core/lib/strings/strcat.h"
#include "tensorflow/core/platform/logging.h"
namespace tensorflow {
+namespace tracing {
+namespace {
+std::atomic<uint64> unique_arg{1};
+std::atomic<const TraceCollector*> trace_collector;
+} // namespace
-namespace port {
-
-int32 Tracing::category_id_[kEventCategoryMax];
-uint64 Tracing::event_mask_ = 0;
-std::map<string, int32>* Tracing::name_map_ = new std::map<string, int32>;
-
-// This needs to be kept in sync with the EventCategory enumeration.
-const char* Tracing::EventCategoryString(EventCategory category) {
+const char* GetEventCategoryName(EventCategory category) {
switch (category) {
case EventCategory::kScheduleClosure:
return "ScheduleClosure";
@@ -40,63 +40,45 @@ const char* Tracing::EventCategoryString(EventCategory category) {
return "RunClosure";
case EventCategory::kCompute:
return "Compute";
- case EventCategory::kEventCategoryMax:
- return "EventCategoryMax";
+ default:
+ return "Unknown";
}
- return "Unknown";
}
-// This function allows the user to specify arbitrary subsets of the
-// supported Threadscape events and activities.
-bool Tracing::ParseEventMask(const char* flagname, const string& value) {
- VLOG(1) << flagname << " set to " << value;
- int64 new_mask = 0;
- std::vector<string> events =
- str_util::Split(value, ',', str_util::SkipEmpty());
- for (string name : events) {
- bool clear = false;
- int64 mask = 0;
- if (name[0] == '!') {
- // invert the sense of the flag
- clear = true;
- name = name.substr(1);
- }
- if (name == "ALL") {
- mask = ~0;
- } else {
- auto it = name_map_->find(name);
- int32 id;
- if (it == name_map_->end()) {
- id = -1;
- } else {
- id = it->second;
- }
- if (id < 0) {
- LOG(ERROR) << "Can't parse event mask name " << name;
- return false;
- }
- mask = 1 << id;
- }
- if (clear) {
- new_mask &= ~mask;
- } else {
- new_mask |= mask;
- }
- }
- // parsing was successful; set the permanent event mask
- event_mask_ = new_mask;
- return true;
+std::array<const EventCollector*, GetNumEventCategories()>
+ EventCollector::instances_;
+
+void SetEventCollector(EventCategory category,
+ const EventCollector* collector) {
+ EventCollector::instances_[static_cast<unsigned>(category)] = collector;
+}
+
+uint64 GetUniqueArg() {
+ return unique_arg.fetch_add(1, std::memory_order_relaxed);
}
-/*static*/ std::atomic<Tracing::Engine*> Tracing::tracing_engine_;
+uint64 GetArgForName(StringPiece name) {
+ return Hash64(name.data(), name.size());
+}
-void Tracing::RegisterEngine(Engine* e) {
- tracing_engine_.store(e, std::memory_order_release);
+string TraceCollector::ConcatenateNames(StringPiece first, StringPiece second) {
+ std::string result;
+ bool has_two_parts = !first.empty() && !second.empty();
+ result.reserve(first.size() + second.size() +
+ static_cast<int>(has_two_parts));
+ result.append(first.data(), first.size());
+ if (has_two_parts) result.append({':'});
+ result.append(second.data(), second.size());
+ return result;
}
-Tracing::Engine::~Engine() {}
-Tracing::Engine::Annotation::~Annotation() {}
-Tracing::Engine::Tracer::~Tracer() {}
+void SetTraceCollector(const TraceCollector* collector) {
+ return trace_collector.store(collector, std::memory_order_release);
+}
+
+const TraceCollector* GetTraceCollector() {
+ return trace_collector.load(std::memory_order_acquire);
+}
-} // namespace port
+} // namespace tracing
} // namespace tensorflow
diff --git a/tensorflow/core/platform/tracing.h b/tensorflow/core/platform/tracing.h
index 3c6e7b0db5..c322777705 100644
--- a/tensorflow/core/platform/tracing.h
+++ b/tensorflow/core/platform/tracing.h
@@ -18,6 +18,7 @@ limitations under the License.
// Tracing interface
+#include <array>
#include <atomic>
#include <map>
#include <memory>
@@ -30,255 +31,205 @@ limitations under the License.
#include "tensorflow/core/platform/types.h"
namespace tensorflow {
+namespace tracing {
+
+// This enumeration contains the identifiers of all TensorFlow CPU profiler
+// events. It must be kept in sync with the code in GetEventCategoryName().
+enum struct EventCategory : unsigned {
+ kScheduleClosure = 0,
+ kRunClosure = 1,
+ kCompute = 2,
+ kNumCategories = 3 // sentinel - keep last
+};
+constexpr unsigned GetNumEventCategories() {
+ return static_cast<unsigned>(EventCategory::kNumCategories);
+}
+const char* GetEventCategoryName(EventCategory);
-namespace port {
-
-class Tracing {
+// Interface for CPU profiler events.
+class EventCollector {
public:
- // This enumeration contains the identifiers of all TensorFlow
- // threadscape events and code regions. Threadscape assigns its
- // own identifiers at runtime when we register our events and we
- // cannot know in advance what IDs it will choose. The "RecordEvent"
- // method and "ScopedActivity" use these event IDs for consistency
- // and remap them to threadscape IDs at runtime. This enum is limited
- // to 64 values since we use a bitmask to configure which events are
- // enabled. It must also be kept in step with the code in
- // "Tracing::EventCategoryString".
- enum EventCategory {
- kScheduleClosure = 0,
- kRunClosure = 1,
- kCompute = 2,
- kEventCategoryMax = 3 // sentinel - keep last
- };
- // Note: We currently only support up to 64 categories.
- static_assert(kEventCategoryMax <= 64, "only support up to 64 events");
+ virtual ~EventCollector() {}
+ virtual void RecordEvent(uint64 arg) const = 0;
+ virtual void StartRegion(uint64 arg) const = 0;
+ virtual void StopRegion() const = 0;
- // Called by main programs to initialize tracing facilities
- static void Initialize();
+ // Annotates the current thread with a name.
+ static void SetCurrentThreadName(const char* name);
+ // Returns whether event collection is enabled.
+ static bool IsEnabled();
- // Return the pathname of the directory where we are writing log files.
- static const char* LogDir();
+ private:
+ friend void SetEventCollector(EventCategory, const EventCollector*);
+ friend const EventCollector* GetEventCollector(EventCategory);
- // Returns a non-zero identifier which can be used to correlate
- // related events.
- static inline uint64 UniqueId();
+ static std::array<const EventCollector*, GetNumEventCategories()> instances_;
+};
+// Set the callback for RecordEvent and ScopedRegion of category.
+// Not thread safe. Only call while EventCollector::IsEnabled returns false.
+void SetEventCollector(EventCategory category, const EventCollector* collector);
+
+// Returns the callback for RecordEvent and ScopedRegion of category if
+// EventCollector::IsEnabled(), otherwise returns null.
+inline const EventCollector* GetEventCollector(EventCategory category) {
+ if (EventCollector::IsEnabled()) {
+ return EventCollector::instances_[static_cast<unsigned>(category)];
+ }
+ return nullptr;
+}
- // Returns true if a trace is in progress. Can be used to reduce tracing
- // overheads in fast-path code.
- static inline bool IsActive();
+// Returns a unique id to pass to RecordEvent/ScopedRegion. Never returns zero.
+uint64 GetUniqueArg();
- // Associate name with the current thread.
- static void RegisterCurrentThread(const char* name);
+// Returns an id for name to pass to RecordEvent/ScopedRegion.
+uint64 GetArgForName(StringPiece name);
- // Posts an event with the supplied category and arg.
- static void RecordEvent(EventCategory category, uint64 arg);
+// Records an atomic event through the currently registered EventCollector.
+inline void RecordEvent(EventCategory category, uint64 arg) {
+ if (auto collector = GetEventCollector(category)) {
+ collector->RecordEvent(arg);
+ }
+}
- // Traces a region of code. Posts a tracing "EnterCodeRegion" event
- // when created and an "ExitCodeRegion" event when destroyed.
- class ScopedActivity {
- public:
- explicit ScopedActivity(EventCategory category, uint64 arg);
- ~ScopedActivity();
+// Records an event for the duration of the instance lifetime through the
+// currently registered EventCollector.
+class ScopedRegion {
+ ScopedRegion(ScopedRegion&) = delete; // Not copy-constructible.
+ ScopedRegion& operator=(ScopedRegion&) = delete; // Not assignable.
- private:
-#if defined(PLATFORM_GOOGLE)
- const bool enabled_;
- const int32 region_id_;
-#endif
+ public:
+ ScopedRegion(ScopedRegion&& other) noexcept // Move-constructible.
+ : collector_(other.collector_) {
+ other.collector_ = nullptr;
+ }
- TF_DISALLOW_COPY_AND_ASSIGN(ScopedActivity);
- };
+ ScopedRegion(EventCategory category, uint64 arg)
+ : collector_(GetEventCollector(category)) {
+ if (collector_) {
+ collector_->StartRegion(arg);
+ }
+ }
- // Trace collection engine can be registered with this module.
- // If no engine is registered, ScopedAnnotation and TraceMe are no-ops.
- class Engine;
- static void RegisterEngine(Engine*);
+ // Same as ScopedRegion(category, GetUniqueArg()), but faster if
+ // EventCollector::IsEnaled() returns false.
+ ScopedRegion(EventCategory category)
+ : collector_(GetEventCollector(category)) {
+ if (collector_) {
+ collector_->StartRegion(GetUniqueArg());
+ }
+ }
- // Forward declaration of the GPU utility classes.
- class ScopedAnnotation;
- class TraceMe;
+ // Same as ScopedRegion(category, GetArgForName(name)), but faster if
+ // EventCollector::IsEnaled() returns false.
+ ScopedRegion(EventCategory category, StringPiece name)
+ : collector_(GetEventCollector(category)) {
+ if (collector_) {
+ collector_->StartRegion(GetArgForName(name));
+ }
+ }
- private:
- friend class TracingTest;
- friend class ScopedAnnotation;
- friend class TraceMe;
-
- // TODO: TF_EXPORT is for building //tensorflow/contrib/data:_dataset_ops.so
- // on Windows. Figure out a way to remove TF_EXPORT here.
- TF_EXPORT static std::atomic<Tracing::Engine*> tracing_engine_;
- static Tracing::Engine* engine() {
- return tracing_engine_.load(std::memory_order_acquire);
+ ~ScopedRegion() {
+ if (collector_) {
+ collector_->StopRegion();
+ }
}
- static void RegisterEvent(EventCategory id, const char* name);
- static const char* EventCategoryString(EventCategory category);
-
- //
- // Parses event mask expressions in 'value' of the form:
- // expr ::= <term> (,<term>)*
- // term ::= <event> | "!" <event>
- // event ::= "ALL" | <wait_event> | <other_event>
- // wait_event ::= "ENewSession" | "ECloseSession" | ...
- // other_event ::= "Send" | "Wait" | ...
- // ALL denotes all events, <event> turns on tracing for this event, and
- // !<event> turns off tracing for this event.
- // If the expression can be parsed correctly it returns true and sets
- // the event_mask_. Otherwise it returns false and the event_mask_ is left
- // unchanged.
- static bool ParseEventMask(const char* flagname, const string& value);
-
- // Bit mask of enabled trace categories.
- static uint64 event_mask_;
-
- // Records the mappings between Threadscape IDs and the "EventCategory" enum.
- static int32 category_id_[kEventCategoryMax];
- static std::map<string, int32>* name_map_;
+ bool IsEnabled() const { return collector_ != nullptr; }
+
+ private:
+ const EventCollector* collector_;
};
-// Trace collection engine that actually implements collection.
-class Tracing::Engine {
+// Interface for accelerator profiler annotations.
+class TraceCollector {
public:
- Engine() {}
- virtual ~Engine();
-
- // Returns true if Tracing is currently enabled.
- virtual bool IsEnabled() const = 0;
-
- // Represents an active annotation.
- class Annotation {
+ class Handle {
public:
- Annotation() {}
- virtual ~Annotation();
+ virtual ~Handle() {}
};
- // Represents an active trace.
- class Tracer {
- public:
- Tracer() {}
- virtual ~Tracer();
- };
+ virtual ~TraceCollector() {}
+ virtual std::unique_ptr<Handle> CreateAnnotationHandle(
+ StringPiece name_part1, StringPiece name_part2) const = 0;
+ virtual std::unique_ptr<Handle> CreateActivityHandle(
+ StringPiece name_part1, StringPiece name_part2,
+ bool is_expensive) const = 0;
- private:
- friend class ScopedAnnotation;
- friend class TraceMe;
-
- // Register the specified name as an annotation on the current thread.
- // Caller should delete the result to remove the annotation.
- // Annotations from the same thread are destroyed in a LIFO manner.
- // May return nullptr if annotations are not supported.
- virtual Annotation* PushAnnotation(StringPiece name) = 0;
-
- // Start tracing under the specified label. Caller should delete the result
- // to stop tracing.
- // May return nullptr if tracing is not supported.
- virtual Tracer* StartTracing(StringPiece label, bool is_expensive) = 0;
- // Same as above, but implementations can avoid copying the string.
- virtual Tracer* StartTracing(string&& label, bool is_expensive) {
- return StartTracing(StringPiece(label), is_expensive);
- }
+ protected:
+ static string ConcatenateNames(StringPiece first, StringPiece second);
- // Backwards compatibility one arg variants (assume is_expensive=true).
- Tracer* StartTracing(StringPiece label) {
- return StartTracing(label, /*is_expensive=*/true);
- }
- Tracer* StartTracing(string&& label) {
- return StartTracing(StringPiece(label), /*is_expensive=*/true);
- }
+ private:
+ friend void SetTraceCollector(const TraceCollector*);
+ friend const TraceCollector* GetTraceCollector();
};
+// Set the callback for ScopedAnnotation and ScopedActivity.
+void SetTraceCollector(const TraceCollector* collector);
+// Returns the callback for ScopedAnnotation and ScopedActivity.
+const TraceCollector* GetTraceCollector();
-// This class permits a user to apply annotation on kernels and memcpys
-// when launching them. While an annotation is in scope, all activities
-// within that scope get their names replaced by the annotation. The kernel
-// name replacement is done when constructing the protobuf for sending out to
-// a client (e.g., the stubby requestor) for both API and Activity records.
-//
-// Ownership: The creator of ScopedAnnotation assumes ownership of the object.
+// Adds an annotation to all activities for the duration of the instance
+// lifetime through the currently registered TraceCollector.
//
// Usage: {
-// ScopedAnnotation annotation("first set of kernels");
+// ScopedAnnotation annotation("my kernels");
// Kernel1<<<x,y>>>;
-// LaunchKernel2(); // Which eventually launches a cuda kernel.
+// LaunchKernel2(); // Launches a CUDA kernel.
// }
-// In the above scenario, the GPUProf UI would show 2 kernels with the name
-// "first set of kernels" executing -- they will appear as the same kernel.
-class Tracing::ScopedAnnotation {
+// This will add 'my kernels' to both kernels in the profiler UI
+class ScopedAnnotation {
public:
- explicit ScopedAnnotation(StringPiece name);
+ explicit ScopedAnnotation(StringPiece name)
+ : ScopedAnnotation(name, StringPiece()) {}
- // If tracing is enabled, set up an annotation with a label of
- // "<name_part1>:<name_part2>". Can be cheaper than the
+ // If tracing is enabled, add a name scope of
+ // "<name_part1>:<name_part2>". This can be cheaper than the
// single-argument constructor because the concatenation of the
// label string is only done if tracing is enabled.
- ScopedAnnotation(StringPiece name_part1, StringPiece name_part2);
+ ScopedAnnotation(StringPiece name_part1, StringPiece name_part2)
+ : handle_([&] {
+ auto trace_collector = GetTraceCollector();
+ return trace_collector ? trace_collector->CreateAnnotationHandle(
+ name_part1, name_part2)
+ : nullptr;
+ }()) {}
- // Returns true iff scoped annotations are active.
- static bool Enabled() {
- auto e = Tracing::engine();
- return e && e->IsEnabled();
- }
+ bool IsEnabled() const { return static_cast<bool>(handle_); }
private:
- std::unique_ptr<Engine::Annotation> annotation_;
+ std::unique_ptr<TraceCollector::Handle> handle_;
};
-// TODO(opensource): clean up the scoped classes for GPU tracing.
-// This class permits user-specified (CPU) tracing activities. A trace
-// activity is started when an object of this class is created and stopped
-// when the object is destroyed.
-class Tracing::TraceMe {
+// Adds an activity through the currently registered TraceCollector.
+// The activity starts when an object of this class is created and stops when
+// the object is destroyed.
+class ScopedActivity {
public:
- explicit TraceMe(StringPiece name);
- TraceMe(StringPiece name, bool is_expensive);
+ explicit ScopedActivity(StringPiece name, bool is_expensive = true)
+ : ScopedActivity(name, StringPiece(), is_expensive) {}
- // If tracing is enabled, set up a traceMe with a label of
+ // If tracing is enabled, set up an activity with a label of
// "<name_part1>:<name_part2>". This can be cheaper than the
// single-argument constructor because the concatenation of the
// label string is only done if tracing is enabled.
- TraceMe(StringPiece name_part1, StringPiece name_part2);
- TraceMe(StringPiece name_part1, StringPiece name_part2, bool is_expensive);
+ ScopedActivity(StringPiece name_part1, StringPiece name_part2,
+ bool is_expensive = true)
+ : handle_([&] {
+ auto trace_collector = GetTraceCollector();
+ return trace_collector ? trace_collector->CreateActivityHandle(
+ name_part1, name_part2, is_expensive)
+ : nullptr;
+ }()) {}
+
+ bool IsEnabled() const { return static_cast<bool>(handle_); }
private:
- std::unique_ptr<Engine::Tracer> tracer_;
+ std::unique_ptr<TraceCollector::Handle> handle_;
};
-inline Tracing::ScopedAnnotation::ScopedAnnotation(StringPiece name) {
- auto e = Tracing::engine();
- if (e && e->IsEnabled()) {
- annotation_.reset(e->PushAnnotation(name));
- }
-}
-
-inline Tracing::ScopedAnnotation::ScopedAnnotation(StringPiece name_part1,
- StringPiece name_part2) {
- auto e = Tracing::engine();
- if (e && e->IsEnabled()) {
- annotation_.reset(
- e->PushAnnotation(strings::StrCat(name_part1, ":", name_part2)));
- }
-}
-
-inline Tracing::TraceMe::TraceMe(StringPiece name) : TraceMe(name, true) {}
-
-inline Tracing::TraceMe::TraceMe(StringPiece name, bool is_expensive) {
- auto e = Tracing::engine();
- if (e && e->IsEnabled()) {
- tracer_.reset(e->StartTracing(name, is_expensive));
- }
-}
-
-inline Tracing::TraceMe::TraceMe(StringPiece name_part1, StringPiece name_part2)
- : TraceMe(name_part1, name_part2, true) {}
-
-inline Tracing::TraceMe::TraceMe(StringPiece name_part1, StringPiece name_part2,
- bool is_expensive) {
- auto e = Tracing::engine();
- if (e && e->IsEnabled()) {
- tracer_.reset(e->StartTracing(strings::StrCat(name_part1, ":", name_part2),
- is_expensive));
- }
-}
+// Return the pathname of the directory where we are writing log files.
+const char* GetLogDir();
-} // namespace port
+} // namespace tracing
} // namespace tensorflow
#if defined(PLATFORM_GOOGLE)