diff options
-rw-r--r-- | tensorflow/core/BUILD | 31 | ||||
-rw-r--r-- | tensorflow/core/common_runtime/direct_session.cc | 29 | ||||
-rw-r--r-- | tensorflow/core/debug/BUILD | 2 | ||||
-rw-r--r-- | tensorflow/core/platform/default/build_config.bzl | 8 | ||||
-rw-r--r-- | tensorflow/core/platform/default/device_tracer.cc (renamed from tensorflow/core/platform/default/gpu_tracer.cc) | 64 | ||||
-rw-r--r-- | tensorflow/core/platform/device_tracer.h (renamed from tensorflow/core/platform/gpu_tracer.h) | 32 | ||||
-rw-r--r-- | tensorflow/core/platform/device_tracer_test.cc (renamed from tensorflow/core/platform/gpu_tracer_test.cc) | 44 | ||||
-rw-r--r-- | tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh | 2 |
8 files changed, 104 insertions, 108 deletions
diff --git a/tensorflow/core/BUILD b/tensorflow/core/BUILD index ee14078496..d71f314e11 100644 --- a/tensorflow/core/BUILD +++ b/tensorflow/core/BUILD @@ -127,9 +127,9 @@ load( "tf_additional_verbs_lib_defines", "tf_additional_mpi_lib_defines", "tf_additional_gdr_lib_defines", - "tf_additional_gpu_tracer_srcs", - "tf_additional_gpu_tracer_deps", - "tf_additional_gpu_tracer_cuda_deps", + "tf_additional_device_tracer_srcs", + "tf_additional_device_tracer_deps", + "tf_additional_device_tracer_cuda_deps", "tf_pyclif_proto_library", "tf_jspb_proto_library", "tf_nano_proto_library", @@ -1461,7 +1461,7 @@ cc_library( "lib/jpeg/**/*", "platform/**/env_time.cc", "platform/**/cuda_libdevice_path.cc", - "platform/**/gpu_tracer.cc", + "platform/**/device_tracer.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ], @@ -1472,7 +1472,7 @@ cc_library( "platform/**/cuda_libdevice_path.cc", "platform/**/stream_executor.h", "platform/**/env_time.cc", - "platform/**/gpu_tracer.cc", + "platform/**/device_tracer.cc", "platform/variant_coding.cc", "platform/**/variant_cord_coding.cc", ] + @@ -2085,12 +2085,9 @@ tf_cuda_library( "util/env_var.h", ], copts = tf_copts(), - cuda_deps = [ - ":gpu_tracer", - ], - linkstatic = 1, deps = [ ":core_cpu_internal", + ":device_tracer", ":framework", ":lib", ":lib_internal", @@ -2122,18 +2119,18 @@ cc_library( ) tf_cuda_library( - name = "gpu_tracer", - srcs = tf_additional_gpu_tracer_srcs(), + name = "device_tracer", + srcs = tf_additional_device_tracer_srcs(), hdrs = [ - "platform/gpu_tracer.h", + "platform/device_tracer.h", ], copts = tf_copts(), - cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_gpu_tracer_cuda_deps(), + cuda_deps = tf_additional_cupti_wrapper_deps() + tf_additional_device_tracer_cuda_deps(), deps = [ ":core_cpu_internal", ":lib", ":protos_all_cc", - ] + tf_additional_gpu_tracer_deps(), + ] + tf_additional_device_tracer_deps(), ) GPU_RUNTIME_HEADERS = [ @@ -3401,9 +3398,9 @@ tf_cc_test( ) tf_cc_test_gpu( - name = "gpu_tracer_test", + name = "device_tracer_test", size = "small", - srcs = ["platform/gpu_tracer_test.cc"], + srcs = ["platform/device_tracer_test.cc"], args = ["--heap_check=local"], linkstatic = tf_kernel_tests_linkstatic(), tags = tf_cuda_tests_tags() + ["nomac"], @@ -3411,12 +3408,12 @@ tf_cc_test_gpu( ":all_kernels", ":core_cpu", ":core_cpu_internal", + ":device_tracer", ":direct_session", ":direct_session_internal", ":framework", ":framework_internal", ":gpu_runtime", - ":gpu_tracer", ":lib", ":lib_internal", ":protos_all_cc", diff --git a/tensorflow/core/common_runtime/direct_session.cc b/tensorflow/core/common_runtime/direct_session.cc index 2f57164dcd..6dfe17405c 100644 --- a/tensorflow/core/common_runtime/direct_session.cc +++ b/tensorflow/core/common_runtime/direct_session.cc @@ -54,15 +54,13 @@ limitations under the License. #include "tensorflow/core/lib/strings/str_util.h" #include "tensorflow/core/lib/strings/strcat.h" #include "tensorflow/core/platform/cpu_info.h" +#include "tensorflow/core/platform/device_tracer.h" #include "tensorflow/core/platform/logging.h" #include "tensorflow/core/platform/mutex.h" #include "tensorflow/core/platform/types.h" #include "tensorflow/core/util/device_name_utils.h" #include "tensorflow/core/util/env_var.h" -#if GOOGLE_CUDA -#include "tensorflow/core/platform/gpu_tracer.h" -#endif // GOOGLE_CUDA namespace tensorflow { @@ -555,15 +553,19 @@ Status DirectSession::Run(const RunOptions& run_options, args.stats_collector = run_state.collector.get(); } -#if GOOGLE_CUDA - std::unique_ptr<GPUTracer> tracer; + std::unique_ptr<DeviceTracer> tracer; if (run_options.trace_level() >= RunOptions::HARDWARE_TRACE) { - tracer = CreateGPUTracer(); - // tracer will be NULL on non-GPU platforms. - // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object! - if (tracer) tracer->Start().IgnoreError(); + tracer = CreateDeviceTracer(); + // tracer may be NULL on platforms without accelerators. + if (tracer) { + Status s = tracer->Start(); + if (!s.ok()) { + run_state.executors_done.Notify(); + delete barrier; + return s; + } + } } -#endif // GOOGLE_CUDA // Register this step with session's cancellation manager, so that // `Session::Close()` will cancel the step. @@ -598,13 +600,10 @@ Status DirectSession::Run(const RunOptions& run_options, run_state.status.Update(errors::Cancelled("Run call was cancelled")); } -#if GOOGLE_CUDA if (tracer) { - // TODO(b/32704451): Don't just ignore the ::tensorflow::Status object! - tracer->Stop().IgnoreError(); - tracer->Collect(args.stats_collector).IgnoreError(); + TF_RETURN_IF_ERROR(tracer->Stop()); + TF_RETURN_IF_ERROR(tracer->Collect(args.stats_collector)); } -#endif // GOOGLE_CUDA { mutex_lock l(run_state.mu_); diff --git a/tensorflow/core/debug/BUILD b/tensorflow/core/debug/BUILD index 525f96a3de..6d796768de 100644 --- a/tensorflow/core/debug/BUILD +++ b/tensorflow/core/debug/BUILD @@ -89,9 +89,9 @@ tf_cuda_library( deps = [ ":debug", "//tensorflow/core:core_cpu_internal", + "//tensorflow/core:device_tracer", "//tensorflow/core:direct_session_internal", "//tensorflow/core:framework", - "//tensorflow/core:gpu_tracer", "//tensorflow/core:lib", "//tensorflow/core:lib_internal", "//tensorflow/core:proto_text", diff --git a/tensorflow/core/platform/default/build_config.bzl b/tensorflow/core/platform/default/build_config.bzl index 5eeb861bdd..0f8cf8f122 100644 --- a/tensorflow/core/platform/default/build_config.bzl +++ b/tensorflow/core/platform/default/build_config.bzl @@ -399,13 +399,13 @@ def tf_env_time_srcs(): def tf_additional_cupti_wrapper_deps(): return ["//tensorflow/core/platform/default/gpu:cupti_wrapper"] -def tf_additional_gpu_tracer_srcs(): - return ["platform/default/gpu_tracer.cc"] +def tf_additional_device_tracer_srcs(): + return ["platform/default/device_tracer.cc"] -def tf_additional_gpu_tracer_cuda_deps(): +def tf_additional_device_tracer_cuda_deps(): return [] -def tf_additional_gpu_tracer_deps(): +def tf_additional_device_tracer_deps(): return [] def tf_additional_libdevice_data(): diff --git a/tensorflow/core/platform/default/gpu_tracer.cc b/tensorflow/core/platform/default/device_tracer.cc index d6489f2f00..f4b0f16393 100644 --- a/tensorflow/core/platform/default/gpu_tracer.cc +++ b/tensorflow/core/platform/default/device_tracer.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/gpu_tracer.h" +#include "tensorflow/core/platform/device_tracer.h" #if GOOGLE_CUDA @@ -101,7 +101,7 @@ const char *getActivityOverheadKindString(CUpti_ActivityOverheadKind kind) { } // namespace namespace tensorflow { -namespace gputracer { +namespace devicetracer { // Forward declaration. class CUPTIManager; @@ -286,14 +286,14 @@ CUPTIManager *GetCUPTIManager() { // for the duration of the CUPTI API callback. TF_STATIC_THREAD_LOCAL_POD(const char *, tls_current_annotation); -class GPUTracerImpl : public GPUTracer, - public CUPTIClient, - public port::Tracing::Engine { +class DeviceTracerImpl : public DeviceTracer, + public CUPTIClient, + public port::Tracing::Engine { public: - GPUTracerImpl(); - ~GPUTracerImpl() override; + DeviceTracerImpl(); + ~DeviceTracerImpl() override; - // GPUTracer interface: + // DeviceTracer interface: Status Start() override; Status Stop() override; Status Collect(StepStatsCollector *collector) override; @@ -348,7 +348,7 @@ class GPUTracerImpl : public GPUTracer, }; // This is the subscriber callback which is invoked directly by CUPTI. - // The 'userdata' argument will be a pointer to the active 'GPUTracerImpl'. + // The 'userdata' argument will be a pointer to the active 'DeviceTracerImpl'. static void CUPTIAPI ApiCallback(void *userdata, CUpti_CallbackDomain domain, CUpti_CallbackId cbid, const void *cbdata); @@ -375,28 +375,28 @@ class GPUTracerImpl : public GPUTracer, uint64_t start_timestamp_ GUARDED_BY(mu_); uint64_t end_timestamp_ GUARDED_BY(mu_); - TF_DISALLOW_COPY_AND_ASSIGN(GPUTracerImpl); + TF_DISALLOW_COPY_AND_ASSIGN(DeviceTracerImpl); }; -GPUTracerImpl::GPUTracerImpl() { - VLOG(1) << "GPUTracer created."; +DeviceTracerImpl::DeviceTracerImpl() { + VLOG(1) << "DeviceTracer created."; cupti_manager_ = GetCUPTIManager(); CHECK(cupti_manager_); cupti_wrapper_.reset(new perftools::gputools::profiler::CuptiWrapper()); enabled_ = false; } -GPUTracerImpl::~GPUTracerImpl() { +DeviceTracerImpl::~DeviceTracerImpl() { // Unregister the CUPTI callbacks if needed to prevent them from accessing // freed memory. Stop().IgnoreError(); } -Status GPUTracerImpl::Start() { - VLOG(1) << "GPUTracer::Start"; +Status DeviceTracerImpl::Start() { + VLOG(1) << "DeviceTracer::Start"; mutex_lock l(mu_); if (enabled_) { - return errors::FailedPrecondition("GPUTracer is already enabled."); + return errors::FailedPrecondition("DeviceTracer is already enabled."); } // There can only be one CUPTI subscriber. If we can't create one then // there is another trace in progress (possibly by external code). @@ -451,8 +451,8 @@ Status GPUTracerImpl::Start() { return Status::OK(); } -Status GPUTracerImpl::Stop() { - VLOG(1) << "GPUTracer::Stop"; +Status DeviceTracerImpl::Stop() { + VLOG(1) << "DeviceTracer::Stop"; mutex_lock l(mu_); if (!enabled_) { return Status::OK(); @@ -466,20 +466,20 @@ Status GPUTracerImpl::Stop() { return Status::OK(); } -void GPUTracerImpl::AddCorrelationId(uint32 correlation_id, - const string &name) { +void DeviceTracerImpl::AddCorrelationId(uint32 correlation_id, + const string &name) { VLOG(2) << correlation_id << " : " << name; mutex_lock l(trace_mu_); if (correlations_.size() >= kMaxRecords) return; correlations_.emplace(correlation_id, name); } -/*static*/ void GPUTracerImpl::ApiCallback(void *userdata, - CUpti_CallbackDomain domain, - CUpti_CallbackId cbid, - const void *cbdata) { +/*static*/ void DeviceTracerImpl::ApiCallback(void *userdata, + CUpti_CallbackDomain domain, + CUpti_CallbackId cbid, + const void *cbdata) { auto *cbInfo = reinterpret_cast<const CUpti_CallbackData *>(cbdata); - GPUTracerImpl *tracer = reinterpret_cast<GPUTracerImpl *>(userdata); + DeviceTracerImpl *tracer = reinterpret_cast<DeviceTracerImpl *>(userdata); VLOG(2) << "ApiCallback " << domain << ":" << cbid << " func: " << cbInfo->functionName; @@ -533,7 +533,7 @@ void GPUTracerImpl::AddCorrelationId(uint32 correlation_id, } } -void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) { +void DeviceTracerImpl::ActivityCallback(const CUpti_Activity &record) { VLOG(2) << "ActivityCallback " << record.kind; mutex_lock l(trace_mu_); switch (record.kind) { @@ -570,10 +570,10 @@ void GPUTracerImpl::ActivityCallback(const CUpti_Activity &record) { } } -Status GPUTracerImpl::Collect(StepStatsCollector *collector) { +Status DeviceTracerImpl::Collect(StepStatsCollector *collector) { mutex_lock l(mu_); if (enabled_) { - return errors::FailedPrecondition("GPUTracer is still enabled."); + return errors::FailedPrecondition("DeviceTracer is still enabled."); } // TODO(pbar) Handle device IDs and prefix properly. @@ -630,10 +630,10 @@ Status GPUTracerImpl::Collect(StepStatsCollector *collector) { return Status::OK(); } -} // namespace gputracer +} // namespace devicetracer -std::unique_ptr<GPUTracer> CreateGPUTracer() { - std::unique_ptr<GPUTracer> tracer(new gputracer::GPUTracerImpl()); +std::unique_ptr<DeviceTracer> CreateDeviceTracer() { + std::unique_ptr<DeviceTracer> tracer(new devicetracer::DeviceTracerImpl()); return tracer; } @@ -643,7 +643,7 @@ std::unique_ptr<GPUTracer> CreateGPUTracer() { namespace tensorflow { -std::unique_ptr<GPUTracer> CreateGPUTracer() { return nullptr; } +std::unique_ptr<DeviceTracer> CreateDeviceTracer() { return nullptr; } } // namespace tensorflow diff --git a/tensorflow/core/platform/gpu_tracer.h b/tensorflow/core/platform/device_tracer.h index 3373d974e3..d0f86a5103 100644 --- a/tensorflow/core/platform/gpu_tracer.h +++ b/tensorflow/core/platform/device_tracer.h @@ -13,8 +13,8 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#ifndef TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ -#define TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ +#ifndef TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ +#define TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ #include <memory> @@ -24,16 +24,16 @@ namespace tensorflow { class StepStatsCollector; -// 'GPUTracer' is an interface for collecting low-level execution timings -// of GPU computation and DMA transfers. +// 'DeviceTracer' is an interface for collecting low-level execution timings +// of hardware accelerator (e.g. GPU) computation and DMA transfers. // // Typical usage pattern is as follows: // -// GPUTracer* tracer = CreateGPUTracer(); +// DeviceTracer* tracer = CreateDeviceTracer(); // if (tracer) { // tracer->Start(); // -// ... perform some GPU computations. +// ... perform some computations on a hardware accelerator. // // tracer->Stop(); // @@ -44,23 +44,23 @@ class StepStatsCollector; // // Notes: // Tracing is not supported on all plaforms. On platforms -// with no GPU tracing support, 'CreateGPUTracer' will return 'nullptr'. -// On most plaforms, GPU tracing will be a system-wide activity and -// a single 'GPUTracer' will collect activity from all GPUs. +// with no tracing support, 'CreateDeviceTracer' will return 'nullptr'. +// On most plaforms, hardware tracing will be a system-wide activity and +// a single 'DeviceTracer' will collect activity from all devices. // It is also common that only a single tracer may be active at any // given time. The 'Start' method will return an error if tracing is // already in progress elsewhere. // -class GPUTracer { +class DeviceTracer { public: - virtual ~GPUTracer() {} + virtual ~DeviceTracer() {} - // Start GPU tracing. + // Start device tracing. // Note that only a single trace can be active, in which case this // methods will return an 'Unavailable' error. virtual Status Start() = 0; - // Stop GPU tracing. + // Stop device tracing. // It is safe to call 'Stop' on a tracer which is not enabled. virtual Status Stop() = 0; @@ -70,10 +70,10 @@ class GPUTracer { virtual Status Collect(StepStatsCollector* collector) = 0; }; -// Creates a platform-specific GPUTracer. +// Creates a platform-specific DeviceTracer. // Returns 'nullptr' on platforms where tracing is not supported. -std::unique_ptr<GPUTracer> CreateGPUTracer(); +std::unique_ptr<DeviceTracer> CreateDeviceTracer(); } // namespace tensorflow -#endif // TENSORFLOW_CORE_PLATFORM_GPU_TRACER_H_ +#endif // TENSORFLOW_CORE_PLATFORM_DEVICE_TRACER_H_ diff --git a/tensorflow/core/platform/gpu_tracer_test.cc b/tensorflow/core/platform/device_tracer_test.cc index ce2985fd47..c0c08dabac 100644 --- a/tensorflow/core/platform/gpu_tracer_test.cc +++ b/tensorflow/core/platform/device_tracer_test.cc @@ -13,7 +13,7 @@ See the License for the specific language governing permissions and limitations under the License. ==============================================================================*/ -#include "tensorflow/core/platform/gpu_tracer.h" +#include "tensorflow/core/platform/device_tracer.h" #include <map> #include <memory> @@ -50,7 +50,7 @@ std::unique_ptr<Session> CreateSession() { return std::unique_ptr<Session>(NewSession(options)); } -class GPUTracerTest : public ::testing::Test { +class DeviceTracerTest : public ::testing::Test { public: void Initialize(std::initializer_list<float> a_values) { Graph graph(OpRegistry::Global()); @@ -84,10 +84,10 @@ class GPUTracerTest : public ::testing::Test { protected: void ExpectFailure(const Status& status, error::Code code) { - EXPECT_FALSE(status.ok()); + EXPECT_FALSE(status.ok()) << status.ToString(); if (!status.ok()) { LOG(INFO) << "Status message: " << status.error_message(); - EXPECT_EQ(code, status.code()); + EXPECT_EQ(code, status.code()) << status.ToString(); } } @@ -97,22 +97,22 @@ class GPUTracerTest : public ::testing::Test { GraphDef def_; }; -TEST_F(GPUTracerTest, StartStop) { - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StartStop) { + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Start()); TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, StopBeforeStart) { - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StopBeforeStart) { + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Stop()); TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, CollectBeforeStart) { - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, CollectBeforeStart) { + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; StepStats stats; StepStatsCollector collector(&stats); @@ -120,8 +120,8 @@ TEST_F(GPUTracerTest, CollectBeforeStart) { EXPECT_EQ(stats.dev_stats_size(), 0); } -TEST_F(GPUTracerTest, CollectBeforeStop) { - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, CollectBeforeStop) { + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; TF_EXPECT_OK(tracer->Start()); StepStats stats; @@ -131,9 +131,9 @@ TEST_F(GPUTracerTest, CollectBeforeStop) { TF_EXPECT_OK(tracer->Stop()); } -TEST_F(GPUTracerTest, StartTwoTracers) { - std::unique_ptr<GPUTracer> tracer1(CreateGPUTracer()); - std::unique_ptr<GPUTracer> tracer2(CreateGPUTracer()); +TEST_F(DeviceTracerTest, StartTwoTracers) { + std::unique_ptr<DeviceTracer> tracer1(CreateDeviceTracer()); + std::unique_ptr<DeviceTracer> tracer2(CreateDeviceTracer()); if (!tracer1 || !tracer2) return; TF_EXPECT_OK(tracer1->Start()); @@ -144,9 +144,9 @@ TEST_F(GPUTracerTest, StartTwoTracers) { TF_EXPECT_OK(tracer2->Stop()); } -TEST_F(GPUTracerTest, RunWithTracer) { - // On non-GPU platforms, we may not support GPUTracer. - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, RunWithTracer) { + // On non-GPU platforms, we may not support DeviceTracer. + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; Initialize({3, 2, -1, 0}); @@ -172,8 +172,8 @@ TEST_F(GPUTracerTest, RunWithTracer) { EXPECT_FLOAT_EQ(5.0, mat(0, 0)); } -TEST_F(GPUTracerTest, TraceToStepStatsCollector) { - std::unique_ptr<GPUTracer> tracer(CreateGPUTracer()); +TEST_F(DeviceTracerTest, TraceToStepStatsCollector) { + std::unique_ptr<DeviceTracer> tracer(CreateDeviceTracer()); if (!tracer) return; Initialize({3, 2, -1, 0}); @@ -198,10 +198,10 @@ TEST_F(GPUTracerTest, TraceToStepStatsCollector) { collector.Finalize(); // Depending on whether this runs on CPU or GPU, we will have a // different number of devices. - EXPECT_GE(stats.dev_stats_size(), 1); + EXPECT_GE(stats.dev_stats_size(), 1) << "Saw stats: " << stats.DebugString(); } -TEST_F(GPUTracerTest, RunWithTraceOption) { +TEST_F(DeviceTracerTest, RunWithTraceOption) { Initialize({3, 2, -1, 0}); auto session = CreateSession(); ASSERT_TRUE(session != nullptr); diff --git a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh index 6a8b6417d6..924ab1a4ae 100644 --- a/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh +++ b/tensorflow/tools/ci_build/windows/bazel/bazel_test_lib.sh @@ -88,7 +88,7 @@ extra_failing_gpu_cc_tests="\ //tensorflow/core:cuda_libdevice_path_test + \ //tensorflow/core:common_runtime_direct_session_test + \ //tensorflow/core:common_runtime_direct_session_with_tracking_alloc_test + \ - //tensorflow/core:gpu_tracer_test + \ + //tensorflow/core:device_tracer_test + \ //tensorflow/core:ops_math_grad_test \ " |