diff options
22 files changed, 105 insertions, 242 deletions
diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc index ec34b0ea77..3a9951e14d 100644 --- a/tensorflow/c/eager/runtime.cc +++ b/tensorflow/c/eager/runtime.cc @@ -316,18 +316,12 @@ Status KernelAndDevice::Run(std::vector<Tensor>* input_tensors, allocator_pair.second->GetRecordsAndUnRef(); } auto* ms = stats->mutable_memory_stats(); - ms->set_host_temp_memory_size(context.host_temp_memory_size()); - ms->set_device_temp_memory_size(context.device_temp_memory_size()); - for (const auto& alloc_id : context.host_persistent_alloc_ids()) { - ms->mutable_host_persistent_tensor_alloc_ids()->Add(alloc_id); + ms->set_temp_memory_size(context.temp_memory_size()); + for (const auto& alloc_id : context.persistent_alloc_ids()) { + ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id); } - for (const auto& alloc_id : context.device_persistent_alloc_ids()) { - ms->mutable_device_persistent_tensor_alloc_ids()->Add(alloc_id); - } - ms->set_host_persistent_memory_size( - context.host_persistent_memory_allocated()); - ms->set_device_persistent_memory_size( - context.device_persistent_memory_allocated()); + + ms->set_persistent_memory_size(context.persistent_memory_allocated()); } return Status::OK(); } diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc index fe1cf1b12e..9d03caff1e 100644 --- a/tensorflow/core/common_runtime/executor.cc +++ b/tensorflow/core/common_runtime/executor.cc @@ -172,17 +172,11 @@ void SetMemory(NodeExecStatsWrapper* stats, OpKernelContext* ctx) { stats->AddAllocation(allocator_pair.first, allocator_pair.second); } auto* ms = stats->stats()->mutable_memory_stats(); - ms->set_host_temp_memory_size(ctx->host_temp_memory_size()); - ms->set_device_temp_memory_size(ctx->device_temp_memory_size()); - for (const auto& alloc_id : ctx->host_persistent_alloc_ids()) { - ms->mutable_host_persistent_tensor_alloc_ids()->Add(alloc_id); + ms->set_temp_memory_size(ctx->temp_memory_size()); + for (const auto& alloc_id : ctx->persistent_alloc_ids()) { + ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id); } - for (const auto& alloc_id : ctx->device_persistent_alloc_ids()) { - ms->mutable_device_persistent_tensor_alloc_ids()->Add(alloc_id); - } - ms->set_host_persistent_memory_size(ctx->host_persistent_memory_allocated()); - ms->set_device_persistent_memory_size( - ctx->device_persistent_memory_allocated()); + ms->set_persistent_memory_size(ctx->persistent_memory_allocated()); } void SetReferencedTensors(NodeExecStatsWrapper* stats, diff --git a/tensorflow/core/framework/cost_graph.proto b/tensorflow/core/framework/cost_graph.proto index f4837fbfc5..7885b0171a 100644 --- a/tensorflow/core/framework/cost_graph.proto +++ b/tensorflow/core/framework/cost_graph.proto @@ -45,10 +45,12 @@ message CostGraphDef { // Temporary memory used by this node. int64 temporary_memory_size = 6; - int64 host_temp_memory_size = 10; - int64 device_temp_memory_size = 11; - int64 host_persistent_memory_size = 12; - int64 device_persistent_memory_size = 16; + // Persistent memory used by this node. + int64 persistent_memory_size = 12; + + int64 host_temp_memory_size = 10 [deprecated = true]; + int64 device_temp_memory_size = 11 [deprecated = true]; + int64 device_persistent_memory_size = 16 [deprecated = true]; // Estimate of the computational cost of this node, in microseconds. int64 compute_cost = 9; diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc index 433005c8ab..c879dc6f3f 100644 --- a/tensorflow/core/framework/op_kernel.cc +++ b/tensorflow/core/framework/op_kernel.cc @@ -252,10 +252,8 @@ OpKernelContext::OpKernelContext(Params* params) OpKernelContext::OpKernelContext(Params* params, int num_outputs) : params_(params), outputs_(num_outputs), - host_temp_memory_size_(0), - device_temp_memory_size_(0), - host_persistent_memory_allocated_(0), - device_persistent_memory_allocated_(0) { + temp_memory_size_(0), + persistent_memory_allocated_(0) { Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes()); params_->ensure_eigen_gpu_device(); params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device, @@ -668,11 +666,7 @@ Status OpKernelContext::allocate_temp( if (a->TracksAllocationSizes()) { int64 alloc_size = a->AllocatedSize(const_cast<char*>(out_temp->tensor_data().data())); - if (allocate_on_host(allocator_attr)) { - record_host_temp_memory_size(alloc_size); - } else { - record_device_temp_memory_size(alloc_size); - } + record_temp_memory_size(alloc_size); } } return s; @@ -795,26 +789,15 @@ bool OpKernelContext::allocate_on_host(AllocatorAttributes alloc_attr) const { return alloc_attr.on_host() || device()->attributes().device_type() == "CPU"; } -void OpKernelContext::record_host_persistent_memory_allocation(int64 size, - int64 alloc_id) { - host_persistent_memory_allocated_ += size; - host_persistent_alloc_ids_.push_back(alloc_id); -} - -void OpKernelContext::record_device_persistent_memory_allocation( - int64 size, int64 alloc_id) { - device_persistent_memory_allocated_ += size; - device_persistent_alloc_ids_.push_back(alloc_id); -} - -std::vector<int64> OpKernelContext::host_persistent_alloc_ids() const { - return std::vector<int64>(host_persistent_alloc_ids_.begin(), - host_persistent_alloc_ids_.end()); +void OpKernelContext::record_persistent_memory_allocation(int64 size, + int64 alloc_id) { + persistent_memory_allocated_ += size; + persistent_alloc_ids_.push_back(alloc_id); } -std::vector<int64> OpKernelContext::device_persistent_alloc_ids() const { - return std::vector<int64>(device_persistent_alloc_ids_.begin(), - device_persistent_alloc_ids_.end()); +std::vector<int64> OpKernelContext::persistent_alloc_ids() const { + return std::vector<int64>(persistent_alloc_ids_.begin(), + persistent_alloc_ids_.end()); } // OpKernel registration ------------------------------------------------------ diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h index 3a9a6121c0..25150499ad 100644 --- a/tensorflow/core/framework/op_kernel.h +++ b/tensorflow/core/framework/op_kernel.h @@ -1033,33 +1033,21 @@ class OpKernelContext { bool allocate_on_host(AllocatorAttributes alloc_attr) const; // Records temporary memory sizes. - void record_host_temp_memory_size(int64 size) { - host_temp_memory_size_ += size; - } - void record_device_temp_memory_size(int64 size) { - device_temp_memory_size_ += size; - } + void record_temp_memory_size(int64 size) { temp_memory_size_ += size; } // Returns recorded size of temporary memory; - int64 host_temp_memory_size() const { return host_temp_memory_size_; } - int64 device_temp_memory_size() const { return device_temp_memory_size_; } + int64 temp_memory_size() const { return temp_memory_size_; } // Records persistent memory allocation, size can be negative indicating // deallocation. - void record_host_persistent_memory_allocation(int64 size, - int64 alloc_id = -1); - void record_device_persistent_memory_allocation(int64 size, - int64 alloc_id = -1); + void record_persistent_memory_allocation(int64 size, int64 alloc_id = -1); // Returns recorded size and ids of persistent memory. - int64 host_persistent_memory_allocated() const { - return host_persistent_memory_allocated_; + int64 persistent_memory_allocated() const { + return persistent_memory_allocated_; } - int64 device_persistent_memory_allocated() const { - return device_persistent_memory_allocated_; - } - std::vector<int64> host_persistent_alloc_ids() const; - std::vector<int64> device_persistent_alloc_ids() const; + + std::vector<int64> persistent_alloc_ids() const; bool input_is_ref(int index) const; @@ -1104,12 +1092,9 @@ class OpKernelContext { bool is_output_dead_ = false; - int64 host_temp_memory_size_; - int64 device_temp_memory_size_; - gtl::InlinedVector<int64, 2> host_persistent_alloc_ids_; - gtl::InlinedVector<int64, 2> device_persistent_alloc_ids_; - int64 host_persistent_memory_allocated_; - int64 device_persistent_memory_allocated_; + int64 temp_memory_size_; + gtl::InlinedVector<int64, 2> persistent_alloc_ids_; + int64 persistent_memory_allocated_; TF_DISALLOW_COPY_AND_ASSIGN(OpKernelContext); }; diff --git a/tensorflow/core/framework/step_stats.proto b/tensorflow/core/framework/step_stats.proto index 99dee2257e..65c8089d51 100644 --- a/tensorflow/core/framework/step_stats.proto +++ b/tensorflow/core/framework/step_stats.proto @@ -40,12 +40,13 @@ message NodeOutput { // For memory tracking. message MemoryStats { - int64 host_temp_memory_size = 1; - int64 device_temp_memory_size = 2; - int64 host_persistent_memory_size = 3; - int64 device_persistent_memory_size = 4; - repeated int64 host_persistent_tensor_alloc_ids = 5; - repeated int64 device_persistent_tensor_alloc_ids = 6; + int64 temp_memory_size = 1; + int64 persistent_memory_size = 3; + repeated int64 persistent_tensor_alloc_ids = 5; + + int64 device_temp_memory_size = 2 [deprecated = true]; + int64 device_persistent_memory_size = 4 [deprecated = true]; + repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true]; } // Time/size stats recorded for a single execution of a graph node. diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc index 3ed32068ae..b1e6cf64e8 100644 --- a/tensorflow/core/graph/costmodel.cc +++ b/tensorflow/core/graph/costmodel.cc @@ -291,59 +291,24 @@ Bytes CostModel::TempMemorySize(const Node* node) const { return max_mem_usage_[id].temp_memory_size; } -Bytes CostModel::HostTempMemorySize(const Node* node) const { +Bytes CostModel::PersistentMemorySize(const Node* node) const { const int id = Id(node); if (id < 0) { return Bytes(0); } - return max_mem_usage_[id].host_temp_memory_size; -} - -Bytes CostModel::DeviceTempMemorySize(const Node* node) const { - const int id = Id(node); - if (id < 0) { - return Bytes(0); - } - return max_mem_usage_[id].device_temp_memory_size; -} - -Bytes CostModel::HostPersistentMemorySize(const Node* node) const { - const int id = Id(node); - if (id < 0) { - return Bytes(0); - } - return max_mem_usage_[id].host_persistent_memory_size; -} - -Bytes CostModel::DevicePersistentMemorySize(const Node* node) const { - const int id = Id(node); - if (id < 0) { - return Bytes(0); - } - return max_mem_usage_[id].device_persistent_memory_size; + return max_mem_usage_[id].persistent_memory_size; } void CostModel::RecordMemoryStats(const Node* node, const MemoryStats& memory_stats) { const int id = Id(node); if (id < 0) return; - max_mem_usage_[id].host_temp_memory_size = - memory_stats.host_temp_memory_size(); - max_mem_usage_[id].device_temp_memory_size = - memory_stats.device_temp_memory_size(); - max_mem_usage_[id].host_persistent_memory_size = - memory_stats.host_persistent_memory_size(); - max_mem_usage_[id].device_persistent_memory_size = - memory_stats.device_persistent_memory_size(); - for (int64 alloc_id : memory_stats.host_persistent_tensor_alloc_ids()) { - if (alloc_id > 0) { - host_persistent_alloc_ids_.insert(alloc_id); - } - } - for (int64 alloc_id : memory_stats.device_persistent_tensor_alloc_ids()) { + max_mem_usage_[id].temp_memory_size = memory_stats.temp_memory_size(); + max_mem_usage_[id].persistent_memory_size = + memory_stats.persistent_memory_size(); + for (int64 alloc_id : memory_stats.persistent_tensor_alloc_ids()) { if (alloc_id > 0) { - persistent_alloc_ids_by_devices_[node->assigned_device_name()].insert( - alloc_id); + persistent_alloc_ids_.insert(alloc_id); } } } @@ -381,7 +346,7 @@ int64 CostModel::AllocationId(const Node* node, int slot) const { } bool CostModel::IsPersistentTensor(const Node* node, int64 alloc_id) const { - if (host_persistent_alloc_ids_.count(alloc_id) > 0) { + if (persistent_alloc_ids_.count(alloc_id) > 0) { return true; } if (persistent_alloc_ids_by_devices_.find(node->assigned_device_name()) == @@ -548,11 +513,8 @@ void CostModel::AddToCostGraphDef(const Graph* graph, cnode->add_control_input(Id(e->src())); } - cnode->set_host_temp_memory_size(HostTempMemorySize(n).value()); - cnode->set_device_temp_memory_size(DeviceTempMemorySize(n).value()); - cnode->set_host_persistent_memory_size(HostPersistentMemorySize(n).value()); - cnode->set_device_persistent_memory_size( - DevicePersistentMemorySize(n).value()); + cnode->set_temporary_memory_size(TempMemorySize(n).value()); + cnode->set_persistent_memory_size(PersistentMemorySize(n).value()); cnode->set_compute_cost(MaxExecutionTime(n).value()); diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h index 8afa4971ad..081eb2ff4c 100644 --- a/tensorflow/core/graph/costmodel.h +++ b/tensorflow/core/graph/costmodel.h @@ -133,13 +133,8 @@ class CostModel { // Returns the size in bytes of temporary memory consumed by "node". Bytes TempMemorySize(const Node* node) const; - // Returns the size in bytes of temporary memory consumed by "node". - Bytes HostTempMemorySize(const Node* node) const; - Bytes DeviceTempMemorySize(const Node* node) const; - // Returns the size of persistent memory allocated by "node". - Bytes HostPersistentMemorySize(const Node* node) const; - Bytes DevicePersistentMemorySize(const Node* node) const; + Bytes PersistentMemorySize(const Node* node) const; // Records memory stats such as temp momory and persistent memory. void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats); @@ -210,21 +205,11 @@ class CostModel { // Maximum memory usage struct MemUsage { - MemUsage() - : temp_memory_size(-1), - host_temp_memory_size(0), - device_temp_memory_size(0), - host_persistent_memory_size(0), - device_persistent_memory_size(0) {} + MemUsage() : temp_memory_size(0), persistent_memory_size(0) {} // TODO(yuefengz): temp_memory_size is not being used, remove it. Bytes temp_memory_size; - - Bytes host_temp_memory_size; - Bytes device_temp_memory_size; - - Bytes host_persistent_memory_size; - Bytes device_persistent_memory_size; + Bytes persistent_memory_size; gtl::InlinedVector<Bytes, 2> output_port_mem; gtl::InlinedVector<TensorShapeProto, 2> output_port_shape; @@ -234,7 +219,7 @@ class CostModel { std::vector<gtl::InlinedVector<int64, 2> > output_port_alloc_ids_; - std::set<int64> host_persistent_alloc_ids_; + std::set<int64> persistent_alloc_ids_; std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_; TensorShapeProto unknown_shape_; diff --git a/tensorflow/core/grappler/clusters/single_machine_test.cc b/tensorflow/core/grappler/clusters/single_machine_test.cc index f6b394a860..c6352c1448 100644 --- a/tensorflow/core/grappler/clusters/single_machine_test.cc +++ b/tensorflow/core/grappler/clusters/single_machine_test.cc @@ -467,13 +467,11 @@ TEST_F(SingleMachineTest, PersistentMemory) { found_hashtable = true; // Persistent memory usage should be 0 since it's recorded as part of the // initialize_table op. - EXPECT_EQ(0, node.host_persistent_memory_size()); - EXPECT_EQ(0, node.device_persistent_memory_size()); + EXPECT_EQ(0, node.persistent_memory_size()); } else if (node.name() == "initialize_table") { found_table_init = true; // Persistent memory should hold 2 keys and 2 values. - EXPECT_LE(4 * sizeof(int64), node.host_persistent_memory_size()); - EXPECT_EQ(0, node.device_persistent_memory_size()); + EXPECT_LE(4 * sizeof(int64), node.persistent_memory_size()); } } EXPECT_TRUE(found_table_init); diff --git a/tensorflow/core/grappler/costs/op_performance_data.proto b/tensorflow/core/grappler/costs/op_performance_data.proto index 1a111b71dc..1d623b8db8 100644 --- a/tensorflow/core/grappler/costs/op_performance_data.proto +++ b/tensorflow/core/grappler/costs/op_performance_data.proto @@ -96,13 +96,12 @@ message OpPerformance { // The output information may have memory usage and output shapes. repeated int64 output_memory = 1; - // Temporary memory allocated by this node. - int64 host_temp_memory = 2; - int64 device_temp_memory = 3; + // Temp and persistent memory allocated by this node. + int64 temp_memory = 2; + int64 persistent_memory = 4; - // The persisted_memory doesn't include outputs. - int64 host_persistent_memory = 4; - int64 device_persistent_memory = 5; + int64 device_temp_memory = 3 [deprecated = true]; + int64 device_persistent_memory = 5 [deprecated = true]; } OpMemory op_memory = 9; } diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc index ade0ad53fb..602f69f12e 100644 --- a/tensorflow/core/grappler/costs/utils.cc +++ b/tensorflow/core/grappler/costs/utils.cc @@ -285,14 +285,10 @@ OpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph, perf->mutable_op_memory()->add_output_memory(output_info.size()); } - perf->mutable_op_memory()->set_host_temp_memory( - cost_node->host_temp_memory_size()); - perf->mutable_op_memory()->set_device_temp_memory( - cost_node->device_temp_memory_size()); - perf->mutable_op_memory()->set_host_persistent_memory( - cost_node->host_persistent_memory_size()); - perf->mutable_op_memory()->set_device_persistent_memory( - cost_node->device_persistent_memory_size()); + perf->mutable_op_memory()->set_temp_memory( + cost_node->temporary_memory_size()); + perf->mutable_op_memory()->set_persistent_memory( + cost_node->persistent_memory_size()); } return ret; } diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc index 0af889f886..d7d07ee7a5 100644 --- a/tensorflow/core/grappler/costs/virtual_scheduler.cc +++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc @@ -984,21 +984,12 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) { nodestate.time_scheduled.asMicroSeconds().count()); auto* mem_stats = node_stats->mutable_memory_stats(); // VirtualScheduler does not specify scratch pad memory usage. - mem_stats->set_host_temp_memory_size(0); - mem_stats->set_device_temp_memory_size(0); - int64 host_persistent_memory_size = 0; - int64 device_persistent_memory_size = 0; + mem_stats->set_temp_memory_size(0); + int64 persistent_memory_size = 0; if (IsPersistentNode(node_def)) { - if (device.first.find("cpu") != string::npos || - device.first.find("CPU") != string::npos) { - host_persistent_memory_size = total_output_size; - } else { - device_persistent_memory_size = total_output_size; - } + persistent_memory_size = total_output_size; } - mem_stats->set_host_persistent_memory_size(host_persistent_memory_size); - mem_stats->set_device_persistent_memory_size( - device_persistent_memory_size); + mem_stats->set_persistent_memory_size(persistent_memory_size); *device_partition_graph->add_node() = *node_def; } } diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc index c8bfb26859..59f9f69315 100644 --- a/tensorflow/core/kernels/constant_op.cc +++ b/tensorflow/core/kernels/constant_op.cc @@ -57,12 +57,7 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx) void ConstantOp::Compute(OpKernelContext* ctx) { ctx->set_output(0, tensor_); if (TF_PREDICT_FALSE(ctx->track_allocations())) { - AllocatorAttributes attr; - if (ctx->allocate_on_host(attr)) { - ctx->record_host_persistent_memory_allocation(tensor_.AllocatedBytes()); - } else { - ctx->record_device_persistent_memory_allocation(tensor_.AllocatedBytes()); - } + ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes()); } } diff --git a/tensorflow/core/kernels/constant_op_test.cc b/tensorflow/core/kernels/constant_op_test.cc index 62cc67c736..7a05d9371d 100644 --- a/tensorflow/core/kernels/constant_op_test.cc +++ b/tensorflow/core/kernels/constant_op_test.cc @@ -72,9 +72,9 @@ void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) { TF_EXPECT_OK(ctx.status()); if (on_gpu) { - EXPECT_EQ(ctx.device_persistent_memory_allocated(), 512); + EXPECT_EQ(ctx.persistent_memory_allocated(), 512); } else { - EXPECT_EQ(ctx.host_persistent_memory_allocated(), 480); + EXPECT_EQ(ctx.persistent_memory_allocated(), 480); } // Remove memry leak errors. diff --git a/tensorflow/core/kernels/lookup_table_init_op.cc b/tensorflow/core/kernels/lookup_table_init_op.cc index 38adcada6d..b352dd257c 100644 --- a/tensorflow/core/kernels/lookup_table_init_op.cc +++ b/tensorflow/core/kernels/lookup_table_init_op.cc @@ -82,8 +82,8 @@ class InitializeTableOp : public OpKernel { } OP_REQUIRES_OK(ctx, table->Initialize(iter)); if (ctx->track_allocations()) { - ctx->record_host_persistent_memory_allocation(table->MemoryUsed() - - memory_used_before); + ctx->record_persistent_memory_allocation(table->MemoryUsed() - + memory_used_before); } } @@ -144,8 +144,8 @@ class InitializeTableFromTextFileOp : public OpKernel { vocab_filename, vocab_size_, delimiter_, key_index_, value_index_, ctx->env(), table)); if (ctx->track_allocations()) { - ctx->record_host_persistent_memory_allocation(table->MemoryUsed() - - memory_used_before); + ctx->record_persistent_memory_allocation(table->MemoryUsed() - + memory_used_before); } } diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc index 418d9dcc61..e3872fee0e 100644 --- a/tensorflow/core/kernels/lookup_table_op.cc +++ b/tensorflow/core/kernels/lookup_table_op.cc @@ -709,8 +709,8 @@ class LookupTableInsertOp : public OpKernel { } OP_REQUIRES_OK(ctx, table->Insert(ctx, keys, values)); if (ctx->track_allocations()) { - ctx->record_host_persistent_memory_allocation(table->MemoryUsed() - - memory_used_before); + ctx->record_persistent_memory_allocation(table->MemoryUsed() - + memory_used_before); } } }; @@ -786,8 +786,8 @@ class LookupTableImportOp : public OpKernel { } OP_REQUIRES_OK(ctx, table->ImportValues(ctx, keys, values)); if (ctx->track_allocations()) { - ctx->record_host_persistent_memory_allocation(table->MemoryUsed() - - memory_used_before); + ctx->record_persistent_memory_allocation(table->MemoryUsed() - + memory_used_before); } } }; diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h index ff23a09a24..5ba9b936e4 100644 --- a/tensorflow/core/kernels/lookup_table_op.h +++ b/tensorflow/core/kernels/lookup_table_op.h @@ -64,7 +64,7 @@ class LookupTableOp : public OpKernel { return ctx->status(); } if (ctx->track_allocations()) { - ctx->record_host_persistent_memory_allocation( + ctx->record_persistent_memory_allocation( container->MemoryUsed() + table_handle_.AllocatedBytes()); } *ret = container; diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h index 2d68ac7a29..ad606803ee 100644 --- a/tensorflow/core/kernels/queue_op.h +++ b/tensorflow/core/kernels/queue_op.h @@ -44,8 +44,7 @@ class QueueOp : public ResourceOpKernel<QueueInterface> { void Compute(OpKernelContext* context) override { ResourceOpKernel<QueueInterface>::Compute(context); if (resource_ && context->track_allocations()) { - context->record_host_persistent_memory_allocation( - resource_->MemoryUsed()); + context->record_persistent_memory_allocation(resource_->MemoryUsed()); } } diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h index 9da992ccd1..d7bebfb24c 100644 --- a/tensorflow/core/kernels/reduction_ops_common.h +++ b/tensorflow/core/kernels/reduction_ops_common.h @@ -240,14 +240,7 @@ class ReductionOp : public OpKernel { ctx->SetStatus(errors::Internal("Error during reduction copy.")); } if (ctx->track_allocations()) { - // The temporary memory becomes the output memory. - if (ctx->allocate_on_host(alloc_attr)) { - ctx->record_host_temp_memory_size( - -static_cast<int64>(out.AllocatedBytes())); - } else { - ctx->record_device_temp_memory_size( - -static_cast<int64>(out.AllocatedBytes())); - } + ctx->record_temp_memory_size(-static_cast<int64>(out.AllocatedBytes())); } ctx->set_output(0, out); } diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc index 1b7079dcba..10ccc85b7c 100644 --- a/tensorflow/core/kernels/variable_ops.cc +++ b/tensorflow/core/kernels/variable_ops.cc @@ -76,13 +76,7 @@ void VariableOp::Compute(OpKernelContext* ctx) { AllocatorAttributes attr; attr.set_gpu_compatible(true); attr.set_nic_compatible(true); - if (ctx->allocate_on_host(attr)) { - ctx->record_host_persistent_memory_allocation( - var->tensor()->AllocatedBytes()); - } else { - ctx->record_device_persistent_memory_allocation( - var->tensor()->AllocatedBytes()); - } + ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes()); } var->Unref(); } @@ -113,14 +107,8 @@ class TemporaryVariableOp : public OpKernel { var_name_, tmp_var)); context->set_output_ref(0, &tmp_var->mu, &tmp_var->val); if (context->track_allocations()) { - AllocatorAttributes attr; - if (context->allocate_on_host(attr)) { - context->record_host_persistent_memory_allocation( - tmp_var->val.AllocatedBytes()); - } else { - context->record_device_persistent_memory_allocation( - tmp_var->val.AllocatedBytes()); - } + context->record_persistent_memory_allocation( + tmp_var->val.AllocatedBytes()); } } @@ -163,13 +151,8 @@ class DestroyTemporaryVariableOp : public OpKernel { OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>( context->step_container()->name(), var_name_)); if (context->track_allocations()) { - if (context->allocate_on_host(AllocatorAttributes())) { - context->record_host_persistent_memory_allocation( - -static_cast<int64>(tmpvar.AllocatedBytes())); - } else { - context->record_device_persistent_memory_allocation( - -static_cast<int64>(tmpvar.AllocatedBytes())); - } + context->record_persistent_memory_allocation( + -static_cast<int64>(tmpvar.AllocatedBytes())); } } diff --git a/tensorflow/core/profiler/internal/testdata/run_meta b/tensorflow/core/profiler/internal/testdata/run_meta Binary files differindex ae76acb743..eaea62b06c 100644 --- a/tensorflow/core/profiler/internal/testdata/run_meta +++ b/tensorflow/core/profiler/internal/testdata/run_meta diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc index 2945c9510f..86cb20de7b 100644 --- a/tensorflow/core/profiler/internal/tfprof_node.cc +++ b/tensorflow/core/profiler/internal/tfprof_node.cc @@ -133,18 +133,21 @@ void ExecStep::AddMemoryStats(const string& dev, exec_mem.set_output_bytes(total_output_bytes); if (step_stat.has_memory_stats()) { - exec_mem.set_host_temp_bytes( - exec_mem.host_temp_bytes() + - step_stat.memory_stats().host_temp_memory_size()); - exec_mem.set_host_persistent_bytes( - exec_mem.host_persistent_bytes() + - step_stat.memory_stats().host_persistent_memory_size()); - exec_mem.set_accelerator_temp_bytes( - exec_mem.accelerator_temp_bytes() + - step_stat.memory_stats().device_temp_memory_size()); - exec_mem.set_accelerator_persistent_bytes( - exec_mem.accelerator_persistent_bytes() + - step_stat.memory_stats().device_persistent_memory_size()); + if (IsPlacedOnCPU(dev)) { + // Currently we assume ops placed on gpu only allocate memory on gpu. + exec_mem.set_host_temp_bytes(exec_mem.host_temp_bytes() + + step_stat.memory_stats().temp_memory_size()); + exec_mem.set_host_persistent_bytes( + exec_mem.host_persistent_bytes() + + step_stat.memory_stats().persistent_memory_size()); + } else { + exec_mem.set_accelerator_temp_bytes( + exec_mem.accelerator_temp_bytes() + + step_stat.memory_stats().temp_memory_size()); + exec_mem.set_accelerator_persistent_bytes( + exec_mem.accelerator_persistent_bytes() + + step_stat.memory_stats().persistent_memory_size()); + } } // TODO(xpan): Make this more accurate: |