about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- tensorflow/c/eager/runtime.cc 16
-rw-r--r-- tensorflow/core/common_runtime/executor.cc 14
-rw-r--r-- tensorflow/core/framework/cost_graph.proto 10
-rw-r--r-- tensorflow/core/framework/op_kernel.cc 37
-rw-r--r-- tensorflow/core/framework/op_kernel.h 35
-rw-r--r-- tensorflow/core/framework/step_stats.proto 13
-rw-r--r-- tensorflow/core/graph/costmodel.cc 58
-rw-r--r-- tensorflow/core/graph/costmodel.h 23
-rw-r--r-- tensorflow/core/grappler/clusters/single_machine_test.cc 6
-rw-r--r-- tensorflow/core/grappler/costs/op_performance_data.proto 11
-rw-r--r-- tensorflow/core/grappler/costs/utils.cc 12
-rw-r--r-- tensorflow/core/grappler/costs/virtual_scheduler.cc 17
-rw-r--r-- tensorflow/core/kernels/constant_op.cc 7
-rw-r--r-- tensorflow/core/kernels/constant_op_test.cc 4
-rw-r--r-- tensorflow/core/kernels/lookup_table_init_op.cc 8
-rw-r--r-- tensorflow/core/kernels/lookup_table_op.cc 8
-rw-r--r-- tensorflow/core/kernels/lookup_table_op.h 2
-rw-r--r-- tensorflow/core/kernels/queue_op.h 3
-rw-r--r-- tensorflow/core/kernels/reduction_ops_common.h 9
-rw-r--r-- tensorflow/core/kernels/variable_ops.cc 27
-rw-r--r-- tensorflow/core/profiler/internal/testdata/run_meta bin 5539 -> 5631 bytes
-rw-r--r-- tensorflow/core/profiler/internal/tfprof_node.cc 27
22 files changed, 105 insertions, 242 deletions
diff --git a/tensorflow/c/eager/runtime.cc b/tensorflow/c/eager/runtime.cc
index ec34b0ea77..3a9951e14d 100644
--- a/tensorflow/c/eager/runtime.cc
+++ b/tensorflow/c/eager/runtime.cc
@@ -316,18 +316,12 @@ Status KernelAndDevice::Run(std::vector<Tensor>* input_tensors,
allocator_pair.second->GetRecordsAndUnRef();
}
auto* ms = stats->mutable_memory_stats();
- ms->set_host_temp_memory_size(context.host_temp_memory_size());
- ms->set_device_temp_memory_size(context.device_temp_memory_size());
- for (const auto& alloc_id : context.host_persistent_alloc_ids()) {
- ms->mutable_host_persistent_tensor_alloc_ids()->Add(alloc_id);
+ ms->set_temp_memory_size(context.temp_memory_size());
+ for (const auto& alloc_id : context.persistent_alloc_ids()) {
+ ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
}
- for (const auto& alloc_id : context.device_persistent_alloc_ids()) {
- ms->mutable_device_persistent_tensor_alloc_ids()->Add(alloc_id);
- }
- ms->set_host_persistent_memory_size(
- context.host_persistent_memory_allocated());
- ms->set_device_persistent_memory_size(
- context.device_persistent_memory_allocated());
+
+ ms->set_persistent_memory_size(context.persistent_memory_allocated());
}
return Status::OK();
}
diff --git a/tensorflow/core/common_runtime/executor.cc b/tensorflow/core/common_runtime/executor.cc
index fe1cf1b12e..9d03caff1e 100644
--- a/tensorflow/core/common_runtime/executor.cc
+++ b/tensorflow/core/common_runtime/executor.cc
@@ -172,17 +172,11 @@ void SetMemory(NodeExecStatsWrapper* stats, OpKernelContext* ctx) {
stats->AddAllocation(allocator_pair.first, allocator_pair.second);
}
auto* ms = stats->stats()->mutable_memory_stats();
- ms->set_host_temp_memory_size(ctx->host_temp_memory_size());
- ms->set_device_temp_memory_size(ctx->device_temp_memory_size());
- for (const auto& alloc_id : ctx->host_persistent_alloc_ids()) {
- ms->mutable_host_persistent_tensor_alloc_ids()->Add(alloc_id);
+ ms->set_temp_memory_size(ctx->temp_memory_size());
+ for (const auto& alloc_id : ctx->persistent_alloc_ids()) {
+ ms->mutable_persistent_tensor_alloc_ids()->Add(alloc_id);
}
- for (const auto& alloc_id : ctx->device_persistent_alloc_ids()) {
- ms->mutable_device_persistent_tensor_alloc_ids()->Add(alloc_id);
- }
- ms->set_host_persistent_memory_size(ctx->host_persistent_memory_allocated());
- ms->set_device_persistent_memory_size(
- ctx->device_persistent_memory_allocated());
+ ms->set_persistent_memory_size(ctx->persistent_memory_allocated());
}
void SetReferencedTensors(NodeExecStatsWrapper* stats,
diff --git a/tensorflow/core/framework/cost_graph.proto b/tensorflow/core/framework/cost_graph.proto
index f4837fbfc5..7885b0171a 100644
--- a/tensorflow/core/framework/cost_graph.proto
+++ b/tensorflow/core/framework/cost_graph.proto
@@ -45,10 +45,12 @@ message CostGraphDef {
// Temporary memory used by this node.
int64 temporary_memory_size = 6;
- int64 host_temp_memory_size = 10;
- int64 device_temp_memory_size = 11;
- int64 host_persistent_memory_size = 12;
- int64 device_persistent_memory_size = 16;
+ // Persistent memory used by this node.
+ int64 persistent_memory_size = 12;
+
+ int64 host_temp_memory_size = 10 [deprecated = true];
+ int64 device_temp_memory_size = 11 [deprecated = true];
+ int64 device_persistent_memory_size = 16 [deprecated = true];
// Estimate of the computational cost of this node, in microseconds.
int64 compute_cost = 9;
diff --git a/tensorflow/core/framework/op_kernel.cc b/tensorflow/core/framework/op_kernel.cc
index 433005c8ab..c879dc6f3f 100644
--- a/tensorflow/core/framework/op_kernel.cc
+++ b/tensorflow/core/framework/op_kernel.cc
@@ -252,10 +252,8 @@ OpKernelContext::OpKernelContext(Params* params)
OpKernelContext::OpKernelContext(Params* params, int num_outputs)
: params_(params),
outputs_(num_outputs),
- host_temp_memory_size_(0),
- device_temp_memory_size_(0),
- host_persistent_memory_allocated_(0),
- device_persistent_memory_allocated_(0) {
+ temp_memory_size_(0),
+ persistent_memory_allocated_(0) {
Allocator* eigen_gpu_allocator = get_allocator(AllocatorAttributes());
params_->ensure_eigen_gpu_device();
params_->device->ReinitializeGpuDevice(this, params_->eigen_gpu_device,
@@ -668,11 +666,7 @@ Status OpKernelContext::allocate_temp(
if (a->TracksAllocationSizes()) {
int64 alloc_size =
a->AllocatedSize(const_cast<char*>(out_temp->tensor_data().data()));
- if (allocate_on_host(allocator_attr)) {
- record_host_temp_memory_size(alloc_size);
- } else {
- record_device_temp_memory_size(alloc_size);
- }
+ record_temp_memory_size(alloc_size);
}
}
return s;
@@ -795,26 +789,15 @@ bool OpKernelContext::allocate_on_host(AllocatorAttributes alloc_attr) const {
return alloc_attr.on_host() || device()->attributes().device_type() == "CPU";
}
-void OpKernelContext::record_host_persistent_memory_allocation(int64 size,
- int64 alloc_id) {
- host_persistent_memory_allocated_ += size;
- host_persistent_alloc_ids_.push_back(alloc_id);
-}
-
-void OpKernelContext::record_device_persistent_memory_allocation(
- int64 size, int64 alloc_id) {
- device_persistent_memory_allocated_ += size;
- device_persistent_alloc_ids_.push_back(alloc_id);
-}
-
-std::vector<int64> OpKernelContext::host_persistent_alloc_ids() const {
- return std::vector<int64>(host_persistent_alloc_ids_.begin(),
- host_persistent_alloc_ids_.end());
+void OpKernelContext::record_persistent_memory_allocation(int64 size,
+ int64 alloc_id) {
+ persistent_memory_allocated_ += size;
+ persistent_alloc_ids_.push_back(alloc_id);
}
-std::vector<int64> OpKernelContext::device_persistent_alloc_ids() const {
- return std::vector<int64>(device_persistent_alloc_ids_.begin(),
- device_persistent_alloc_ids_.end());
+std::vector<int64> OpKernelContext::persistent_alloc_ids() const {
+ return std::vector<int64>(persistent_alloc_ids_.begin(),
+ persistent_alloc_ids_.end());
}
// OpKernel registration ------------------------------------------------------
diff --git a/tensorflow/core/framework/op_kernel.h b/tensorflow/core/framework/op_kernel.h
index 3a9a6121c0..25150499ad 100644
--- a/tensorflow/core/framework/op_kernel.h
+++ b/tensorflow/core/framework/op_kernel.h
@@ -1033,33 +1033,21 @@ class OpKernelContext {
bool allocate_on_host(AllocatorAttributes alloc_attr) const;
// Records temporary memory sizes.
- void record_host_temp_memory_size(int64 size) {
- host_temp_memory_size_ += size;
- }
- void record_device_temp_memory_size(int64 size) {
- device_temp_memory_size_ += size;
- }
+ void record_temp_memory_size(int64 size) { temp_memory_size_ += size; }
// Returns recorded size of temporary memory;
- int64 host_temp_memory_size() const { return host_temp_memory_size_; }
- int64 device_temp_memory_size() const { return device_temp_memory_size_; }
+ int64 temp_memory_size() const { return temp_memory_size_; }
// Records persistent memory allocation, size can be negative indicating
// deallocation.
- void record_host_persistent_memory_allocation(int64 size,
- int64 alloc_id = -1);
- void record_device_persistent_memory_allocation(int64 size,
- int64 alloc_id = -1);
+ void record_persistent_memory_allocation(int64 size, int64 alloc_id = -1);
// Returns recorded size and ids of persistent memory.
- int64 host_persistent_memory_allocated() const {
- return host_persistent_memory_allocated_;
+ int64 persistent_memory_allocated() const {
+ return persistent_memory_allocated_;
}
- int64 device_persistent_memory_allocated() const {
- return device_persistent_memory_allocated_;
- }
- std::vector<int64> host_persistent_alloc_ids() const;
- std::vector<int64> device_persistent_alloc_ids() const;
+
+ std::vector<int64> persistent_alloc_ids() const;
bool input_is_ref(int index) const;
@@ -1104,12 +1092,9 @@ class OpKernelContext {
bool is_output_dead_ = false;
- int64 host_temp_memory_size_;
- int64 device_temp_memory_size_;
- gtl::InlinedVector<int64, 2> host_persistent_alloc_ids_;
- gtl::InlinedVector<int64, 2> device_persistent_alloc_ids_;
- int64 host_persistent_memory_allocated_;
- int64 device_persistent_memory_allocated_;
+ int64 temp_memory_size_;
+ gtl::InlinedVector<int64, 2> persistent_alloc_ids_;
+ int64 persistent_memory_allocated_;
TF_DISALLOW_COPY_AND_ASSIGN(OpKernelContext);
};
diff --git a/tensorflow/core/framework/step_stats.proto b/tensorflow/core/framework/step_stats.proto
index 99dee2257e..65c8089d51 100644
--- a/tensorflow/core/framework/step_stats.proto
+++ b/tensorflow/core/framework/step_stats.proto
@@ -40,12 +40,13 @@ message NodeOutput {
// For memory tracking.
message MemoryStats {
- int64 host_temp_memory_size = 1;
- int64 device_temp_memory_size = 2;
- int64 host_persistent_memory_size = 3;
- int64 device_persistent_memory_size = 4;
- repeated int64 host_persistent_tensor_alloc_ids = 5;
- repeated int64 device_persistent_tensor_alloc_ids = 6;
+ int64 temp_memory_size = 1;
+ int64 persistent_memory_size = 3;
+ repeated int64 persistent_tensor_alloc_ids = 5;
+
+ int64 device_temp_memory_size = 2 [deprecated = true];
+ int64 device_persistent_memory_size = 4 [deprecated = true];
+ repeated int64 device_persistent_tensor_alloc_ids = 6 [deprecated = true];
}
// Time/size stats recorded for a single execution of a graph node.
diff --git a/tensorflow/core/graph/costmodel.cc b/tensorflow/core/graph/costmodel.cc
index 3ed32068ae..b1e6cf64e8 100644
--- a/tensorflow/core/graph/costmodel.cc
+++ b/tensorflow/core/graph/costmodel.cc
@@ -291,59 +291,24 @@ Bytes CostModel::TempMemorySize(const Node* node) const {
return max_mem_usage_[id].temp_memory_size;
}
-Bytes CostModel::HostTempMemorySize(const Node* node) const {
+Bytes CostModel::PersistentMemorySize(const Node* node) const {
const int id = Id(node);
if (id < 0) {
return Bytes(0);
}
- return max_mem_usage_[id].host_temp_memory_size;
-}
-
-Bytes CostModel::DeviceTempMemorySize(const Node* node) const {
- const int id = Id(node);
- if (id < 0) {
- return Bytes(0);
- }
- return max_mem_usage_[id].device_temp_memory_size;
-}
-
-Bytes CostModel::HostPersistentMemorySize(const Node* node) const {
- const int id = Id(node);
- if (id < 0) {
- return Bytes(0);
- }
- return max_mem_usage_[id].host_persistent_memory_size;
-}
-
-Bytes CostModel::DevicePersistentMemorySize(const Node* node) const {
- const int id = Id(node);
- if (id < 0) {
- return Bytes(0);
- }
- return max_mem_usage_[id].device_persistent_memory_size;
+ return max_mem_usage_[id].persistent_memory_size;
}
void CostModel::RecordMemoryStats(const Node* node,
const MemoryStats& memory_stats) {
const int id = Id(node);
if (id < 0) return;
- max_mem_usage_[id].host_temp_memory_size =
- memory_stats.host_temp_memory_size();
- max_mem_usage_[id].device_temp_memory_size =
- memory_stats.device_temp_memory_size();
- max_mem_usage_[id].host_persistent_memory_size =
- memory_stats.host_persistent_memory_size();
- max_mem_usage_[id].device_persistent_memory_size =
- memory_stats.device_persistent_memory_size();
- for (int64 alloc_id : memory_stats.host_persistent_tensor_alloc_ids()) {
- if (alloc_id > 0) {
- host_persistent_alloc_ids_.insert(alloc_id);
- }
- }
- for (int64 alloc_id : memory_stats.device_persistent_tensor_alloc_ids()) {
+ max_mem_usage_[id].temp_memory_size = memory_stats.temp_memory_size();
+ max_mem_usage_[id].persistent_memory_size =
+ memory_stats.persistent_memory_size();
+ for (int64 alloc_id : memory_stats.persistent_tensor_alloc_ids()) {
if (alloc_id > 0) {
- persistent_alloc_ids_by_devices_[node->assigned_device_name()].insert(
- alloc_id);
+ persistent_alloc_ids_.insert(alloc_id);
}
}
}
@@ -381,7 +346,7 @@ int64 CostModel::AllocationId(const Node* node, int slot) const {
}
bool CostModel::IsPersistentTensor(const Node* node, int64 alloc_id) const {
- if (host_persistent_alloc_ids_.count(alloc_id) > 0) {
+ if (persistent_alloc_ids_.count(alloc_id) > 0) {
return true;
}
if (persistent_alloc_ids_by_devices_.find(node->assigned_device_name()) ==
@@ -548,11 +513,8 @@ void CostModel::AddToCostGraphDef(const Graph* graph,
cnode->add_control_input(Id(e->src()));
}
- cnode->set_host_temp_memory_size(HostTempMemorySize(n).value());
- cnode->set_device_temp_memory_size(DeviceTempMemorySize(n).value());
- cnode->set_host_persistent_memory_size(HostPersistentMemorySize(n).value());
- cnode->set_device_persistent_memory_size(
- DevicePersistentMemorySize(n).value());
+ cnode->set_temporary_memory_size(TempMemorySize(n).value());
+ cnode->set_persistent_memory_size(PersistentMemorySize(n).value());
cnode->set_compute_cost(MaxExecutionTime(n).value());
diff --git a/tensorflow/core/graph/costmodel.h b/tensorflow/core/graph/costmodel.h
index 8afa4971ad..081eb2ff4c 100644
--- a/tensorflow/core/graph/costmodel.h
+++ b/tensorflow/core/graph/costmodel.h
@@ -133,13 +133,8 @@ class CostModel {
// Returns the size in bytes of temporary memory consumed by "node".
Bytes TempMemorySize(const Node* node) const;
- // Returns the size in bytes of temporary memory consumed by "node".
- Bytes HostTempMemorySize(const Node* node) const;
- Bytes DeviceTempMemorySize(const Node* node) const;
-
// Returns the size of persistent memory allocated by "node".
- Bytes HostPersistentMemorySize(const Node* node) const;
- Bytes DevicePersistentMemorySize(const Node* node) const;
+ Bytes PersistentMemorySize(const Node* node) const;
// Records memory stats such as temp momory and persistent memory.
void RecordMemoryStats(const Node* node, const MemoryStats& memory_stats);
@@ -210,21 +205,11 @@ class CostModel {
// Maximum memory usage
struct MemUsage {
- MemUsage()
- : temp_memory_size(-1),
- host_temp_memory_size(0),
- device_temp_memory_size(0),
- host_persistent_memory_size(0),
- device_persistent_memory_size(0) {}
+ MemUsage() : temp_memory_size(0), persistent_memory_size(0) {}
// TODO(yuefengz): temp_memory_size is not being used, remove it.
Bytes temp_memory_size;
-
- Bytes host_temp_memory_size;
- Bytes device_temp_memory_size;
-
- Bytes host_persistent_memory_size;
- Bytes device_persistent_memory_size;
+ Bytes persistent_memory_size;
gtl::InlinedVector<Bytes, 2> output_port_mem;
gtl::InlinedVector<TensorShapeProto, 2> output_port_shape;
@@ -234,7 +219,7 @@ class CostModel {
std::vector<gtl::InlinedVector<int64, 2> > output_port_alloc_ids_;
- std::set<int64> host_persistent_alloc_ids_;
+ std::set<int64> persistent_alloc_ids_;
std::map<string, std::set<int64>> persistent_alloc_ids_by_devices_;
TensorShapeProto unknown_shape_;
diff --git a/tensorflow/core/grappler/clusters/single_machine_test.cc b/tensorflow/core/grappler/clusters/single_machine_test.cc
index f6b394a860..c6352c1448 100644
--- a/tensorflow/core/grappler/clusters/single_machine_test.cc
+++ b/tensorflow/core/grappler/clusters/single_machine_test.cc
@@ -467,13 +467,11 @@ TEST_F(SingleMachineTest, PersistentMemory) {
found_hashtable = true;
// Persistent memory usage should be 0 since it's recorded as part of the
// initialize_table op.
- EXPECT_EQ(0, node.host_persistent_memory_size());
- EXPECT_EQ(0, node.device_persistent_memory_size());
+ EXPECT_EQ(0, node.persistent_memory_size());
} else if (node.name() == "initialize_table") {
found_table_init = true;
// Persistent memory should hold 2 keys and 2 values.
- EXPECT_LE(4 * sizeof(int64), node.host_persistent_memory_size());
- EXPECT_EQ(0, node.device_persistent_memory_size());
+ EXPECT_LE(4 * sizeof(int64), node.persistent_memory_size());
}
}
EXPECT_TRUE(found_table_init);
diff --git a/tensorflow/core/grappler/costs/op_performance_data.proto b/tensorflow/core/grappler/costs/op_performance_data.proto
index 1a111b71dc..1d623b8db8 100644
--- a/tensorflow/core/grappler/costs/op_performance_data.proto
+++ b/tensorflow/core/grappler/costs/op_performance_data.proto
@@ -96,13 +96,12 @@ message OpPerformance {
// The output information may have memory usage and output shapes.
repeated int64 output_memory = 1;
- // Temporary memory allocated by this node.
- int64 host_temp_memory = 2;
- int64 device_temp_memory = 3;
+ // Temp and persistent memory allocated by this node.
+ int64 temp_memory = 2;
+ int64 persistent_memory = 4;
- // The persisted_memory doesn't include outputs.
- int64 host_persistent_memory = 4;
- int64 device_persistent_memory = 5;
+ int64 device_temp_memory = 3 [deprecated = true];
+ int64 device_persistent_memory = 5 [deprecated = true];
}
OpMemory op_memory = 9;
}
diff --git a/tensorflow/core/grappler/costs/utils.cc b/tensorflow/core/grappler/costs/utils.cc
index ade0ad53fb..602f69f12e 100644
--- a/tensorflow/core/grappler/costs/utils.cc
+++ b/tensorflow/core/grappler/costs/utils.cc
@@ -285,14 +285,10 @@ OpPerformanceList CostGraphToOpPerformanceData(const CostGraphDef& cost_graph,
perf->mutable_op_memory()->add_output_memory(output_info.size());
}
- perf->mutable_op_memory()->set_host_temp_memory(
- cost_node->host_temp_memory_size());
- perf->mutable_op_memory()->set_device_temp_memory(
- cost_node->device_temp_memory_size());
- perf->mutable_op_memory()->set_host_persistent_memory(
- cost_node->host_persistent_memory_size());
- perf->mutable_op_memory()->set_device_persistent_memory(
- cost_node->device_persistent_memory_size());
+ perf->mutable_op_memory()->set_temp_memory(
+ cost_node->temporary_memory_size());
+ perf->mutable_op_memory()->set_persistent_memory(
+ cost_node->persistent_memory_size());
}
return ret;
}
diff --git a/tensorflow/core/grappler/costs/virtual_scheduler.cc b/tensorflow/core/grappler/costs/virtual_scheduler.cc
index 0af889f886..d7d07ee7a5 100644
--- a/tensorflow/core/grappler/costs/virtual_scheduler.cc
+++ b/tensorflow/core/grappler/costs/virtual_scheduler.cc
@@ -984,21 +984,12 @@ Costs VirtualScheduler::Summary(RunMetadata* metadata) {
nodestate.time_scheduled.asMicroSeconds().count());
auto* mem_stats = node_stats->mutable_memory_stats();
// VirtualScheduler does not specify scratch pad memory usage.
- mem_stats->set_host_temp_memory_size(0);
- mem_stats->set_device_temp_memory_size(0);
- int64 host_persistent_memory_size = 0;
- int64 device_persistent_memory_size = 0;
+ mem_stats->set_temp_memory_size(0);
+ int64 persistent_memory_size = 0;
if (IsPersistentNode(node_def)) {
- if (device.first.find("cpu") != string::npos ||
- device.first.find("CPU") != string::npos) {
- host_persistent_memory_size = total_output_size;
- } else {
- device_persistent_memory_size = total_output_size;
- }
+ persistent_memory_size = total_output_size;
}
- mem_stats->set_host_persistent_memory_size(host_persistent_memory_size);
- mem_stats->set_device_persistent_memory_size(
- device_persistent_memory_size);
+ mem_stats->set_persistent_memory_size(persistent_memory_size);
*device_partition_graph->add_node() = *node_def;
}
}
diff --git a/tensorflow/core/kernels/constant_op.cc b/tensorflow/core/kernels/constant_op.cc
index c8bfb26859..59f9f69315 100644
--- a/tensorflow/core/kernels/constant_op.cc
+++ b/tensorflow/core/kernels/constant_op.cc
@@ -57,12 +57,7 @@ ConstantOp::ConstantOp(OpKernelConstruction* ctx)
void ConstantOp::Compute(OpKernelContext* ctx) {
ctx->set_output(0, tensor_);
if (TF_PREDICT_FALSE(ctx->track_allocations())) {
- AllocatorAttributes attr;
- if (ctx->allocate_on_host(attr)) {
- ctx->record_host_persistent_memory_allocation(tensor_.AllocatedBytes());
- } else {
- ctx->record_device_persistent_memory_allocation(tensor_.AllocatedBytes());
- }
+ ctx->record_persistent_memory_allocation(tensor_.AllocatedBytes());
}
}
diff --git a/tensorflow/core/kernels/constant_op_test.cc b/tensorflow/core/kernels/constant_op_test.cc
index 62cc67c736..7a05d9371d 100644
--- a/tensorflow/core/kernels/constant_op_test.cc
+++ b/tensorflow/core/kernels/constant_op_test.cc
@@ -72,9 +72,9 @@ void ConstantOpTest::PersistentMemoryTrackingTest(bool on_gpu) {
TF_EXPECT_OK(ctx.status());
if (on_gpu) {
- EXPECT_EQ(ctx.device_persistent_memory_allocated(), 512);
+ EXPECT_EQ(ctx.persistent_memory_allocated(), 512);
} else {
- EXPECT_EQ(ctx.host_persistent_memory_allocated(), 480);
+ EXPECT_EQ(ctx.persistent_memory_allocated(), 480);
}
// Remove memry leak errors.
diff --git a/tensorflow/core/kernels/lookup_table_init_op.cc b/tensorflow/core/kernels/lookup_table_init_op.cc
index 38adcada6d..b352dd257c 100644
--- a/tensorflow/core/kernels/lookup_table_init_op.cc
+++ b/tensorflow/core/kernels/lookup_table_init_op.cc
@@ -82,8 +82,8 @@ class InitializeTableOp : public OpKernel {
}
OP_REQUIRES_OK(ctx, table->Initialize(iter));
if (ctx->track_allocations()) {
- ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
- memory_used_before);
+ ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+ memory_used_before);
}
}
@@ -144,8 +144,8 @@ class InitializeTableFromTextFileOp : public OpKernel {
vocab_filename, vocab_size_, delimiter_, key_index_,
value_index_, ctx->env(), table));
if (ctx->track_allocations()) {
- ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
- memory_used_before);
+ ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+ memory_used_before);
}
}
diff --git a/tensorflow/core/kernels/lookup_table_op.cc b/tensorflow/core/kernels/lookup_table_op.cc
index 418d9dcc61..e3872fee0e 100644
--- a/tensorflow/core/kernels/lookup_table_op.cc
+++ b/tensorflow/core/kernels/lookup_table_op.cc
@@ -709,8 +709,8 @@ class LookupTableInsertOp : public OpKernel {
}
OP_REQUIRES_OK(ctx, table->Insert(ctx, keys, values));
if (ctx->track_allocations()) {
- ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
- memory_used_before);
+ ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+ memory_used_before);
}
}
};
@@ -786,8 +786,8 @@ class LookupTableImportOp : public OpKernel {
}
OP_REQUIRES_OK(ctx, table->ImportValues(ctx, keys, values));
if (ctx->track_allocations()) {
- ctx->record_host_persistent_memory_allocation(table->MemoryUsed() -
- memory_used_before);
+ ctx->record_persistent_memory_allocation(table->MemoryUsed() -
+ memory_used_before);
}
}
};
diff --git a/tensorflow/core/kernels/lookup_table_op.h b/tensorflow/core/kernels/lookup_table_op.h
index ff23a09a24..5ba9b936e4 100644
--- a/tensorflow/core/kernels/lookup_table_op.h
+++ b/tensorflow/core/kernels/lookup_table_op.h
@@ -64,7 +64,7 @@ class LookupTableOp : public OpKernel {
return ctx->status();
}
if (ctx->track_allocations()) {
- ctx->record_host_persistent_memory_allocation(
+ ctx->record_persistent_memory_allocation(
container->MemoryUsed() + table_handle_.AllocatedBytes());
}
*ret = container;
diff --git a/tensorflow/core/kernels/queue_op.h b/tensorflow/core/kernels/queue_op.h
index 2d68ac7a29..ad606803ee 100644
--- a/tensorflow/core/kernels/queue_op.h
+++ b/tensorflow/core/kernels/queue_op.h
@@ -44,8 +44,7 @@ class QueueOp : public ResourceOpKernel<QueueInterface> {
void Compute(OpKernelContext* context) override {
ResourceOpKernel<QueueInterface>::Compute(context);
if (resource_ && context->track_allocations()) {
- context->record_host_persistent_memory_allocation(
- resource_->MemoryUsed());
+ context->record_persistent_memory_allocation(resource_->MemoryUsed());
}
}
diff --git a/tensorflow/core/kernels/reduction_ops_common.h b/tensorflow/core/kernels/reduction_ops_common.h
index 9da992ccd1..d7bebfb24c 100644
--- a/tensorflow/core/kernels/reduction_ops_common.h
+++ b/tensorflow/core/kernels/reduction_ops_common.h
@@ -240,14 +240,7 @@ class ReductionOp : public OpKernel {
ctx->SetStatus(errors::Internal("Error during reduction copy."));
}
if (ctx->track_allocations()) {
- // The temporary memory becomes the output memory.
- if (ctx->allocate_on_host(alloc_attr)) {
- ctx->record_host_temp_memory_size(
- -static_cast<int64>(out.AllocatedBytes()));
- } else {
- ctx->record_device_temp_memory_size(
- -static_cast<int64>(out.AllocatedBytes()));
- }
+ ctx->record_temp_memory_size(-static_cast<int64>(out.AllocatedBytes()));
}
ctx->set_output(0, out);
}
diff --git a/tensorflow/core/kernels/variable_ops.cc b/tensorflow/core/kernels/variable_ops.cc
index 1b7079dcba..10ccc85b7c 100644
--- a/tensorflow/core/kernels/variable_ops.cc
+++ b/tensorflow/core/kernels/variable_ops.cc
@@ -76,13 +76,7 @@ void VariableOp::Compute(OpKernelContext* ctx) {
AllocatorAttributes attr;
attr.set_gpu_compatible(true);
attr.set_nic_compatible(true);
- if (ctx->allocate_on_host(attr)) {
- ctx->record_host_persistent_memory_allocation(
- var->tensor()->AllocatedBytes());
- } else {
- ctx->record_device_persistent_memory_allocation(
- var->tensor()->AllocatedBytes());
- }
+ ctx->record_persistent_memory_allocation(var->tensor()->AllocatedBytes());
}
var->Unref();
}
@@ -113,14 +107,8 @@ class TemporaryVariableOp : public OpKernel {
var_name_, tmp_var));
context->set_output_ref(0, &tmp_var->mu, &tmp_var->val);
if (context->track_allocations()) {
- AllocatorAttributes attr;
- if (context->allocate_on_host(attr)) {
- context->record_host_persistent_memory_allocation(
- tmp_var->val.AllocatedBytes());
- } else {
- context->record_device_persistent_memory_allocation(
- tmp_var->val.AllocatedBytes());
- }
+ context->record_persistent_memory_allocation(
+ tmp_var->val.AllocatedBytes());
}
}
@@ -163,13 +151,8 @@ class DestroyTemporaryVariableOp : public OpKernel {
OP_REQUIRES_OK(context, rm->Delete<TemporaryVariableOp::TmpVar>(
context->step_container()->name(), var_name_));
if (context->track_allocations()) {
- if (context->allocate_on_host(AllocatorAttributes())) {
- context->record_host_persistent_memory_allocation(
- -static_cast<int64>(tmpvar.AllocatedBytes()));
- } else {
- context->record_device_persistent_memory_allocation(
- -static_cast<int64>(tmpvar.AllocatedBytes()));
- }
+ context->record_persistent_memory_allocation(
+ -static_cast<int64>(tmpvar.AllocatedBytes()));
}
}
diff --git a/tensorflow/core/profiler/internal/testdata/run_meta b/tensorflow/core/profiler/internal/testdata/run_meta
index ae76acb743..eaea62b06c 100644
--- a/tensorflow/core/profiler/internal/testdata/run_meta
+++ b/tensorflow/core/profiler/internal/testdata/run_meta
Binary files differ
diff --git a/tensorflow/core/profiler/internal/tfprof_node.cc b/tensorflow/core/profiler/internal/tfprof_node.cc
index 2945c9510f..86cb20de7b 100644
--- a/tensorflow/core/profiler/internal/tfprof_node.cc
+++ b/tensorflow/core/profiler/internal/tfprof_node.cc
@@ -133,18 +133,21 @@ void ExecStep::AddMemoryStats(const string& dev,
exec_mem.set_output_bytes(total_output_bytes);
if (step_stat.has_memory_stats()) {
- exec_mem.set_host_temp_bytes(
- exec_mem.host_temp_bytes() +
- step_stat.memory_stats().host_temp_memory_size());
- exec_mem.set_host_persistent_bytes(
- exec_mem.host_persistent_bytes() +
- step_stat.memory_stats().host_persistent_memory_size());
- exec_mem.set_accelerator_temp_bytes(
- exec_mem.accelerator_temp_bytes() +
- step_stat.memory_stats().device_temp_memory_size());
- exec_mem.set_accelerator_persistent_bytes(
- exec_mem.accelerator_persistent_bytes() +
- step_stat.memory_stats().device_persistent_memory_size());
+ if (IsPlacedOnCPU(dev)) {
+ // Currently we assume ops placed on gpu only allocate memory on gpu.
+ exec_mem.set_host_temp_bytes(exec_mem.host_temp_bytes() +
+ step_stat.memory_stats().temp_memory_size());
+ exec_mem.set_host_persistent_bytes(
+ exec_mem.host_persistent_bytes() +
+ step_stat.memory_stats().persistent_memory_size());
+ } else {
+ exec_mem.set_accelerator_temp_bytes(
+ exec_mem.accelerator_temp_bytes() +
+ step_stat.memory_stats().temp_memory_size());
+ exec_mem.set_accelerator_persistent_bytes(
+ exec_mem.accelerator_persistent_bytes() +
+ step_stat.memory_stats().persistent_memory_size());
+ }
}
// TODO(xpan): Make this more accurate: