about | summary | refs | log | tree | commit | diff | homepage
path: root/tensorflow/stream_executor/device_description.h
diff options
context:
space:
mode:
author: Justin Lebar <jlebar@google.com> 2018-03-15 02:22:17 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2018-03-15 02:25:56 -0700
commit: b08c54271084b05ea822b3348a3a448a9fe3b898 (patch)
tree: 49e71a99b2f6c974edd73761e4d5928c9a914b1f /tensorflow/stream_executor/device_description.h
parent: 9037e241de1e64044ff55ab539ccc1fb013c178a (diff)
[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.
Add a memory_bandwidth() property to StreamExecutor's DeviceDescription, and use this in the GPU's --xla_hlo_profile.

PiperOrigin-RevId: 189157407
Diffstat (limited to 'tensorflow/stream_executor/device_description.h')
-rw-r--r-- tensorflow/stream_executor/device_description.h | 9
1 files changed, 9 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index f2b35bcb43..fcf0928096 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -140,6 +140,11 @@ class DeviceDescription {
// Returns the device memory size in bytes.
uint64 device_memory_size() const { return device_memory_size_; }
+ // Returns the device's memory bandwidth in bytes/sec. (This is for
+ // reads/writes to/from the device's own memory, not for transfers between the
+ // host and device.)
+ uint64 memory_bandwidth() const { return memory_bandwidth_; }
+
// Returns the device's core clock rate in GHz.
float clock_rate_ghz() const { return clock_rate_ghz_; }
@@ -212,6 +217,7 @@ class DeviceDescription {
uint64 device_address_bits_;
uint64 device_memory_size_;
+ uint64 memory_bandwidth_;
// Shared memory limits on a given device.
uint64 shared_memory_per_core_;
@@ -305,6 +311,9 @@ class DeviceDescriptionBuilder {
void set_device_memory_size(uint64 value) {
device_description_->device_memory_size_ = value;
}
+ void set_memory_bandwidth(uint64 value) {
+ device_description_->memory_bandwidth_ = value;
+ }
void set_shared_memory_per_core(int64 value) {
device_description_->shared_memory_per_core_ = value;