diff options
author | Justin Lebar <jlebar@google.com> | 2018-03-15 02:22:17 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2018-03-15 02:25:56 -0700 |
commit | b08c54271084b05ea822b3348a3a448a9fe3b898 (patch) | |
tree | 49e71a99b2f6c974edd73761e4d5928c9a914b1f /tensorflow/stream_executor/device_description.h | |
parent | 9037e241de1e64044ff55ab539ccc1fb013c178a (diff) |
[SE] [XLA:GPU] Inform --xla_hlo_profile of the GPU's memory bandwidth.
Add a memory_bandwidth() property to StreamExecutor's DeviceDescription,
and use this in the GPU's --xla_hlo_profile.
PiperOrigin-RevId: 189157407
Diffstat (limited to 'tensorflow/stream_executor/device_description.h')
-rw-r--r-- | tensorflow/stream_executor/device_description.h | 9 |
1 files changed, 9 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/device_description.h b/tensorflow/stream_executor/device_description.h
index f2b35bcb43..fcf0928096 100644
--- a/tensorflow/stream_executor/device_description.h
+++ b/tensorflow/stream_executor/device_description.h
@@ -140,6 +140,11 @@ class DeviceDescription {
   // Returns the device memory size in bytes.
   uint64 device_memory_size() const { return device_memory_size_; }
+  // Returns the device's memory bandwidth in bytes/sec. (This is for
+  // reads/writes to/from the device's own memory, not for transfers between the
+  // host and device.)
+  uint64 memory_bandwidth() const { return memory_bandwidth_; }
+
   // Returns the device's core clock rate in GHz.
   float clock_rate_ghz() const { return clock_rate_ghz_; }
@@ -212,6 +217,7 @@ class DeviceDescription {
   uint64 device_address_bits_;
   uint64 device_memory_size_;
+  uint64 memory_bandwidth_;
   // Shared memory limits on a given device.
   uint64 shared_memory_per_core_;
@@ -305,6 +311,9 @@ class DeviceDescriptionBuilder {
   void set_device_memory_size(uint64 value) {
     device_description_->device_memory_size_ = value;
   }
+  void set_memory_bandwidth(uint64 value) {
+    device_description_->memory_bandwidth_ = value;
+  }
   void set_shared_memory_per_core(int64 value) {
     device_description_->shared_memory_per_core_ = value;