author    Chris Leary <leary@google.com>  2017-08-22 23:37:36 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-08-22 23:41:55 -0700
commit    33e34ef79f66743e44ced81ed9cf182e00ad5d13 (patch)
tree      2450fdfb1282bebfe4a4995757fbe4b71a2ba764 /tensorflow/stream_executor/stream_executor_pimpl.h
parent    1e849fed6032015eb25149b801de0f7be2d87026 (diff)
[SE] Change comments/identifiers/logs from when StreamExecutor was GPUExecutor.
PiperOrigin-RevId: 166163619
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_pimpl.h')
-rw-r--r--  tensorflow/stream_executor/stream_executor_pimpl.h  93
1 file changed, 48 insertions(+), 45 deletions(-)
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 9814f1b960..d910eb8823 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -104,8 +104,8 @@ class StreamExecutor {
// platform, false is returned.
bool GetKernel(const MultiKernelLoaderSpec &spec, KernelBase *kernel);
- // Synchronously allocates an array on the GPU device of type T with
- // element_count elements.
+ // Synchronously allocates an array on the device of type T with element_count
+ // elements.
template <typename T>
DeviceMemory<T> AllocateArray(uint64 element_count);
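For orientation, a minimal usage sketch of the allocation path documented above. Namespace qualifiers are omitted, and obtaining the StreamExecutor* (via the platform layer) is assumed rather than shown:

  // Sketch only: `executor` is assumed valid; error handling elided.
  void AllocateExample(StreamExecutor *executor) {
    // Space for 1024 floats in device memory; the handle is typed.
    DeviceMemory<float> device_floats = executor->AllocateArray<float>(1024);
    // ... enqueue work that reads/writes device_floats ...
    executor->Deallocate(&device_floats);  // Declared further down in this header.
  }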
@@ -115,8 +115,8 @@ class StreamExecutor {
return ScopedDeviceMemory<T>(this, AllocateArray<T>(element_count));
}
- // Convenience wrapper that allocates space for a single element of type T
- // in GPU memory.
+ // Convenience wrapper that allocates space for a single element of type T in
+ // device memory.
template <typename T>
DeviceMemory<T> AllocateScalar() {
return AllocateArray<T>(1);
@@ -128,8 +128,8 @@ class StreamExecutor {
return AllocateOwnedArray<T>(1);
}
- // Synchronously allocates a scalar of type T on the GPU device that is
- // (POD) zero-byte initialized.
+ // Synchronously allocates a scalar of type T on the device that is (POD)
+ // zero-byte initialized.
template <typename T>
DeviceMemory<T> AllocateZeroed();
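The scalar and owned-array conveniences above compose similarly; another hedged sketch under the same assumptions:

  void ScalarExample(StreamExecutor *executor) {
    // RAII wrapper: freed automatically when `owned` leaves scope.
    ScopedDeviceMemory<float> owned = executor->AllocateOwnedArray<float>(256);
    // One element, zero-byte initialized (POD) in device memory.
    DeviceMemory<int> zeroed = executor->AllocateZeroed<int>();
    executor->Deallocate(&zeroed);
  }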
@@ -177,11 +177,12 @@ class StreamExecutor {
// null-out effect should not be relied upon in client code.
void Deallocate(DeviceMemoryBase *mem);
- // Retrieves a mapping of active opaque GPU memory pointer to a string
+ // Retrieves a mapping of active opaque device memory pointer to a string
// representation of the [allocating thread's] stack at the time the pointer
- // was allocated. Useful for tracking GPU memory leaks.
+ // was allocated. Useful for tracking device memory leaks.
//
- // Note: this will only be populated if --check_gpu_leaks flag is activated.
+ // Note: this will only be populated if --check_device_leaks flag is
+ // activated.
void GetMemAllocs(std::map<void *, AllocRecord> *records_out);
// Allocates a region of host memory and registers it with the platform API.
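When leak checking is enabled, the allocation records above can be inspected at runtime; a sketch, assuming the usual TensorFlow logging macros are available:

  void DumpLiveAllocs(StreamExecutor *executor) {
    // Empty unless the leak-check flag described above is activated.
    std::map<void *, AllocRecord> records;
    executor->GetMemAllocs(&records);
    LOG(INFO) << records.size() << " live device allocations tracked.";
  }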
@@ -210,68 +211,68 @@ class StreamExecutor {
bool SynchronizeAllActivity() SE_MUST_USE_RESULT;
// Blocks the caller while "size" bytes are zeroed out (in POD fashion) at the
- // given location in GPU memory.
+ // given location in device memory.
bool SynchronousMemZero(DeviceMemoryBase *location,
uint64 size) SE_MUST_USE_RESULT;
// Blocks the caller while "size" bytes are initialized to "value" (in POD
- // fashion) at the given location in GPU memory.
+ // fashion) at the given location in device memory.
bool SynchronousMemSet(DeviceMemoryBase *location, int value,
uint64 size) SE_MUST_USE_RESULT;
// [deprecated] Blocks the caller while a data segment of the given size is
- // copied from the host source to the GPU destination.
+ // copied from the host source to the device destination.
//
// Deprecation: prefer explicit H2D below, to avoid error-prone API usage.
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
+ bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src,
uint64 size) SE_MUST_USE_RESULT;
// [deprecated] Blocks the caller while a data segment of the given size is
- // copied from the GPU source to the host destination.
+ // copied from the device source to the host destination.
//
// Deprecation: prefer explicit D2H below, to avoid error-prone API usage.
- bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src,
+ bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src,
uint64 size) SE_MUST_USE_RESULT;
// Same as SynchronousMemcpy(DeviceMemoryBase*, ...) above.
port::Status SynchronousMemcpyH2D(const void *host_src, int64 size,
- DeviceMemoryBase *gpu_dst);
+ DeviceMemoryBase *device_dst);
// Alternative interface for memcpying from host to device that takes an
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <class T>
port::Status SynchronousMemcpyH2D(port::ArraySlice<T> host_src,
- DeviceMemoryBase *gpu_dst) {
+ DeviceMemoryBase *device_dst) {
auto host_size = host_src.size() * sizeof(T);
- CHECK(gpu_dst->size() == 0 || gpu_dst->size() >= host_size);
- return SynchronousMemcpyH2D(host_src.begin(), host_size, gpu_dst);
+ CHECK(device_dst->size() == 0 || device_dst->size() >= host_size);
+ return SynchronousMemcpyH2D(host_src.begin(), host_size, device_dst);
}
// Same as SynchronousMemcpy(void*, ...) above.
- port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &gpu_src, int64 size,
- void *host_dst);
+ port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &device_src,
+ int64 size, void *host_dst);
// Alternative interface for memcpying from device to host that takes an
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
- port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &gpu_src,
+ port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src,
port::MutableArraySlice<T> host_dst) {
auto host_size = host_dst.size() * sizeof(T);
- CHECK(gpu_src.size() == 0 || host_size >= gpu_src.size());
- return SynchronousMemcpyD2H(gpu_src, host_size, host_dst.begin());
+ CHECK(device_src.size() == 0 || host_size >= device_src.size());
+ return SynchronousMemcpyD2H(device_src, host_size, host_dst.begin());
}
// Blocks the caller while a data segment of the given size is copied from the
- // GPU source to the GPU destination.
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src,
+ // device source to the device destination.
+ bool SynchronousMemcpy(DeviceMemoryBase *device_dst,
+ const DeviceMemoryBase &device_src,
uint64 size) SE_MUST_USE_RESULT;
- // Enqueues an operation onto stream to zero out size bytes at the given GPU
- // memory location. Neither stream nor location may be null. Returns whether
- // the operation was successfully enqueued onto the stream.
+ // Enqueues an operation onto stream to zero out size bytes at the given
+ // device memory location. Neither stream nor location may be null. Returns
+ // whether the operation was successfully enqueued onto the stream.
bool MemZero(Stream *stream, DeviceMemoryBase *location,
uint64 size) SE_MUST_USE_RESULT;
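Taken together, the Status-returning H2D/D2H overloads above support a simple synchronous round trip; a sketch, assuming port::ArraySlice converts from std::vector as elsewhere in this codebase:

  port::Status RoundTrip(StreamExecutor *executor) {
    std::vector<float> host(128, 1.0f);
    DeviceMemory<float> dev = executor->AllocateArray<float>(host.size());
    // Host -> device; the slice overload CHECKs the destination capacity.
    port::Status status = executor->SynchronousMemcpyH2D(
        port::ArraySlice<float>(host), &dev);
    if (!status.ok()) return status;
    std::vector<float> back(host.size());
    // Device -> host via the typed-slice overload.
    status = executor->SynchronousMemcpyD2H(
        dev, port::MutableArraySlice<float>(&back));
    executor->Deallocate(&dev);
    return status;
  }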
@@ -471,8 +472,8 @@ class StreamExecutor {
rng::RngSupport *AsRng();
// Causes the host code to synchronously wait for operations entrained onto
- // stream to complete. Effectively a join on the asynchronous GPU operations
- // enqueued on the stream before this program point.
+ // stream to complete. Effectively a join on the asynchronous device
+ // operations enqueued on the stream before this program point.
bool BlockHostUntilDone(Stream *stream);
// Synchronously allocates size bytes on the underlying platform and returns
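BlockHostUntilDone is the host-side join point described above; a sketch of the pattern, with Stream setup (from stream.h, not this header) assumed:

  void ZeroAndWait(StreamExecutor *executor, Stream *stream,
                   DeviceMemoryBase *buf) {
    // Enqueue an async zero-fill, then join on the stream's work.
    if (executor->MemZero(stream, buf, buf->size())) {
      executor->BlockHostUntilDone(stream);
    }
  }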
@@ -485,20 +486,21 @@ class StreamExecutor {
bool GetSymbol(const string& symbol_name, void **mem, size_t *bytes);
// Entrains a memcpy operation onto stream, with a host destination location
- // host_dst and a GPU memory source, with target size size.
- bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
- uint64 size);
+ // host_dst and a device memory source, with target size size.
+ bool Memcpy(Stream *stream, void *host_dst,
+ const DeviceMemoryBase &device_src, uint64 size);
- // Entrains a memcpy operation onto stream, with a GPU destination location
+ // Entrains a memcpy operation onto stream, with a device destination location
// and a host memory source, with target size size.
- bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
- uint64 size);
+ bool Memcpy(Stream *stream, DeviceMemoryBase *device_dst,
+ const void *host_src, uint64 size);
- // Entrains a memcpy operation onto stream, with a GPU destination location
- // and a GPU source location, with target size size. Peer access should have
- // been enabled between the StreamExecutors owning the GPU memory regions.
- bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src, uint64 size);
+ // Entrains a memcpy operation onto stream, with a device destination location
+ // and a device source location, with target size size. Peer access should
+ // have been enabled between the StreamExecutors owning the device memory
+ // regions.
+ bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *device_dst,
+ const DeviceMemoryBase &device_src, uint64 size);
// Entrains on a stream a user-specified function to be run on the host.
// See Stream::ThenDoHostCallback for full details.
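The asynchronous copy entry points above chain naturally on one stream; a sketch, assuming peer access has already been enabled elsewhere for the device-to-device leg:

  void AsyncCopies(StreamExecutor *executor, Stream *stream,
                   DeviceMemoryBase *dst, const DeviceMemoryBase &src,
                   void *host_dst, uint64 size) {
    // Device -> device; needs peer access between the owning executors.
    executor->MemcpyDeviceToDevice(stream, dst, src, size);
    // Device -> host on the same stream, so it runs after the D2D copy.
    executor->Memcpy(stream, host_dst, src, size);
    executor->BlockHostUntilDone(stream);
  }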
@@ -585,8 +587,9 @@ class StreamExecutor {
// fashion.
std::unique_ptr<internal::StreamExecutorInterface> implementation_;
- // A mapping of pointer (to GPU memory) to string representation of the stack
- // (of the allocating thread) at the time at which the pointer was allocated.
+ // A mapping of pointer (to device memory) to string representation of the
+ // stack (of the allocating thread) at the time at which the pointer was
+ // allocated.
std::map<void *, AllocRecord> mem_allocs_ GUARDED_BY(mu_);
// Memoized BLAS support object -- we only want to create this once when asked