diff options
author | Chris Leary <leary@google.com> | 2017-08-22 23:37:36 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-22 23:41:55 -0700 |
commit | 33e34ef79f66743e44ced81ed9cf182e00ad5d13 (patch) | |
tree | 2450fdfb1282bebfe4a4995757fbe4b71a2ba764 /tensorflow/stream_executor/stream_executor_pimpl.h | |
parent | 1e849fed6032015eb25149b801de0f7be2d87026 (diff) |
[SE] Change comments/identifiers/logs from when StreamExecutor was GPUExecutor.
PiperOrigin-RevId: 166163619
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_pimpl.h')
-rw-r--r-- | tensorflow/stream_executor/stream_executor_pimpl.h | 93 |
1 file changed, 48 insertions, 45 deletions
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h index 9814f1b960..d910eb8823 100644 --- a/tensorflow/stream_executor/stream_executor_pimpl.h +++ b/tensorflow/stream_executor/stream_executor_pimpl.h @@ -104,8 +104,8 @@ class StreamExecutor { // platform, false is returned. bool GetKernel(const MultiKernelLoaderSpec &spec, KernelBase *kernel); - // Synchronously allocates an array on the GPU device of type T with - // element_count elements. + // Synchronously allocates an array on the device of type T with element_count + // elements. template <typename T> DeviceMemory<T> AllocateArray(uint64 element_count); @@ -115,8 +115,8 @@ class StreamExecutor { return ScopedDeviceMemory<T>(this, AllocateArray<T>(element_count)); } - // Convenience wrapper that allocates space for a single element of type T - // in GPU memory. + // Convenience wrapper that allocates space for a single element of type T in + // device memory. template <typename T> DeviceMemory<T> AllocateScalar() { return AllocateArray<T>(1); @@ -128,8 +128,8 @@ class StreamExecutor { return AllocateOwnedArray<T>(1); } - // Synchronously allocates a scalar of type T on the GPU device that is - // (POD) zero-byte initialized. + // Synchronously allocates a scalar of type T on the device that is (POD) + // zero-byte initialized. template <typename T> DeviceMemory<T> AllocateZeroed(); @@ -177,11 +177,12 @@ class StreamExecutor { // null-out effect should not be relied upon in client code. void Deallocate(DeviceMemoryBase *mem); - // Retrieves a mapping of active opaque GPU memory pointer to a string + // Retrieves a mapping of active opaque device memory pointer to a string // representation of the [allocating thread's] stack at the time the pointer - // was allocated. Useful for tracking GPU memory leaks. + // was allocated. Useful for tracking device memory leaks. // - // Note: this will only be populated if --check_gpu_leaks flag is activated. 
+ // Note: this will only be populated if --check_device_leaks flag is + // activated. void GetMemAllocs(std::map<void *, AllocRecord> *records_out); // Allocates a region of host memory and registers it with the platform API. @@ -210,68 +211,68 @@ class StreamExecutor { bool SynchronizeAllActivity() SE_MUST_USE_RESULT; // Blocks the caller while "size" bytes are zeroed out (in POD fashion) at the - // given location in GPU memory. + // given location in device memory. bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) SE_MUST_USE_RESULT; // Blocks the caller while "size" bytes are initialized to "value" (in POD - // fashion) at the given location in GPU memory. + // fashion) at the given location in device memory. bool SynchronousMemSet(DeviceMemoryBase *location, int value, uint64 size) SE_MUST_USE_RESULT; // [deprecated] Blocks the caller while a data segment of the given size is - // copied from the host source to the GPU destination. + // copied from the host source to the device destination. // // Deprecation: prefer explicit H2D below, to avoid error-prone API usage. - bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src, + bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src, uint64 size) SE_MUST_USE_RESULT; // [deprecated] Blocks the caller while a data segment of the given size is - // copied from the GPU source to the host destination. + // copied from the device source to the host destination. // // Deprecation: prefer explicit D2H below, to avoid error-prone API usage. - bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, + bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src, uint64 size) SE_MUST_USE_RESULT; // Same as SynchronousMemcpy(DeviceMemoryBase*, ...) above. 
port::Status SynchronousMemcpyH2D(const void *host_src, int64 size, - DeviceMemoryBase *gpu_dst); + DeviceMemoryBase *device_dst); // Alternative interface for memcpying from host to device that takes an // array slice. Checks that the destination size can accommodate the host // slice size. template <class T> port::Status SynchronousMemcpyH2D(port::ArraySlice<T> host_src, - DeviceMemoryBase *gpu_dst) { + DeviceMemoryBase *device_dst) { auto host_size = host_src.size() * sizeof(T); - CHECK(gpu_dst->size() == 0 || gpu_dst->size() >= host_size); - return SynchronousMemcpyH2D(host_src.begin(), host_size, gpu_dst); + CHECK(device_dst->size() == 0 || device_dst->size() >= host_size); + return SynchronousMemcpyH2D(host_src.begin(), host_size, device_dst); } // Same as SynchronousMemcpy(void*, ...) above. - port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &gpu_src, int64 size, - void *host_dst); + port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &device_src, + int64 size, void *host_dst); // Alternative interface for memcpying from device to host that takes an // array slice. Checks that the destination size can accommodate the host // slice size. template <typename T> - port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &gpu_src, + port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src, port::MutableArraySlice<T> host_dst) { auto host_size = host_dst.size() * sizeof(T); - CHECK(gpu_src.size() == 0 || host_size >= gpu_src.size()); - return SynchronousMemcpyD2H(gpu_src, host_size, host_dst.begin()); + CHECK(device_src.size() == 0 || host_size >= device_src.size()); + return SynchronousMemcpyD2H(device_src, host_size, host_dst.begin()); } // Blocks the caller while a data segment of the given size is copied from the - // GPU source to the GPU destination. - bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, - const DeviceMemoryBase &gpu_src, + // device source to the device destination. 
+ bool SynchronousMemcpy(DeviceMemoryBase *device_dst, + const DeviceMemoryBase &device_src, uint64 size) SE_MUST_USE_RESULT; - // Enqueues an operation onto stream to zero out size bytes at the given GPU - // memory location. Neither stream nor location may be null. Returns whether - // the operation was successfully enqueued onto the stream. + // Enqueues an operation onto stream to zero out size bytes at the given + // device memory location. Neither stream nor location may be null. Returns + // whether the operation was successfully enqueued onto the stream. bool MemZero(Stream *stream, DeviceMemoryBase *location, uint64 size) SE_MUST_USE_RESULT; @@ -471,8 +472,8 @@ class StreamExecutor { rng::RngSupport *AsRng(); // Causes the host code to synchronously wait for operations entrained onto - // stream to complete. Effectively a join on the asynchronous GPU operations - // enqueued on the stream before this program point. + // stream to complete. Effectively a join on the asynchronous device + // operations enqueued on the stream before this program point. bool BlockHostUntilDone(Stream *stream); // Synchronously allocates size bytes on the underlying platform and returns @@ -485,20 +486,21 @@ class StreamExecutor { bool GetSymbol(const string& symbol_name, void **mem, size_t *bytes); // Entrains a memcpy operation onto stream, with a host destination location - // host_dst and a GPU memory source, with target size size. - bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src, - uint64 size); + // host_dst and a device memory source, with target size size. + bool Memcpy(Stream *stream, void *host_dst, + const DeviceMemoryBase &device_src, uint64 size); - // Entrains a memcpy operation onto stream, with a GPU destination location + // Entrains a memcpy operation onto stream, with a device destination location // and a host memory source, with target size size. 
- bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src, - uint64 size); + bool Memcpy(Stream *stream, DeviceMemoryBase *device_dst, + const void *host_src, uint64 size); - // Entrains a memcpy operation onto stream, with a GPU destination location - // and a GPU source location, with target size size. Peer access should have - // been enabled between the StreamExecutors owning the GPU memory regions. - bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst, - const DeviceMemoryBase &gpu_src, uint64 size); + // Entrains a memcpy operation onto stream, with a device destination location + // and a device source location, with target size size. Peer access should + // have been enabled between the StreamExecutors owning the device memory + // regions. + bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *device_dst, + const DeviceMemoryBase &device_src, uint64 size); // Entrains on a stream a user-specified function to be run on the host. // See Stream::ThenDoHostCallback for full details. @@ -585,8 +587,9 @@ class StreamExecutor { // fashion. std::unique_ptr<internal::StreamExecutorInterface> implementation_; - // A mapping of pointer (to GPU memory) to string representation of the stack - // (of the allocating thread) at the time at which the pointer was allocated. + // A mapping of pointer (to device memory) to string representation of the + // stack (of the allocating thread) at the time at which the pointer was + // allocated. std::map<void *, AllocRecord> mem_allocs_ GUARDED_BY(mu_); // Memoized BLAS support object -- we only want to create this once when asked |