author    Chris Leary <leary@google.com>  2017-08-22 23:37:36 -0700
committer TensorFlower Gardener <gardener@tensorflow.org>  2017-08-22 23:41:55 -0700
commit    33e34ef79f66743e44ced81ed9cf182e00ad5d13 (patch)
tree      2450fdfb1282bebfe4a4995757fbe4b71a2ba764 /tensorflow/stream_executor/stream_executor_pimpl.h
parent    1e849fed6032015eb25149b801de0f7be2d87026 (diff)
[SE] Change comments/identifiers/logs from when StreamExecutor was GPUExecutor.
PiperOrigin-RevId: 166163619
Diffstat (limited to 'tensorflow/stream_executor/stream_executor_pimpl.h')
-rw-r--r--  tensorflow/stream_executor/stream_executor_pimpl.h  93
1 file changed, 48 insertions(+), 45 deletions(-)
diff --git a/tensorflow/stream_executor/stream_executor_pimpl.h b/tensorflow/stream_executor/stream_executor_pimpl.h
index 9814f1b960..d910eb8823 100644
--- a/tensorflow/stream_executor/stream_executor_pimpl.h
+++ b/tensorflow/stream_executor/stream_executor_pimpl.h
@@ -104,8 +104,8 @@ class StreamExecutor {
// platform, false is returned.
bool GetKernel(const MultiKernelLoaderSpec &spec, KernelBase *kernel);
- // Synchronously allocates an array on the GPU device of type T with
- // element_count elements.
+ // Synchronously allocates an array on the device of type T with element_count
+ // elements.
template <typename T>
DeviceMemory<T> AllocateArray(uint64 element_count);
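For orientation, a minimal usage sketch of the allocation path documented above. Namespace qualifiers are omitted, and obtaining the StreamExecutor* (via the platform layer) is assumed rather than shown:

  // Sketch only: `executor` is assumed valid; error handling elided.
  void AllocateExample(StreamExecutor *executor) {
    // Space for 1024 floats in device memory; the handle is typed.
    DeviceMemory<float> device_floats = executor->AllocateArray<float>(1024);
    // ... enqueue work that reads/writes device_floats ...
    executor->Deallocate(&device_floats);  // Declared further down in this header.
  }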
@@ -115,8 +115,8 @@ class StreamExecutor {
return ScopedDeviceMemory<T>(this, AllocateArray<T>(element_count));
}
- // Convenience wrapper that allocates space for a single element of type T
- // in GPU memory.
+ // Convenience wrapper that allocates space for a single element of type T in
+ // device memory.
template <typename T>
DeviceMemory<T> AllocateScalar() {
return AllocateArray<T>(1);
@@ -128,8 +128,8 @@ class StreamExecutor {
return AllocateOwnedArray<T>(1);
}
- // Synchronously allocates a scalar of type T on the GPU device that is
- // (POD) zero-byte initialized.
+ // Synchronously allocates a scalar of type T on the device that is (POD)
+ // zero-byte initialized.
template <typename T>
DeviceMemory<T> AllocateZeroed();
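The scalar and owned-array conveniences above compose similarly; another hedged sketch under the same assumptions:

  void ScalarExample(StreamExecutor *executor) {
    // RAII wrapper: freed automatically when `owned` leaves scope.
    ScopedDeviceMemory<float> owned = executor->AllocateOwnedArray<float>(256);
    // One element, zero-byte initialized (POD) in device memory.
    DeviceMemory<int> zeroed = executor->AllocateZeroed<int>();
    executor->Deallocate(&zeroed);
  }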
@@ -177,11 +177,12 @@ class StreamExecutor {
// null-out effect should not be relied upon in client code.
void Deallocate(DeviceMemoryBase *mem);
- // Retrieves a mapping of active opaque GPU memory pointer to a string
+ // Retrieves a mapping of active opaque device memory pointer to a string
// representation of the [allocating thread's] stack at the time the pointer
- // was allocated. Useful for tracking GPU memory leaks.
+ // was allocated. Useful for tracking device memory leaks.
//
- // Note: this will only be populated if --check_gpu_leaks flag is activated.
+ // Note: this will only be populated if --check_device_leaks flag is
+ // activated.
void GetMemAllocs(std::map<void *, AllocRecord> *records_out);
// Allocates a region of host memory and registers it with the platform API.
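When leak checking is enabled, the allocation records above can be inspected at runtime; a sketch, assuming the usual TensorFlow logging macros are available:

  void DumpLiveAllocs(StreamExecutor *executor) {
    // Empty unless the leak-check flag described above is activated.
    std::map<void *, AllocRecord> records;
    executor->GetMemAllocs(&records);
    LOG(INFO) << records.size() << " live device allocations tracked.";
  }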
@@ -210,68 +211,68 @@ class StreamExecutor {
bool SynchronizeAllActivity() SE_MUST_USE_RESULT;
// Blocks the caller while "size" bytes are zeroed out (in POD fashion) at the
- // given location in GPU memory.
+ // given location in device memory.
bool SynchronousMemZero(DeviceMemoryBase *location,
uint64 size) SE_MUST_USE_RESULT;
// Blocks the caller while "size" bytes are initialized to "value" (in POD
- // fashion) at the given location in GPU memory.
+ // fashion) at the given location in device memory.
bool SynchronousMemSet(DeviceMemoryBase *location, int value,
uint64 size) SE_MUST_USE_RESULT;
// [deprecated] Blocks the caller while a data segment of the given size is
- // copied from the host source to the GPU destination.
+ // copied from the host source to the device destination.
//
// Deprecation: prefer explicit H2D below, to avoid error-prone API usage.
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src,
+ bool SynchronousMemcpy(DeviceMemoryBase *device_dst, const void *host_src,
uint64 size) SE_MUST_USE_RESULT;
// [deprecated] Blocks the caller while a data segment of the given size is
- // copied from the GPU source to the host destination.
+ // copied from the device source to the host destination.
//
// Deprecation: prefer explicit D2H below, to avoid error-prone API usage.
- bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src,
+ bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &device_src,
uint64 size) SE_MUST_USE_RESULT;
// Same as SynchronousMemcpy(DeviceMemoryBase*, ...) above.
port::Status SynchronousMemcpyH2D(const void *host_src, int64 size,
- DeviceMemoryBase *gpu_dst);
+ DeviceMemoryBase *device_dst);
// Alternative interface for memcpying from host to device that takes an
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <class T>
port::Status SynchronousMemcpyH2D(port::ArraySlice<T> host_src,
- DeviceMemoryBase *gpu_dst) {
+ DeviceMemoryBase *device_dst) {
auto host_size = host_src.size() * sizeof(T);
- CHECK(gpu_dst->size() == 0 || gpu_dst->size() >= host_size);
- return SynchronousMemcpyH2D(host_src.begin(), host_size, gpu_dst);
+ CHECK(device_dst->size() == 0 || device_dst->size() >= host_size);
+ return SynchronousMemcpyH2D(host_src.begin(), host_size, device_dst);
}
// Same as SynchronousMemcpy(void*, ...) above.
- port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &gpu_src, int64 size,
- void *host_dst);
+ port::Status SynchronousMemcpyD2H(const DeviceMemoryBase &device_src,
+ int64 size, void *host_dst);
// Alternative interface for memcpying from device to host that takes an
// array slice. Checks that the destination size can accommodate the host
// slice size.
template <typename T>
- port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &gpu_src,
+ port::Status SynchronousMemcpyD2H(const DeviceMemory<T> &device_src,
port::MutableArraySlice<T> host_dst) {
auto host_size = host_dst.size() * sizeof(T);
- CHECK(gpu_src.size() == 0 || host_size >= gpu_src.size());
- return SynchronousMemcpyD2H(gpu_src, host_size, host_dst.begin());
+ CHECK(device_src.size() == 0 || host_size >= device_src.size());
+ return SynchronousMemcpyD2H(device_src, host_size, host_dst.begin());
}
// Blocks the caller while a data segment of the given size is copied from the
- // GPU source to the GPU destination.
- bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src,
+ // device source to the device destination.
+ bool SynchronousMemcpy(DeviceMemoryBase *device_dst,
+ const DeviceMemoryBase &device_src,
uint64 size) SE_MUST_USE_RESULT;
- // Enqueues an operation onto stream to zero out size bytes at the given GPU
- // memory location. Neither stream nor location may be null. Returns whether
- // the operation was successfully enqueued onto the stream.
+ // Enqueues an operation onto stream to zero out size bytes at the given
+ // device memory location. Neither stream nor location may be null. Returns
+ // whether the operation was successfully enqueued onto the stream.
bool MemZero(Stream *stream, DeviceMemoryBase *location,
uint64 size) SE_MUST_USE_RESULT;
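Taken together, the Status-returning H2D/D2H overloads above support a simple synchronous round trip; a sketch, assuming port::ArraySlice converts from std::vector as elsewhere in this codebase:

  port::Status RoundTrip(StreamExecutor *executor) {
    std::vector<float> host(128, 1.0f);
    DeviceMemory<float> dev = executor->AllocateArray<float>(host.size());
    // Host -> device; the slice overload CHECKs the destination capacity.
    port::Status status = executor->SynchronousMemcpyH2D(
        port::ArraySlice<float>(host), &dev);
    if (!status.ok()) return status;
    std::vector<float> back(host.size());
    // Device -> host via the typed-slice overload.
    status = executor->SynchronousMemcpyD2H(
        dev, port::MutableArraySlice<float>(&back));
    executor->Deallocate(&dev);
    return status;
  }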
@@ -471,8 +472,8 @@ class StreamExecutor {
rng::RngSupport *AsRng();
// Causes the host code to synchronously wait for operations entrained onto
- // stream to complete. Effectively a join on the asynchronous GPU operations
- // enqueued on the stream before this program point.
+ // stream to complete. Effectively a join on the asynchronous device
+ // operations enqueued on the stream before this program point.
bool BlockHostUntilDone(Stream *stream);
// Synchronously allocates size bytes on the underlying platform and returns
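BlockHostUntilDone is the host-side join point described above; a sketch of the pattern, with Stream setup (from stream.h, not this header) assumed:

  void ZeroAndWait(StreamExecutor *executor, Stream *stream,
                   DeviceMemoryBase *buf) {
    // Enqueue an async zero-fill, then join on the stream's work.
    if (executor->MemZero(stream, buf, buf->size())) {
      executor->BlockHostUntilDone(stream);
    }
  }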
@@ -485,20 +486,21 @@ class StreamExecutor {
bool GetSymbol(const string& symbol_name, void **mem, size_t *bytes);
// Entrains a memcpy operation onto stream, with a host destination location
- // host_dst and a GPU memory source, with target size size.
- bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src,
- uint64 size);
+ // host_dst and a device memory source, with target size size.
+ bool Memcpy(Stream *stream, void *host_dst,
+ const DeviceMemoryBase &device_src, uint64 size);
- // Entrains a memcpy operation onto stream, with a GPU destination location
+ // Entrains a memcpy operation onto stream, with a device destination location
// and a host memory source, with target size size.
- bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src,
- uint64 size);
+ bool Memcpy(Stream *stream, DeviceMemoryBase *device_dst,
+ const void *host_src, uint64 size);
- // Entrains a memcpy operation onto stream, with a GPU destination location
- // and a GPU source location, with target size size. Peer access should have
- // been enabled between the StreamExecutors owning the GPU memory regions.
- bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst,
- const DeviceMemoryBase &gpu_src, uint64 size);
+ // Entrains a memcpy operation onto stream, with a device destination location
+ // and a device source location, with target size size. Peer access should
+ // have been enabled between the StreamExecutors owning the device memory
+ // regions.
+ bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *device_dst,
+ const DeviceMemoryBase &device_src, uint64 size);
// Entrains on a stream a user-specified function to be run on the host.
// See Stream::ThenDoHostCallback for full details.
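The asynchronous copy entry points above chain naturally on one stream; a sketch, assuming peer access has already been enabled elsewhere for the device-to-device leg:

  void AsyncCopies(StreamExecutor *executor, Stream *stream,
                   DeviceMemoryBase *dst, const DeviceMemoryBase &src,
                   void *host_dst, uint64 size) {
    // Device -> device; needs peer access between the owning executors.
    executor->MemcpyDeviceToDevice(stream, dst, src, size);
    // Device -> host on the same stream, so it runs after the D2D copy.
    executor->Memcpy(stream, host_dst, src, size);
    executor->BlockHostUntilDone(stream);
  }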
@@ -585,8 +587,9 @@ class StreamExecutor {
// fashion.
std::unique_ptr<internal::StreamExecutorInterface> implementation_;
- // A mapping of pointer (to GPU memory) to string representation of the stack
- // (of the allocating thread) at the time at which the pointer was allocated.
+ // A mapping of pointer (to device memory) to string representation of the
+ // stack (of the allocating thread) at the time at which the pointer was
+ // allocated.
std::map<void *, AllocRecord> mem_allocs_ GUARDED_BY(mu_);
// Memoized BLAS support object -- we only want to create this once when asked