diff options
-rw-r--r-- | tensorflow/stream_executor/stream_executor.h | 28 |
1 file changed, 0 insertions, 28 deletions
diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h index dd4664849d..2995dccf46 100644 --- a/tensorflow/stream_executor/stream_executor.h +++ b/tensorflow/stream_executor/stream_executor.h @@ -18,34 +18,6 @@ limitations under the License. // * Loading/launching data-parallel-kernels // * Invoking pre-canned high-performance library routines (like matrix // multiply) -// -// The appropriately-typed kernel and "loader spec" are automatically generated -// for the user within a namespace by the gcudacc compiler output, so typical -// use looks like so: -// -// namespace gpu = ::perftools::gputools; -// namespace gcudacc = ::platforms::gpus::gcudacc; -// -// gpu::StreamExecutor stream_exec{PlatformKind::kCuda}; -// gcudacc::kernel::MyKernel my_kernel{&stream_exec}; -// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(), -// &my_kernel); -// if (!ok) { ... } -// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>(); -// if (result == nullptr) { ... } -// int host_result; -// gpu::Stream my_stream{&stream_exec}; -// my_stream -// .Init() -// .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result) -// .ThenMemcpy(&host_result, result, sizeof(host_result)) -// .BlockHostUntilDone() -// if (!my_stream.ok()) { ... } -// printf("%d\n", host_result); -// -// Since the device may operate asynchronously to the host, the -// Stream::BlockHostUntilDone() call forces the calling host thread to wait for -// the chain of commands specified for the Stream to complete execution. #ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ #define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ |