about summary refs log tree commit diff homepage
diff options
context:
space:
mode:
-rw-r--r-- tensorflow/stream_executor/stream_executor.h 28
1 files changed, 0 insertions, 28 deletions
diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h
index dd4664849d..2995dccf46 100644
--- a/tensorflow/stream_executor/stream_executor.h
+++ b/tensorflow/stream_executor/stream_executor.h
@@ -18,34 +18,6 @@ limitations under the License.
// * Loading/launching data-parallel-kernels
// * Invoking pre-canned high-performance library routines (like matrix
// multiply)
-//
-// The appropriately-typed kernel and "loader spec" are automatically generated
-// for the user within a namespace by the gcudacc compiler output, so typical
-// use looks like so:
-//
-// namespace gpu = ::perftools::gputools;
-// namespace gcudacc = ::platforms::gpus::gcudacc;
-//
-// gpu::StreamExecutor stream_exec{PlatformKind::kCuda};
-// gcudacc::kernel::MyKernel my_kernel{&stream_exec};
-// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(),
-// &my_kernel);
-// if (!ok) { ... }
-// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>();
-// if (result == nullptr) { ... }
-// int host_result;
-// gpu::Stream my_stream{&stream_exec};
-// my_stream
-// .Init()
-// .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result)
-// .ThenMemcpy(&host_result, result, sizeof(host_result))
-// .BlockHostUntilDone()
-// if (!my_stream.ok()) { ... }
-// printf("%d\n", host_result);
-//
-// Since the device may operate asynchronously to the host, the
-// Stream::BlockHostUntilDone() call forces the calling host thread to wait for
-// the chain of commands specified for the Stream to complete execution.
#ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_
#define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_