diff options
-rw-r--r-- | tensorflow/stream_executor/stream_executor.h | 28 |
1 file changed, 0 insertions, 28 deletions
diff --git a/tensorflow/stream_executor/stream_executor.h b/tensorflow/stream_executor/stream_executor.h index dd4664849d..2995dccf46 100644 --- a/tensorflow/stream_executor/stream_executor.h +++ b/tensorflow/stream_executor/stream_executor.h @@ -18,34 +18,6 @@ limitations under the License. // * Loading/launching data-parallel-kernels // * Invoking pre-canned high-performance library routines (like matrix // multiply) -// -// The appropriately-typed kernel and "loader spec" are automatically generated -// for the user within a namespace by the gcudacc compiler output, so typical -// use looks like so: -// -// namespace gpu = ::perftools::gputools; -// namespace gcudacc = ::platforms::gpus::gcudacc; -// -// gpu::StreamExecutor stream_exec{PlatformKind::kCuda}; -// gcudacc::kernel::MyKernel my_kernel{&stream_exec}; -// bool ok = stream_exec.GetKernel(gcudacc::spec::MyKernelSpec(), -// &my_kernel); -// if (!ok) { ... } -// gpu::DeviceMemory<int> result = stream_exec.AllocateZeroed<int>(); -// if (result == nullptr) { ... } -// int host_result; -// gpu::Stream my_stream{&stream_exec}; -// my_stream -// .Init() -// .ThenLaunch(ThreadDim{1024}, BlockDim{1}, my_kernel, result) -// .ThenMemcpy(&host_result, result, sizeof(host_result)) -// .BlockHostUntilDone() -// if (!my_stream.ok()) { ... } -// printf("%d\n", host_result); -// -// Since the device may operate asynchronously to the host, the -// Stream::BlockHostUntilDone() call forces the calling host thread to wait for -// the chain of commands specified for the Stream to complete execution. #ifndef TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ #define TENSORFLOW_STREAM_EXECUTOR_STREAM_EXECUTOR_H_ |