diff options
author | 2016-12-05 06:27:11 -0800 | |
---|---|---|
committer | 2016-12-05 06:44:09 -0800 | |
commit | e4a4e922ed4be890f4273a7a276768dfefe80a4c (patch) | |
tree | b724c791eb601946961899a621ea4335babfd1e0 /tensorflow/stream_executor/host/host_gpu_executor.h | |
parent | b00e490c3834d72aecce445f72b3a717b068d9db (diff) |
Add a Host platform to the stream executor library that runs code on the CPU.
Avoid using a linker-initialized mutex in multi_platform_manager to work around an initialization/destruction ordering problem on Mac.
Include the CUDA platform ID module even in non-CUDA builds.
Change: 141045549
Diffstat (limited to 'tensorflow/stream_executor/host/host_gpu_executor.h')
-rw-r--r-- | tensorflow/stream_executor/host/host_gpu_executor.h | 215 |
1 files changed, 215 insertions, 0 deletions
diff --git a/tensorflow/stream_executor/host/host_gpu_executor.h b/tensorflow/stream_executor/host/host_gpu_executor.h new file mode 100644 index 0000000000..f217f7947f --- /dev/null +++ b/tensorflow/stream_executor/host/host_gpu_executor.h @@ -0,0 +1,215 @@ +/* Copyright 2016 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +// Declares the HostExecutor class, which is a CPU-only implementation of +// the StreamExecutor interface. For now, this is used for testing and to +// examine the performance of host-based StreamExecutor code. +#ifndef TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ +#define TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ + +#include "tensorflow/stream_executor/blas.h" +#include "tensorflow/stream_executor/host/host_stream.h" +#include "tensorflow/stream_executor/host/host_timer.h" +#include "tensorflow/stream_executor/lib/error.h" +#include "tensorflow/stream_executor/lib/status.h" +#include "tensorflow/stream_executor/rng.h" +#include "tensorflow/stream_executor/stream_executor.h" +#include "tensorflow/stream_executor/stream_executor_internal.h" + +namespace perftools { +namespace gputools { +namespace host { + +// An implementation of StreamExecutor that does no communication or interaction +// with a device, but DOES perform memory operations backed by the host. 
+// Plugin routines (RNG, BLAS) are also supported and functional. +// Kernel invocations will fail, but host callbacks may be enqueued on this +// executor and its associated stream, and should follow standard ordering +// semantics. +// +// This is useful for evaluating the performance of host-based or fallback +// routines executed under the context of a GPU executor. +// See stream_executor.h for description of the below operations. +class HostExecutor : public internal::StreamExecutorInterface { + public: + explicit HostExecutor(const PluginConfig &plugin_config); + ~HostExecutor() override; + + port::Status Init(int device_ordinal, DeviceOptions device_options) override { + return port::Status::OK(); + } + + bool GetKernel(const MultiKernelLoaderSpec &spec, + KernelBase *kernel) override { + return false; + } + bool Launch(Stream *stream, const ThreadDim &thread_dims, + const BlockDim &block_dims, const KernelBase &kernel, + const KernelArgsArrayBase &args) override { + return false; + } + + void *Allocate(uint64 size) override; + void *AllocateSubBuffer(DeviceMemoryBase *mem, uint64 offset_bytes, + uint64 size_bytes) override; + void Deallocate(DeviceMemoryBase *mem) override; + + void *HostMemoryAllocate(uint64 size) override { return new char[size]; } + void HostMemoryDeallocate(void *mem) override { + delete[] static_cast<char *>(mem); + } + bool HostMemoryRegister(void *mem, uint64 size) override { return true; } + bool HostMemoryUnregister(void *mem) override { return true; } + + bool Memcpy(Stream *stream, void *host_dst, const DeviceMemoryBase &gpu_src, + uint64 size) override; + bool Memcpy(Stream *stream, DeviceMemoryBase *gpu_dst, const void *host_src, + uint64 size) override; + bool MemcpyDeviceToDevice(Stream *stream, DeviceMemoryBase *gpu_dst, + const DeviceMemoryBase &host_src, + uint64 size) override; + + bool MemZero(Stream *stream, DeviceMemoryBase *location, + uint64 size) override; + bool Memset(Stream *stream, DeviceMemoryBase *location, 
uint8 pattern, + uint64 size) override; + bool Memset32(Stream *stream, DeviceMemoryBase *location, uint32 pattern, + uint64 size) override; + + // No "synchronize all activity" implemented for this platform at the moment. + bool SynchronizeAllActivity() override { return false; } + bool SynchronousMemZero(DeviceMemoryBase *location, uint64 size) override; + + bool SynchronousMemSet(DeviceMemoryBase *location, int value, + uint64 size) override; + + bool SynchronousMemcpy(DeviceMemoryBase *gpu_dst, const void *host_src, + uint64 size) override; + bool SynchronousMemcpy(void *host_dst, const DeviceMemoryBase &gpu_src, + uint64 size) override; + bool SynchronousMemcpyDeviceToDevice(DeviceMemoryBase *gpu_dst, + const DeviceMemoryBase &gpu_src, + uint64 size) override; + + bool HostCallback(Stream *stream, std::function<void()> callback) override; + + port::Status AllocateEvent(Event *event) override { + return port::Status{port::error::UNIMPLEMENTED, ""}; + } + + port::Status DeallocateEvent(Event *event) override { + return port::Status{port::error::UNIMPLEMENTED, ""}; + } + + port::Status RecordEvent(Stream *stream, Event *event) override { + return port::Status{port::error::UNIMPLEMENTED, ""}; + } + + port::Status WaitForEvent(Stream *stream, Event *event) override { + return port::Status{port::error::UNIMPLEMENTED, ""}; + } + + Event::Status PollForEventStatus(Event *event) override { + return Event::Status::kError; + } + + bool AllocateStream(Stream *stream) override; + void DeallocateStream(Stream *stream) override; + bool CreateStreamDependency(Stream *dependent, Stream *other) override; + + // No special initialization is necessary for host timers. 
+ bool AllocateTimer(Timer *timer) override { return true; } + + void DeallocateTimer(Timer *timer) override {} + + bool StartTimer(Stream *stream, Timer *timer) override; + + bool StopTimer(Stream *stream, Timer *timer) override; + + bool BlockHostUntilDone(Stream *stream) override; + + int PlatformDeviceCount() override { return 1; } + + bool DeviceMemoryUsage(int64 *free, int64 *total) const override { + return false; + } + + DeviceDescription *PopulateDeviceDescription() const override; + + port::Status EnablePeerAccessTo(StreamExecutorInterface *other) override { + return port::Status::OK(); + } + + bool CanEnablePeerAccessTo(StreamExecutorInterface *other) override { + return true; + } + + SharedMemoryConfig GetDeviceSharedMemoryConfig() override { + LOG(INFO) << "Shared memory configuration is unsupported for host " + << "executors."; + return SharedMemoryConfig::kDefault; + } + + port::Status SetDeviceSharedMemoryConfig(SharedMemoryConfig config) override { + string error_msg{ + "Shared memory configuration is unsupported for host " + "executors."}; + LOG(INFO) << error_msg; + return port::Status{port::error::UNIMPLEMENTED, error_msg}; + } + + bool SupportsBlas() const override; + blas::BlasSupport *CreateBlas() override; + + bool SupportsDnn() const override { return false; } + dnn::DnnSupport *CreateDnn() override { return nullptr; } + + bool SupportsFft() const override; + fft::FftSupport *CreateFft() override; + + bool SupportsRng() const override; + rng::RngSupport *CreateRng() override; + + std::unique_ptr<internal::EventInterface> CreateEventImplementation() + override { + LOG(WARNING) << "Events not currently supported by HostExecutor."; + return nullptr; + } + + std::unique_ptr<internal::KernelInterface> CreateKernelImplementation() + override { + return nullptr; + } + + std::unique_ptr<internal::StreamInterface> GetStreamImplementation() + override { + return std::unique_ptr<internal::StreamInterface>(new HostStream()); + } + + 
std::unique_ptr<internal::TimerInterface> GetTimerImplementation() override { + return std::unique_ptr<internal::TimerInterface>(new HostTimer()); + } + + void *CudaContextHack() override { return nullptr; } + + private: + const PluginConfig plugin_config_; +}; + +} // namespace host +} // namespace gputools +} // namespace perftools + +#endif // TENSORFLOW_STREAM_EXECUTOR_HOST_HOST_GPU_EXECUTOR_H_ |