aboutsummaryrefslogtreecommitdiffhomepage
path: root/tensorflow/stream_executor/kernel.cc
diff options
context:
space:
mode:
authorGravatar Vijay Vasudevan <vrv@google.com>2015-12-08 09:58:59 -0800
committerGravatar Vijay Vasudevan <vrv@google.com>2015-12-08 09:58:59 -0800
commitddd4aaf5286de24ba70402ee0ec8b836d3aed8c7 (patch)
tree4efdf6cf4d69b45041fd2a02cd2b7327ea9f1f58 /tensorflow/stream_executor/kernel.cc
parentcd53f3c3302c9312c1840389a9988a879b8b9dd5 (diff)
TensorFlow: upstream changes to git.
Change 109695551 Update FAQ Change 109694725 Add a gradient for resize_bilinear op. Change 109694505 Don't mention variables module in docs. variables.Variable should be tf.Variable. Change 109658848 Adding an option to create a new thread-pool for each session. Change 109640570 Take the snapshot of stream-executor. + Expose an interface for scratch space allocation in the interface. Change 109638559 Let image_summary accept uint8 input. This allows users to do their own normalization / scaling if the default (very weird) behavior of image_summary is undesired. This required a slight tweak to fake_input.cc to make polymorphically typed fake inputs infer if their type attr is not set but has a default. Unfortunately, adding a second valid type to image_summary *disables* automatic implicit conversion from np.float64 to tf.float32, so this change is slightly backwards incompatible. Change 109636969 Add serialization operations for SparseTensor. Change 109636644 Update generated Op docs. Change 109634899 TensorFlow: add a markdown file for producing release notes for our releases. Seed with 0.5.0 with a boring but accurate description. Change 109634502 Let histogram_summary take any realnumbertype. It used to take only floats, now it understands ints. Change 109634434 TensorFlow: update locations where we mention python 3 support, update them to current truth. Change 109632108 Move HSV <> RGB conversions, grayscale conversions, and adjust_* ops back to tensorflow - make GPU-capable version of RGBToHSV and HSVToRGB, allows only float input/output - change docs to reflect new size constraints - change HSV format to be [0,1] for all components - add automatic dtype conversion for all adjust_* and grayscale conversion ops - fix up docs Change 109631077 Improve optimizer exceptions 1. grads_and_vars is now a tuple, so must be wrapped when passed to format. 2. Use '%r' instead of '%s' for dtype formatting Base CL: 109697989
Diffstat (limited to 'tensorflow/stream_executor/kernel.cc')
-rw-r--r--tensorflow/stream_executor/kernel.cc23
1 files changed, 3 insertions, 20 deletions
diff --git a/tensorflow/stream_executor/kernel.cc b/tensorflow/stream_executor/kernel.cc
index ee0b706eef..64a4e6f49e 100644
--- a/tensorflow/stream_executor/kernel.cc
+++ b/tensorflow/stream_executor/kernel.cc
@@ -25,7 +25,6 @@ limitations under the License.
#include "tensorflow/stream_executor/platform.h"
#include "tensorflow/stream_executor/platform/logging.h"
#include "tensorflow/stream_executor/stream_executor.h"
-#include "tensorflow/stream_executor/stream_executor_internal.h"
namespace perftools {
namespace gputools {
@@ -58,29 +57,13 @@ void KernelMetadata::set_shared_memory_bytes(int shared_memory_bytes) {
has_shared_memory_bytes_ = true;
}
-static internal::KernelInterface *KernelImplementationFromPlatformKind(
- PlatformKind platform_kind) {
- if (platform_kind == PlatformKind::kCuda) {
- return (*internal::MakeCUDAKernelImplementation())();
- } else if (platform_kind == PlatformKind::kOpenCL ||
- platform_kind == PlatformKind::kOpenCLAltera) {
- return (*internal::MakeOpenCLKernelImplementation())();
- } else {
- LOG(FATAL) << "cannot create kernel implementation for platform kind: "
- << PlatformKindString(platform_kind);
- }
-}
-
KernelBase::KernelBase(StreamExecutor *parent)
- : implementation_(
- KernelImplementationFromPlatformKind(parent->platform_kind())),
- parent_(parent) {
- DCHECK(parent_ != nullptr);
-}
+ : parent_(parent),
+ implementation_(parent->implementation()->CreateKernelImplementation()) {}
KernelBase::KernelBase(StreamExecutor *parent,
internal::KernelInterface *implementation)
- : implementation_(implementation), parent_(parent) {}
+ : parent_(parent), implementation_(implementation) {}
KernelBase::~KernelBase() {}