about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author A. Unique TensorFlower <gardener@tensorflow.org> 2016-07-29 12:24:38 -0800
committer TensorFlower Gardener <gardener@tensorflow.org> 2016-07-29 13:33:23 -0700
commit 9dbcce0a88fefd3c48c33af8ff11056ccb54a3d0 (patch)
tree 033819be519d3d822bb01b92d458522cde08b2a4
parent 11599c024420e28061585ddbeca9fecd5329ece8 (diff)
Register LinSpace implementations on GPU too.
Change: 128840045
-rw-r--r-- tensorflow/core/kernels/sequence_ops.cc 11
-rw-r--r-- tensorflow/python/kernel_tests/init_ops_test.py 67
2 files changed, 49 insertions, 29 deletions
diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc
index 0acde9c498..3cbd9691d1 100644
--- a/tensorflow/core/kernels/sequence_ops.cc
+++ b/tensorflow/core/kernels/sequence_ops.cc
@@ -118,16 +118,23 @@ class LinSpaceOp : public OpKernel {
}
};
-#define REGISTER_CPU_KERNEL(T) \
+#define REGISTER_KERNEL(DEV, T) \
REGISTER_KERNEL_BUILDER(Name("LinSpace") \
- .Device(DEVICE_CPU) \
+ .Device(DEV) \
.TypeConstraint<T>("T") \
.HostMemory("start") \
.HostMemory("stop") \
.HostMemory("num") \
.HostMemory("output"), \
LinSpaceOp<T>);
+#define REGISTER_CPU_KERNEL(T) REGISTER_KERNEL(DEVICE_CPU, T)
TF_CALL_float(REGISTER_CPU_KERNEL);
TF_CALL_double(REGISTER_CPU_KERNEL);
+// NOTE(touts): We register the op on GPU but it still runs on CPU
+// because its inputs and outputs are tagged as HostMemory.
+#define REGISTER_GPU_KERNEL(T) REGISTER_KERNEL(DEVICE_GPU, T)
+TF_CALL_float(REGISTER_GPU_KERNEL);
+TF_CALL_double(REGISTER_GPU_KERNEL);
+
} // namespace tensorflow
diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py
index d23e4777e0..7d5323e5cb 100644
--- a/tensorflow/python/kernel_tests/init_ops_test.py
+++ b/tensorflow/python/kernel_tests/init_ops_test.py
@@ -276,42 +276,55 @@ class RangeTest(tf.test.TestCase):
# TODO(vrv): move to sequence_ops_test?
class LinSpaceTest(tf.test.TestCase):
+ def _gpu_modes(self):
+ if tf.test.is_gpu_available():
+ return [False, True]
+ else:
+ return [False]
+
def _LinSpace(self, start, stop, num):
- with self.test_session():
- tf_ans = tf.linspace(start, stop, num, name="linspace")
- self.assertEqual([num], tf_ans.get_shape())
- return tf_ans.eval()
+ # NOTE(touts): Needs to pass a graph to get a new session each time.
+ with tf.Graph().as_default() as graph:
+ with self.test_session(graph=graph, force_gpu=self.force_gpu):
+ tf_ans = tf.linspace(start, stop, num, name="linspace")
+ self.assertEqual([num], tf_ans.get_shape())
+ return tf_ans.eval()
def testPositive(self):
- self.assertArrayNear(self._LinSpace(1., 5., 1), np.array([1.]), 1e-5)
- self.assertArrayNear(self._LinSpace(1., 5., 2), np.array([1., 5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(1., 5., 3),
- np.array([1., 3., 5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(1., 5., 4),
- np.array([1., 7. / 3., 11. / 3., 5.]), 1e-5)
+ for self.force_gpu in self._gpu_modes():
+ self.assertArrayNear(self._LinSpace(1., 5., 1), np.array([1.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(1., 5., 2), np.array([1., 5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(1., 5., 3),
+ np.array([1., 3., 5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(1., 5., 4),
+ np.array([1., 7. / 3., 11. / 3., 5.]), 1e-5)
def testNegative(self):
- self.assertArrayNear(self._LinSpace(-1., -5., 1), np.array([-1.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., -5., 2),
- np.array([-1., -5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., -5., 3),
- np.array([-1., -3., -5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., -5., 4),
- np.array([-1., -7. / 3., -11. / 3., -5.]), 1e-5)
+ for self.force_gpu in self._gpu_modes():
+ self.assertArrayNear(self._LinSpace(-1., -5., 1), np.array([-1.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., -5., 2),
+ np.array([-1., -5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., -5., 3),
+ np.array([-1., -3., -5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., -5., 4),
+ np.array([-1., -7. / 3., -11. / 3., -5.]), 1e-5)
def testNegativeToPositive(self):
- self.assertArrayNear(self._LinSpace(-1., 5., 1), np.array([-1.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., 5., 2), np.array([-1., 5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., 5., 3),
- np.array([-1., 2., 5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(-1., 5., 4),
- np.array([-1., 1., 3., 5.]), 1e-5)
+ for self.force_gpu in self._gpu_modes():
+ self.assertArrayNear(self._LinSpace(-1., 5., 1), np.array([-1.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., 5., 2), np.array([-1., 5.]),
+ 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., 5., 3),
+ np.array([-1., 2., 5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(-1., 5., 4),
+ np.array([-1., 1., 3., 5.]), 1e-5)
def testPoint(self):
- self.assertArrayNear(self._LinSpace(5., 5., 1), np.array([5.]), 1e-5)
- self.assertArrayNear(self._LinSpace(5., 5., 2), np.array([5.] * 2), 1e-5)
- self.assertArrayNear(self._LinSpace(5., 5., 3), np.array([5.] * 3), 1e-5)
- self.assertArrayNear(self._LinSpace(5., 5., 4), np.array([5.] * 4), 1e-5)
+ for self.force_gpu in self._gpu_modes():
+ self.assertArrayNear(self._LinSpace(5., 5., 1), np.array([5.]), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 2), np.array([5.] * 2), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 3), np.array([5.] * 3), 1e-5)
+ self.assertArrayNear(self._LinSpace(5., 5., 4), np.array([5.] * 4), 1e-5)
class DeviceTest(tf.test.TestCase):