diff options
author | 2016-07-29 12:24:38 -0800 | |
---|---|---|
committer | 2016-07-29 13:33:23 -0700 | |
commit | 9dbcce0a88fefd3c48c33af8ff11056ccb54a3d0 (patch) | |
tree | 033819be519d3d822bb01b92d458522cde08b2a4 | |
parent | 11599c024420e28061585ddbeca9fecd5329ece8 (diff) |
Register LinSpace implementations on GPU too.
Change: 128840045
-rw-r--r-- | tensorflow/core/kernels/sequence_ops.cc | 11 | ||||
-rw-r--r-- | tensorflow/python/kernel_tests/init_ops_test.py | 67 |
2 files changed, 49 insertions, 29 deletions
diff --git a/tensorflow/core/kernels/sequence_ops.cc b/tensorflow/core/kernels/sequence_ops.cc index 0acde9c498..3cbd9691d1 100644 --- a/tensorflow/core/kernels/sequence_ops.cc +++ b/tensorflow/core/kernels/sequence_ops.cc @@ -118,16 +118,23 @@ class LinSpaceOp : public OpKernel { } }; -#define REGISTER_CPU_KERNEL(T) \ +#define REGISTER_KERNEL(DEV, T) \ REGISTER_KERNEL_BUILDER(Name("LinSpace") \ - .Device(DEVICE_CPU) \ + .Device(DEV) \ .TypeConstraint<T>("T") \ .HostMemory("start") \ .HostMemory("stop") \ .HostMemory("num") \ .HostMemory("output"), \ LinSpaceOp<T>); +#define REGISTER_CPU_KERNEL(T) REGISTER_KERNEL(DEVICE_CPU, T) TF_CALL_float(REGISTER_CPU_KERNEL); TF_CALL_double(REGISTER_CPU_KERNEL); +// NOTE(touts): We register the op on GPU but it still runs on CPU +// because its inputs and outputs are tagged as HostMemory. +#define REGISTER_GPU_KERNEL(T) REGISTER_KERNEL(DEVICE_GPU, T) +TF_CALL_float(REGISTER_GPU_KERNEL); +TF_CALL_double(REGISTER_GPU_KERNEL); + } // namespace tensorflow diff --git a/tensorflow/python/kernel_tests/init_ops_test.py b/tensorflow/python/kernel_tests/init_ops_test.py index d23e4777e0..7d5323e5cb 100644 --- a/tensorflow/python/kernel_tests/init_ops_test.py +++ b/tensorflow/python/kernel_tests/init_ops_test.py @@ -276,42 +276,55 @@ class RangeTest(tf.test.TestCase): # TODO(vrv): move to sequence_ops_test? class LinSpaceTest(tf.test.TestCase): + def _gpu_modes(self): + if tf.test.is_gpu_available(): + return [False, True] + else: + return [False] + def _LinSpace(self, start, stop, num): - with self.test_session(): - tf_ans = tf.linspace(start, stop, num, name="linspace") - self.assertEqual([num], tf_ans.get_shape()) - return tf_ans.eval() + # NOTE(touts): Needs to pass a graph to get a new session each time. + with tf.Graph().as_default() as graph: + with self.test_session(graph=graph, force_gpu=self.force_gpu): + tf_ans = tf.linspace(start, stop, num, name="linspace") + self.assertEqual([num], tf_ans.get_shape()) + return tf_ans.eval() def testPositive(self): - self.assertArrayNear(self._LinSpace(1., 5., 1), np.array([1.]), 1e-5) - self.assertArrayNear(self._LinSpace(1., 5., 2), np.array([1., 5.]), 1e-5) - self.assertArrayNear(self._LinSpace(1., 5., 3), - np.array([1., 3., 5.]), 1e-5) - self.assertArrayNear(self._LinSpace(1., 5., 4), - np.array([1., 7. / 3., 11. / 3., 5.]), 1e-5) + for self.force_gpu in self._gpu_modes(): + self.assertArrayNear(self._LinSpace(1., 5., 1), np.array([1.]), 1e-5) + self.assertArrayNear(self._LinSpace(1., 5., 2), np.array([1., 5.]), 1e-5) + self.assertArrayNear(self._LinSpace(1., 5., 3), + np.array([1., 3., 5.]), 1e-5) + self.assertArrayNear(self._LinSpace(1., 5., 4), + np.array([1., 7. / 3., 11. / 3., 5.]), 1e-5) def testNegative(self): - self.assertArrayNear(self._LinSpace(-1., -5., 1), np.array([-1.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., -5., 2), - np.array([-1., -5.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., -5., 3), - np.array([-1., -3., -5.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., -5., 4), - np.array([-1., -7. / 3., -11. / 3., -5.]), 1e-5) + for self.force_gpu in self._gpu_modes(): + self.assertArrayNear(self._LinSpace(-1., -5., 1), np.array([-1.]), 1e-5) + self.assertArrayNear(self._LinSpace(-1., -5., 2), + np.array([-1., -5.]), 1e-5) + self.assertArrayNear(self._LinSpace(-1., -5., 3), + np.array([-1., -3., -5.]), 1e-5) + self.assertArrayNear(self._LinSpace(-1., -5., 4), + np.array([-1., -7. / 3., -11. / 3., -5.]), 1e-5) def testNegativeToPositive(self): - self.assertArrayNear(self._LinSpace(-1., 5., 1), np.array([-1.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., 5., 2), np.array([-1., 5.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., 5., 3), - np.array([-1., 2., 5.]), 1e-5) - self.assertArrayNear(self._LinSpace(-1., 5., 4), - np.array([-1., 1., 3., 5.]), 1e-5) + for self.force_gpu in self._gpu_modes(): + self.assertArrayNear(self._LinSpace(-1., 5., 1), np.array([-1.]), 1e-5) + self.assertArrayNear(self._LinSpace(-1., 5., 2), np.array([-1., 5.]), + 1e-5) + self.assertArrayNear(self._LinSpace(-1., 5., 3), + np.array([-1., 2., 5.]), 1e-5) + self.assertArrayNear(self._LinSpace(-1., 5., 4), + np.array([-1., 1., 3., 5.]), 1e-5) def testPoint(self): - self.assertArrayNear(self._LinSpace(5., 5., 1), np.array([5.]), 1e-5) - self.assertArrayNear(self._LinSpace(5., 5., 2), np.array([5.] * 2), 1e-5) - self.assertArrayNear(self._LinSpace(5., 5., 3), np.array([5.] * 3), 1e-5) - self.assertArrayNear(self._LinSpace(5., 5., 4), np.array([5.] * 4), 1e-5) + for self.force_gpu in self._gpu_modes(): + self.assertArrayNear(self._LinSpace(5., 5., 1), np.array([5.]), 1e-5) + self.assertArrayNear(self._LinSpace(5., 5., 2), np.array([5.] * 2), 1e-5) + self.assertArrayNear(self._LinSpace(5., 5., 3), np.array([5.] * 3), 1e-5) + self.assertArrayNear(self._LinSpace(5., 5., 4), np.array([5.] * 4), 1e-5) class DeviceTest(tf.test.TestCase): |