CPU runtime: Improve the performance of matrix-vector and

vector-matrix products. This change makes the single-threaded matrix-vector product explicit so that Eigen will always delegate to an optimized GEMV kernel. This is done by using an Eigen Matrix instead of the Eigen Tensor implementation. This is the same optimization done by TensorFlow's matmul op for GEMV. It is used even in the multi-threaded case because it appears to be faster than the multi-threaded version. This change also expands the scope of the CPU runtime test to cover vec-mat and mat-vec in both single-threaded and multi-threaded modes. PiperOrigin-RevId: 165630063
author: A. Unique TensorFlower <gardener@tensorflow.org> 2017-08-17 14:33:02 -0700
committer: TensorFlower Gardener <gardener@tensorflow.org> 2017-08-17 14:38:38 -0700
commit: 02f87fee25552e220c8295b58ab8e58b6fbe598b (patch)
tree: 964569d7ef4a8e369b36a7c7a1852015b67a2e93 /tensorflow/compiler/xla/service/cpu/runtime_matvec.h
parent: 4143410e1140a553621de5de09c1cad12a5eb4cb (diff)
1 files changed, 45 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h
new file mode 100644
index 0000000000..cb7e0a81f0
--- /dev/null
+++ b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h
@@ -0,0 +1,45 @@
+/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+==============================================================================*/
+
+#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_
+#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_
+
+#include "tensorflow/core/platform/types.h"
+
+namespace xla {
+
+// Matrix-vector / vector-matrix product using Eigen, declared for float (F32)
+// and double (F64). 'lhs' and 'rhs' point to column-major input buffers;
+// 'out' points to a buffer sufficiently large to hold the result of the
+// operation. Following standard nomenclature: lhs is m x k, rhs is k x n, and
+// out is m x n. This requires that m = 1 or n = 1.
+//
+// NOTE(review): transpose_lhs / transpose_rhs are not documented here;
+// presumably nonzero means the operand is stored transposed -- TODO confirm.
+// TODO(b/64684907): Compare runtime performance of these functions with dot
+// simplification.
+void EigenMatVecF32(float* out, float* lhs, float* rhs, tensorflow::int64 m,
+                    tensorflow::int64 n, tensorflow::int64 k,
+                    tensorflow::int32 transpose_lhs,
+                    tensorflow::int32 transpose_rhs);
+
+void EigenMatVecF64(double* out, double* lhs, double* rhs, tensorflow::int64 m,
+                    tensorflow::int64 n, tensorflow::int64 k,
+                    tensorflow::int32 transpose_lhs,
+                    tensorflow::int32 transpose_rhs);
+
+}  // namespace xla
+
+#endif  // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_
author	A. Unique TensorFlower <gardener@tensorflow.org>	2017-08-17 14:33:02 -0700
committer	TensorFlower Gardener <gardener@tensorflow.org>	2017-08-17 14:38:38 -0700
commit	02f87fee25552e220c8295b58ab8e58b6fbe598b (patch)
tree	964569d7ef4a8e369b36a7c7a1852015b67a2e93 /tensorflow/compiler/xla/service/cpu/runtime_matvec.h
parent	4143410e1140a553621de5de09c1cad12a5eb4cb (diff)