diff options
author | A. Unique TensorFlower <gardener@tensorflow.org> | 2017-08-17 14:33:02 -0700 |
---|---|---|
committer | TensorFlower Gardener <gardener@tensorflow.org> | 2017-08-17 14:38:38 -0700 |
commit | 02f87fee25552e220c8295b58ab8e58b6fbe598b (patch) | |
tree | 964569d7ef4a8e369b36a7c7a1852015b67a2e93 /tensorflow/compiler/xla/service/cpu/runtime_matvec.h | |
parent | 4143410e1140a553621de5de09c1cad12a5eb4cb (diff) |
CPU runtime: Improve the performance of matrix-vector and
vector-matrix products.
This change makes the single-threaded matrix-vector product explicit
so that Eigen will always delegate to an optimized GEMV kernel. This
is done by using an Eigen Matrix instead of the Eigen Tensor
implementation. This is the same optimization done by TensorFlow's
matmul op for GEMV.
The explicit single-threaded GEMV path is used even in the multi-threaded
case because it appears to be faster than the multi-threaded version.
This change also expands the scope of the CPU runtime test to cover
vec-mat and mat-vec products in both single-threaded and multi-threaded modes.
PiperOrigin-RevId: 165630063
Diffstat (limited to 'tensorflow/compiler/xla/service/cpu/runtime_matvec.h')
-rw-r--r-- | tensorflow/compiler/xla/service/cpu/runtime_matvec.h | 45 |
1 files changed, 45 insertions, 0 deletions
diff --git a/tensorflow/compiler/xla/service/cpu/runtime_matvec.h b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h new file mode 100644 index 0000000000..cb7e0a81f0 --- /dev/null +++ b/tensorflow/compiler/xla/service/cpu/runtime_matvec.h @@ -0,0 +1,45 @@ +/* Copyright 2017 The TensorFlow Authors. All Rights Reserved. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. +==============================================================================*/ + +#ifndef THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ +#define THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ + +#include "tensorflow/core/platform/types.h" + +namespace xla { + +// Performs a matrix-vector multiplication using Eigen. 'lhs' and 'rhs' are +// pointers to buffers containing input matrices in column-major order. 'out' is +// a pointer to a buffer sufficiently large to hold the result of the +// operation. Following standard nomenclature: lhs is m x k, rhs is k x n, and +// out is m x n. +// +// This requires that m = 1 or n = 1. +// +// TODO(b/64684907): Compare runtime performance of these functions with dot +// simplification. 
+void EigenMatVecF32(float* out, float* lhs, float* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + +void EigenMatVecF64(double* out, double* lhs, double* rhs, tensorflow::int64 m, + tensorflow::int64 n, tensorflow::int64 k, + tensorflow::int32 transpose_lhs, + tensorflow::int32 transpose_rhs); + +} // namespace xla + +#endif // THIRD_PARTY_TENSORFLOW_COMPILER_XLA_SERVICE_CPU_RUNTIME_MATVEC_H_ |