From f41959ccb2d9d4c722fe8fc3351401d53bcf4900 Mon Sep 17 00:00:00 2001 From: Manjunath Kudlur Date: Fri, 6 Nov 2015 16:27:58 -0800 Subject: TensorFlow: Initial commit of TensorFlow library. TensorFlow is an open source software library for numerical computation using data flow graphs. Base CL: 107276108 --- .../Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h | 95 ++++++++++++++++++++++ 1 file changed, 95 insertions(+) create mode 100644 third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h (limited to 'third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h') diff --git a/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h new file mode 100644 index 0000000000..99894cafb5 --- /dev/null +++ b/third_party/eigen3/unsupported/Eigen/CXX11/src/FixedPoint/MatMatProductNEON.h @@ -0,0 +1,95 @@ +// This file is part of Eigen, a lightweight C++ template library +// for linear algebra. +// +// Copyright (C) 2015 Benoit Steiner +// Copyright (C) 2015 Benoit Jacob +// +// This Source Code Form is subject to the terms of the Mozilla +// Public License v. 2.0. If a copy of the MPL was not distributed +// with this file, You can obtain one at http://mozilla.org/MPL/2.0/. + +#ifndef EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H +#define EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H + + +namespace Eigen { +namespace internal { + + +// AVX2 optimized implementation of the case where the lhs is encoded using signed 8bit +// integers and the rhs using unsigned 8bit integers. +#ifdef EIGEN_USE_OPTIMIZED_INT8_UINT8_MAT_MAT_PRODUCT + +template +class gebp_traits +{ +public: + typedef QInt8 LhsScalar; + typedef QUInt8 RhsScalar; + typedef QInt32 ResScalar; + + enum { + // register block size along the M and N directions + // One for the current implementation + nr = 1, + mr = 1, + // Progress made at each iteration of the product loop + // also 1 for the current implementation + LhsProgress = 1, + RhsProgress = 1 + }; +}; + +// Mat-Mat product of a signed 8bit lhs with an unsigned 8bit rhs +template +struct gebp_kernel +{ + EIGEN_DONT_INLINE + void operator()(const DataMapper& res, const QInt8* blockA, const QUInt8* blockB, + Index rows, Index depth, Index cols, QInt32 alpha, + Index strideA=-1, Index strideB=-1, Index offsetA=0, Index offsetB=0); +}; + +template +EIGEN_DONT_INLINE +void gebp_kernel +::operator()(const DataMapper& res, const QInt8* blockA, const QUInt8* blockB, + Index rows, Index depth, Index cols, QInt32 alpha, + Index strideA, Index strideB, Index offsetA, Index offsetB) +{ + EIGEN_STATIC_ASSERT(!ConjugateLhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + EIGEN_STATIC_ASSERT(!ConjugateRhs, YOU_MADE_A_PROGRAMMING_MISTAKE); + + eigen_assert(alpha.value == 1); + eigen_assert(strideA == -1); + eigen_assert(strideB == -1); + eigen_assert(offsetA == 0); + eigen_assert(offsetB == 0); + + eigen_assert(rows > 0); + eigen_assert(cols > 0); + eigen_assert(depth > 0); + eigen_assert(blockA); + eigen_assert(blockB); + + for (Index j = 0; j < cols; ++j) { + Index startB = j * depth; + + for (Index i = 0; i < rows; ++i) { + Index startA = i * depth; + + for (Index k = 0; k < depth; ++k) { + res(i, j) += blockA[startA + k] * blockB[startB + k]; + } + } + } +} +#endif + + +} // namespace internal +} // namespace Eigen + + + +#endif // EIGEN_CXX11_FIXED_POINT_MAT_MAT_PRODUCT_NEON_H -- cgit v1.2.3