diff options
author | tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-01-17 12:17:00 +0000 |
---|---|---|
committer | tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> | 2013-01-17 12:17:00 +0000 |
commit | 21e4322b925b1a0463094be8e9cc581d284f4b46 (patch) | |
tree | e4c4321f406ace24e9e172b23f0ab9704f35abe8 | |
parent | 4d28d9889b033777afc1950474296d37887ef71b (diff) |
Add SSE2 multiplication for SkMatrix44 on some platforms.
Original author whunt@chromium.org.
https://codereview.appspot.com/7058077/
git-svn-id: http://skia.googlecode.com/svn/trunk@7241 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r-- | bench/Matrix44Bench.cpp | 23 | ||||
-rw-r--r-- | include/utils/SkMatrix44.h | 17 | ||||
-rw-r--r-- | src/utils/SkMatrix44.cpp | 71 |
3 files changed, 107 insertions, 4 deletions
diff --git a/bench/Matrix44Bench.cpp b/bench/Matrix44Bench.cpp index ce5d99de7b..ac39639f5f 100644 --- a/bench/Matrix44Bench.cpp +++ b/bench/Matrix44Bench.cpp @@ -125,12 +125,34 @@ private: typedef Matrix44Bench INHERITED; }; +class SetConcatMatrix44BenchSpecialCase : public Matrix44Bench { +public: + SetConcatMatrix44BenchSpecialCase(void* param) : INHERITED(param, "setconcat_special") { + fX = fY = fZ = SkDoubleToMScalar(1.5); + fM1.setScale(fX, fY, fZ); + fM2.setTranslate(fX, fY, fZ); + } +protected: + virtual void performTest() { + fM0.reset(); // just to normalize this test with prescale/postscale + for (int i = 0; i < 10; ++i) { + fM0.setConcat(fM1, fM2); + } + } +private: + SkMatrix44 fM0, fM1, fM2; + SkMScalar fX, fY, fZ; + typedef Matrix44Bench INHERITED; +}; + class SetConcatMatrix44Bench : public Matrix44Bench { public: SetConcatMatrix44Bench(void* param) : INHERITED(param, "setconcat") { fX = fY = fZ = SkDoubleToMScalar(1.5); fM1.setScale(fX, fY, fZ); + fM1.set(2, 0, 3.0f); fM2.setTranslate(fX, fY, fZ); + fM2.set(2, 0, 3.0f); } protected: virtual void performTest() { @@ -167,6 +189,7 @@ DEF_BENCH( return new EqualsMatrix44Bench(p); ) DEF_BENCH( return new PreScaleMatrix44Bench(p); ) DEF_BENCH( return new PostScaleMatrix44Bench(p); ) DEF_BENCH( return new InvertMatrix44Bench(p); ) +DEF_BENCH( return new SetConcatMatrix44BenchSpecialCase(p); ) DEF_BENCH( return new SetConcatMatrix44Bench(p); ) DEF_BENCH( return new GetTypeMatrix44Bench(p); ) diff --git a/include/utils/SkMatrix44.h b/include/utils/SkMatrix44.h index 41f1a30209..83fb7c9946 100644 --- a/include/utils/SkMatrix44.h +++ b/include/utils/SkMatrix44.h @@ -51,6 +51,11 @@ static const SkMScalar SK_MScalarPI = 3.14159265f; #endif +#if (defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__)) && \ +defined(SK_MSCALAR_IS_DOUBLE) +#define SK_MATRIX44_USE_SSE2 +#endif + #define SkMScalarToScalar SkMScalarToFloat #define SkScalarToMScalar SkFloatToMScalar @@ -99,7 +104,11 @@ struct SkVector4 { } }; -class SK_API SkMatrix44 { +class +#if defined(SK_MATRIX44_USE_SSE2) && defined(_MSC_VER) +__declspec(align(16)) +#endif +SK_API SkMatrix44 { public: enum Uninitialized_Constructor { @@ -398,6 +407,10 @@ private: inline bool isTriviallyIdentity() const { return 0 == fTypeMask; } -}; +} +#if defined(SK_MATRIX44_USE_SSE2) && !defined(_MSC_VER) +__attribute__ ((aligned (16))) +#endif +; #endif diff --git a/src/utils/SkMatrix44.cpp b/src/utils/SkMatrix44.cpp index 1906593acd..5953da648c 100644 --- a/src/utils/SkMatrix44.cpp +++ b/src/utils/SkMatrix44.cpp @@ -343,6 +343,21 @@ static bool bits_isonly(int value, int mask) { return 0 == (value & ~mask); } +#if defined(SK_MATRIX44_USE_SSE2) +#include <emmintrin.h> +struct MatrixD { + __m128d x_xy, x_zw; + __m128d y_xy, y_zw; + __m128d z_xy, z_zw; + __m128d w_xy, w_zw; +}; + +#if defined(_MSC_VER) +inline __m128d operator +(__m128d a, __m128d b) { return _mm_add_pd(a, b); } +inline __m128d operator *(__m128d a, __m128d b) { return _mm_mul_pd(a, b); } +#endif +#endif + void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) { const SkMatrix44::TypeMask a_mask = a.getType(); const SkMatrix44::TypeMask b_mask = b.getType(); @@ -357,19 +372,70 @@ void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) { } bool useStorage = (this == &a || this == &b); +#if defined(SK_MATRIX44_USE_SSE2) + MatrixD storage; + SkMScalar* result = useStorage ? (SkMScalar*)&storage : &fMat[0][0]; +#else SkMScalar storage[16]; SkMScalar* result = useStorage ? storage : &fMat[0][0]; +#endif if (bits_isonly(a_mask | b_mask, kScale_Mask | kTranslate_Mask)) { - sk_bzero(result, sizeof(storage)); result[0] = a.fMat[0][0] * b.fMat[0][0]; + result[1] = 0.0; + result[2] = 0.0; + result[3] = 0.0; + result[4] = 0.0; result[5] = a.fMat[1][1] * b.fMat[1][1]; + result[6] = 0.0; + result[7] = 0.0; + result[8] = 0.0; + result[9] = 0.0; result[10] = a.fMat[2][2] * b.fMat[2][2]; + result[11] = 0.0; result[12] = a.fMat[0][0] * b.fMat[3][0] + a.fMat[3][0]; result[13] = a.fMat[1][1] * b.fMat[3][1] + a.fMat[3][1]; result[14] = a.fMat[2][2] * b.fMat[3][2] + a.fMat[3][2]; result[15] = 1; } else { +#if defined(SK_MATRIX44_USE_SSE2) + MatrixD* p = (MatrixD*)result; + const MatrixD* pa = (const MatrixD*)a.fMat; + const MatrixD* pb = (const MatrixD*)b.fMat; + __m128d x_xy = pa->x_xy; + __m128d x_zw = pa->x_zw; + __m128d y_xy = pa->y_xy; + __m128d y_zw = pa->y_zw; + __m128d z_xy = pa->z_xy; + __m128d z_zw = pa->z_zw; + __m128d w_xy = pa->w_xy; + __m128d w_zw = pa->w_zw; + __m128d b0, b1, b2, b3; + b0 = _mm_set1_pd(((double*)&pb->x_xy)[0]); + b1 = _mm_set1_pd(((double*)&pb->x_xy)[1]); + b2 = _mm_set1_pd(((double*)&pb->x_zw)[0]); + b3 = _mm_set1_pd(((double*)&pb->x_zw)[1]); + p->x_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy; + p->x_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw; + b0 = _mm_set1_pd(((double*)&pb->y_xy)[0]); + b1 = _mm_set1_pd(((double*)&pb->y_xy)[1]); + b2 = _mm_set1_pd(((double*)&pb->y_zw)[0]); + b3 = _mm_set1_pd(((double*)&pb->y_zw)[1]); + p->y_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy; + p->y_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw; + b0 = _mm_set1_pd(((double*)&pb->z_xy)[0]); + b1 = _mm_set1_pd(((double*)&pb->z_xy)[1]); + b2 = _mm_set1_pd(((double*)&pb->z_zw)[0]); + b3 = _mm_set1_pd(((double*)&pb->z_zw)[1]); + p->z_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy; + p->z_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw; + b0 = _mm_set1_pd(((double*)&pb->w_xy)[0]); + b1 = _mm_set1_pd(((double*)&pb->w_xy)[1]); + b2 = _mm_set1_pd(((double*)&pb->w_zw)[0]); + b3 = _mm_set1_pd(((double*)&pb->w_zw)[1]); + p->w_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy; + p->w_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw; +#else for (int j = 0; j < 4; j++) { for (int i = 0; i < 4; i++) { double value = 0; @@ -379,10 +445,11 @@ void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) { *result++ = SkDoubleToMScalar(value); } } +#endif } if (useStorage) { - memcpy(fMat, storage, sizeof(storage)); + memcpy(fMat, result, sizeof(storage)); } this->dirtyTypeMask(); } |