about | summary | refs | log | tree | commit | diff | homepage
diff options
context:
space:
mode:
author tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2013-01-17 12:17:00 +0000
committer tomhudson@google.com <tomhudson@google.com@2bbb7eff-a529-9590-31e7-b0007b416f81> 2013-01-17 12:17:00 +0000
commit 21e4322b925b1a0463094be8e9cc581d284f4b46 (patch)
tree e4c4321f406ace24e9e172b23f0ab9704f35abe8
parent 4d28d9889b033777afc1950474296d37887ef71b (diff)
Add SSE2 multiplication for SkMatrix44 on some platforms.
Original author whunt@chromium.org. https://codereview.appspot.com/7058077/ git-svn-id: http://skia.googlecode.com/svn/trunk@7241 2bbb7eff-a529-9590-31e7-b0007b416f81
-rw-r--r-- bench/Matrix44Bench.cpp 23
-rw-r--r-- include/utils/SkMatrix44.h 17
-rw-r--r-- src/utils/SkMatrix44.cpp 71
3 files changed, 107 insertions, 4 deletions
diff --git a/bench/Matrix44Bench.cpp b/bench/Matrix44Bench.cpp
index ce5d99de7b..ac39639f5f 100644
--- a/bench/Matrix44Bench.cpp
+++ b/bench/Matrix44Bench.cpp
@@ -125,12 +125,34 @@ private:
typedef Matrix44Bench INHERITED;
};
+class SetConcatMatrix44BenchSpecialCase : public Matrix44Bench { // Benchmark: setConcat() of a scale matrix with a translate matrix.
+public:
+ SetConcatMatrix44BenchSpecialCase(void* param) : INHERITED(param, "setconcat_special") { // registered under the name "setconcat_special"
+ fX = fY = fZ = SkDoubleToMScalar(1.5); // same factor (1.5) is used for all three axes
+ fM1.setScale(fX, fY, fZ); // first operand: scale only
+ fM2.setTranslate(fX, fY, fZ); // second operand: translate only
+ }
+protected:
+ virtual void performTest() { // timed body: ten back-to-back concatenations into fM0
+ fM0.reset(); // just to normalize this test with prescale/postscale
+ for (int i = 0; i < 10; ++i) {
+ fM0.setConcat(fM1, fM2); // fM0 is neither operand, so no aliasing between output and inputs
+ }
+ }
+private:
+ SkMatrix44 fM0, fM1, fM2; // fM0: destination; fM1, fM2: operands
+ SkMScalar fX, fY, fZ; // scale/translate amounts set in the constructor
+ typedef Matrix44Bench INHERITED;
+};
+
class SetConcatMatrix44Bench : public Matrix44Bench {
public:
SetConcatMatrix44Bench(void* param) : INHERITED(param, "setconcat") {
fX = fY = fZ = SkDoubleToMScalar(1.5);
fM1.setScale(fX, fY, fZ);
+ fM1.set(2, 0, 3.0f);
fM2.setTranslate(fX, fY, fZ);
+ fM2.set(2, 0, 3.0f);
}
protected:
virtual void performTest() {
@@ -167,6 +189,7 @@ DEF_BENCH( return new EqualsMatrix44Bench(p); )
DEF_BENCH( return new PreScaleMatrix44Bench(p); )
DEF_BENCH( return new PostScaleMatrix44Bench(p); )
DEF_BENCH( return new InvertMatrix44Bench(p); )
+DEF_BENCH( return new SetConcatMatrix44BenchSpecialCase(p); )
DEF_BENCH( return new SetConcatMatrix44Bench(p); )
DEF_BENCH( return new GetTypeMatrix44Bench(p); )
diff --git a/include/utils/SkMatrix44.h b/include/utils/SkMatrix44.h
index 41f1a30209..83fb7c9946 100644
--- a/include/utils/SkMatrix44.h
+++ b/include/utils/SkMatrix44.h
@@ -51,6 +51,11 @@
static const SkMScalar SK_MScalarPI = 3.14159265f;
#endif
+#if (defined(__x86_64__) || defined(_M_X64) || defined(__SSE2__)) && \
+defined(SK_MSCALAR_IS_DOUBLE) // only when SkMScalar is double: the SSE2 code works on packed doubles
+#define SK_MATRIX44_USE_SSE2 // opt-in switch tested by SkMatrix44.h/.cpp for the SSE2 multiply and 16-byte alignment
+#endif // x86-64 or SSE2 target with double SkMScalar
+
#define SkMScalarToScalar SkMScalarToFloat
#define SkScalarToMScalar SkFloatToMScalar
@@ -99,7 +104,11 @@ struct SkVector4 {
}
};
-class SK_API SkMatrix44 {
+class
+#if defined(SK_MATRIX44_USE_SSE2) && defined(_MSC_VER)
+__declspec(align(16))
+#endif
+SK_API SkMatrix44 {
public:
enum Uninitialized_Constructor {
@@ -398,6 +407,10 @@ private:
inline bool isTriviallyIdentity() const {
return 0 == fTypeMask;
}
-};
+}
+#if defined(SK_MATRIX44_USE_SSE2) && !defined(_MSC_VER)
+__attribute__ ((aligned (16)))
+#endif
+;
#endif
diff --git a/src/utils/SkMatrix44.cpp b/src/utils/SkMatrix44.cpp
index 1906593acd..5953da648c 100644
--- a/src/utils/SkMatrix44.cpp
+++ b/src/utils/SkMatrix44.cpp
@@ -343,6 +343,21 @@ static bool bits_isonly(int value, int mask) {
return 0 == (value & ~mask);
}
+#if defined(SK_MATRIX44_USE_SSE2) // SSE2 helpers used by SkMatrix44::setConcat below
+#include <emmintrin.h>
+struct MatrixD { // 8 x __m128d (two doubles each) = 16 doubles; setConcat casts 4x4 matrix storage to this type
+ __m128d x_xy, x_zw; // values 0-1 and 2-3 of the first group of four
+ __m128d y_xy, y_zw; // values 0-1 and 2-3 of the second group of four
+ __m128d z_xy, z_zw; // values 0-1 and 2-3 of the third group of four
+ __m128d w_xy, w_zw; // values 0-1 and 2-3 of the fourth group of four
+};
+
+#if defined(_MSC_VER) // NOTE(review): presumably needed because MSVC lacks built-in + and * on __m128d - confirm
+inline __m128d operator +(__m128d a, __m128d b) { return _mm_add_pd(a, b); } // lane-wise add of both doubles
+inline __m128d operator *(__m128d a, __m128d b) { return _mm_mul_pd(a, b); } // lane-wise multiply of both doubles
+#endif // _MSC_VER
+#endif // SK_MATRIX44_USE_SSE2
+
void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) {
const SkMatrix44::TypeMask a_mask = a.getType();
const SkMatrix44::TypeMask b_mask = b.getType();
@@ -357,19 +372,70 @@ void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) {
}
bool useStorage = (this == &a || this == &b);
+#if defined(SK_MATRIX44_USE_SSE2)
+ MatrixD storage;
+ SkMScalar* result = useStorage ? (SkMScalar*)&storage : &fMat[0][0];
+#else
SkMScalar storage[16];
SkMScalar* result = useStorage ? storage : &fMat[0][0];
+#endif
if (bits_isonly(a_mask | b_mask, kScale_Mask | kTranslate_Mask)) {
- sk_bzero(result, sizeof(storage));
result[0] = a.fMat[0][0] * b.fMat[0][0];
+ result[1] = 0.0;
+ result[2] = 0.0;
+ result[3] = 0.0;
+ result[4] = 0.0;
result[5] = a.fMat[1][1] * b.fMat[1][1];
+ result[6] = 0.0;
+ result[7] = 0.0;
+ result[8] = 0.0;
+ result[9] = 0.0;
result[10] = a.fMat[2][2] * b.fMat[2][2];
+ result[11] = 0.0;
result[12] = a.fMat[0][0] * b.fMat[3][0] + a.fMat[3][0];
result[13] = a.fMat[1][1] * b.fMat[3][1] + a.fMat[3][1];
result[14] = a.fMat[2][2] * b.fMat[3][2] + a.fMat[3][2];
result[15] = 1;
} else {
+#if defined(SK_MATRIX44_USE_SSE2)
+ MatrixD* p = (MatrixD*)result;
+ const MatrixD* pa = (const MatrixD*)a.fMat;
+ const MatrixD* pb = (const MatrixD*)b.fMat;
+ __m128d x_xy = pa->x_xy;
+ __m128d x_zw = pa->x_zw;
+ __m128d y_xy = pa->y_xy;
+ __m128d y_zw = pa->y_zw;
+ __m128d z_xy = pa->z_xy;
+ __m128d z_zw = pa->z_zw;
+ __m128d w_xy = pa->w_xy;
+ __m128d w_zw = pa->w_zw;
+ __m128d b0, b1, b2, b3;
+ b0 = _mm_set1_pd(((double*)&pb->x_xy)[0]);
+ b1 = _mm_set1_pd(((double*)&pb->x_xy)[1]);
+ b2 = _mm_set1_pd(((double*)&pb->x_zw)[0]);
+ b3 = _mm_set1_pd(((double*)&pb->x_zw)[1]);
+ p->x_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
+ p->x_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
+ b0 = _mm_set1_pd(((double*)&pb->y_xy)[0]);
+ b1 = _mm_set1_pd(((double*)&pb->y_xy)[1]);
+ b2 = _mm_set1_pd(((double*)&pb->y_zw)[0]);
+ b3 = _mm_set1_pd(((double*)&pb->y_zw)[1]);
+ p->y_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
+ p->y_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
+ b0 = _mm_set1_pd(((double*)&pb->z_xy)[0]);
+ b1 = _mm_set1_pd(((double*)&pb->z_xy)[1]);
+ b2 = _mm_set1_pd(((double*)&pb->z_zw)[0]);
+ b3 = _mm_set1_pd(((double*)&pb->z_zw)[1]);
+ p->z_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
+ p->z_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
+ b0 = _mm_set1_pd(((double*)&pb->w_xy)[0]);
+ b1 = _mm_set1_pd(((double*)&pb->w_xy)[1]);
+ b2 = _mm_set1_pd(((double*)&pb->w_zw)[0]);
+ b3 = _mm_set1_pd(((double*)&pb->w_zw)[1]);
+ p->w_xy = b0 * x_xy + b1 * y_xy + b2 * z_xy + b3 * w_xy;
+ p->w_zw = b0 * x_zw + b1 * y_zw + b2 * z_zw + b3 * w_zw;
+#else
for (int j = 0; j < 4; j++) {
for (int i = 0; i < 4; i++) {
double value = 0;
@@ -379,10 +445,11 @@ void SkMatrix44::setConcat(const SkMatrix44& a, const SkMatrix44& b) {
*result++ = SkDoubleToMScalar(value);
}
}
+#endif
}
if (useStorage) {
- memcpy(fMat, storage, sizeof(storage));
+ memcpy(fMat, &storage, sizeof(storage)); // copy from the buffer start, not `result`: the non-SSE2 loop leaves `result` past the end
}
this->dirtyTypeMask();
}