Browse Source

libobs/graphics: Add SSE intrinsics for matrix4_transpose

hwdro 9 years ago
parent
commit
e3847109be
1 changed file with 33 additions and 20 deletions
  1. 33 20
      libobs/graphics/matrix4.c

+ 33 - 20
libobs/graphics/matrix4.c

@@ -261,25 +261,38 @@ bool matrix4_inv(struct matrix4 *dst, const struct matrix4 *m)
 
 void matrix4_transpose(struct matrix4 *dst, const struct matrix4 *m)
 {
-	struct matrix4 temp;
+	if (dst == m) {
+		struct matrix4 temp = *m;
+		matrix4_transpose(dst, &temp);
+		return;
+	}
 
-	/* TODO: Add SSE */
-	temp.x.x = m->x.x;
-	temp.x.y = m->y.x;
-	temp.x.z = m->z.x;
-	temp.x.w = m->t.x;
-	temp.y.x = m->x.y;
-	temp.y.y = m->y.y;
-	temp.y.z = m->z.y;
-	temp.y.w = m->t.y;
-	temp.z.x = m->x.z;
-	temp.z.y = m->y.z;
-	temp.z.z = m->z.z;
-	temp.z.w = m->t.z;
-	temp.t.x = m->x.w;
-	temp.t.y = m->y.w;
-	temp.t.z = m->z.w;
-	temp.t.w = m->t.w;
-
-	matrix4_copy(dst, &temp);
+#ifdef NO_INTRINSICS
+	dst->x.x = m->x.x;
+	dst->x.y = m->y.x;
+	dst->x.z = m->z.x;
+	dst->x.w = m->t.x;
+	dst->y.x = m->x.y;
+	dst->y.y = m->y.y;
+	dst->y.z = m->z.y;
+	dst->y.w = m->t.y;
+	dst->z.x = m->x.z;
+	dst->z.y = m->y.z;
+	dst->z.z = m->z.z;
+	dst->z.w = m->t.z;
+	dst->t.x = m->x.w;
+	dst->t.y = m->y.w;
+	dst->t.z = m->z.w;
+	dst->t.w = m->t.w;
+#else
+	__m128 a0 = _mm_unpacklo_ps(m->x.m, m->z.m);
+	__m128 a1 = _mm_unpacklo_ps(m->y.m, m->t.m);
+	__m128 a2 = _mm_unpackhi_ps(m->x.m, m->z.m);
+	__m128 a3 = _mm_unpackhi_ps(m->y.m, m->t.m);
+
+	dst->x.m = _mm_unpacklo_ps(a0, a1);
+	dst->y.m = _mm_unpackhi_ps(a0, a1);
+	dst->z.m = _mm_unpacklo_ps(a2, a3);
+	dst->t.m = _mm_unpackhi_ps(a2, a3);
+#endif
 }