00001 #ifndef AI__MATRIX44_SSE_H
00002 #define AI__MATRIX44_SSE_H
00003
00012 #include <xmmintrin.h>
00013 #include "AI_TVector4SSE.h"
00014 #include "AI_TVector3SSE.h"
00015 #include "AI_Quaternion.h"
00016 #include "AI_Euler.h"
00017 #include "AI_Matrixdefs.h"
00018
/// 4x4 identity matrix as 16 consecutive floats (four rows of four values).
/// Copied into a matrix by the default constructor and by ident().
static float _matrix44_sse_ident[16] =
{
    1.0f, 0.0f, 0.0f, 0.0f,     // row 0
    0.0f, 1.0f, 0.0f, 0.0f,     // row 1
    0.0f, 0.0f, 1.0f, 0.0f,     // row 2
    0.0f, 0.0f, 0.0f, 1.0f      // row 3
};
00026
00027 class AI_TMatrix44SSE
00028 {
00029 public:
00031 AI_TMatrix44SSE();
00033 AI_TMatrix44SSE(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4);
00035 AI_TMatrix44SSE(const AI_TMatrix44SSE& m1);
00037 AI_TMatrix44SSE(float _m11, float _m12, float _m13, float _m14,
00038 float _m21, float _m22, float _m23, float _m24,
00039 float _m31, float _m32, float _m33, float _m34,
00040 float _m41, float _m42, float _m43, float _m44);
00042 AI_TMatrix44SSE(const AI_Quaternion& q);
00044 AI_TMatrix44SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4);
00046 AI_Quaternion get_quaternion() const;
00048 void set(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4);
00050 void set(const AI_TMatrix44SSE& m1);
00052 void set(float _m11, float _m12, float _m13, float _m14,
00053 float _m21, float _m22, float _m23, float _m24,
00054 float _m31, float _m32, float _m33, float _m34,
00055 float _m41, float _m42, float _m43, float _m44);
00057 void set(const AI_Quaternion& q);
00059 void ident();
00061 void transpose();
00063 float det();
00065 void invert(void);
00067 void invert_simple(void);
00069 void mult_simple(const AI_TMatrix44SSE& m1);
00071 AI_TVector3SSE transform_coord(const AI_TVector3SSE& v) const;
00073 AI_TVector3SSE x_component() const;
00075 AI_TVector3SSE y_component() const;
00077 AI_TVector3SSE z_component() const;
00079 AI_TVector3SSE pos_component() const;
00081 void rotate_x(const float a);
00083 void rotate_y(const float a);
00085 void rotate_z(const float a);
00087 void rotate(const AI_TVector3SSE& vec, float a);
00089 void translate(const AI_TVector3SSE& t);
00091 void set_translation(const AI_TVector3SSE& t);
00093 void scale(const AI_TVector3SSE& s);
00095 void lookat(const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00097 void billboard(const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00099 void operator *= (const AI_TMatrix44SSE& m1);
00101 void mult(const AI_TVector4SSE& src, AI_TVector4SSE& dst) const;
00103 void mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const;
00104
00105 union
00106 {
00107 struct
00108 {
00109 __m128 m1;
00110 __m128 m2;
00111 __m128 m3;
00112 __m128 m4;
00113 };
00114 struct
00115 {
00116 float m[4][4];
00117 };
00118 };
00119 };
00120
00121
00124 inline
00125 AI_TMatrix44SSE::AI_TMatrix44SSE()
00126 {
00127 memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00128 }
00129
00130
00133 inline
00134 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4) :
00135 m1(v1.m128), m2(v2.m128), m3(v3.m128), m4(v4.m128)
00136 {
00137
00138 }
00139
00140
00143 inline
00144 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_TMatrix44SSE& mx) :
00145 m1(mx.m1), m2(mx.m2), m3(mx.m3), m4(mx.m4)
00146 {
00147
00148 }
00149
00150
00153 inline
00154 AI_TMatrix44SSE::AI_TMatrix44SSE(float _m11, float _m12, float _m13, float _m14,
00155 float _m21, float _m22, float _m23, float _m24,
00156 float _m31, float _m32, float _m33, float _m34,
00157 float _m41, float _m42, float _m43, float _m44)
00158 {
00159 m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00160 m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00161 m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00162 m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00163 }
00164
00165
00169 inline
00170 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_Quaternion& q)
00171 {
00172 float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00173 x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00174 xx = q.x * x2; xy = q.x * y2; xz = q.x * z2;
00175 yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
00176 wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;
00177
00178 m[0][0] = 1.0f - (yy + zz);
00179 m[1][0] = xy - wz;
00180 m[2][0] = xz + wy;
00181
00182 m[0][1] = xy + wz;
00183 m[1][1] = 1.0f - (xx + zz);
00184 m[2][1] = yz - wx;
00185
00186 m[0][2] = xz - wy;
00187 m[1][2] = yz + wx;
00188 m[2][2] = 1.0f - (xx + yy);
00189
00190 m[3][0] = m[3][1] = m[3][2] = 0.0f;
00191 m[0][3] = m[1][3] = m[2][3] = 0.0f;
00192 m[3][3] = 1.0f;
00193 }
00194
00195
00198 inline
00199 AI_TMatrix44SSE::AI_TMatrix44SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4) :
00200 m1(_m1), m2(_m2), m3(_m3), m4(_m4)
00201 {
00202
00203 }
00204
00205
00212 inline
00213 AI_Quaternion
00214 AI_TMatrix44SSE::get_quaternion() const
00215 {
00216 float qa[4];
00217 float tr = m[0][0] + m[1][1] + m[2][2];
00218 if (tr > 0.0f)
00219 {
00220 float s = ai_sqrt (tr + 1.0f);
00221 qa[3] = s * 0.5f;
00222 s = 0.5f / s;
00223 qa[0] = (m[1][2] - m[2][1]) * s;
00224 qa[1] = (m[2][0] - m[0][2]) * s;
00225 qa[2] = (m[0][1] - m[1][0]) * s;
00226 }
00227 else
00228 {
00229 int i, j, k, nxt[3] = {1,2,0};
00230 i = 0;
00231 if (m[1][1] > m[0][0]) i=1;
00232 if (m[2][2] > m[i][i]) i=2;
00233 j = nxt[i];
00234 k = nxt[j];
00235 float s = ai_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00236 qa[i] = s * 0.5f;
00237 s = 0.5f / s;
00238 qa[3] = (m[j][k] - m[k][j])* s;
00239 qa[j] = (m[i][j] + m[j][i]) * s;
00240 qa[k] = (m[i][k] + m[k][i]) * s;
00241 }
00242 AI_Quaternion q(qa[0],qa[1],qa[2],qa[3]);
00243 return q;
00244 }
00245
00246
00249 inline
00250 void
00251 AI_TMatrix44SSE::set(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4)
00252 {
00253 m1 = v1.m128;
00254 m2 = v2.m128;
00255 m3 = v3.m128;
00256 m4 = v4.m128;
00257 }
00258
00259
00262 inline
00263 void
00264 AI_TMatrix44SSE::set(const AI_TMatrix44SSE& mx)
00265 {
00266 m1 = mx.m1;
00267 m2 = mx.m2;
00268 m3 = mx.m3;
00269 m4 = mx.m4;
00270 }
00271
00272
00275 inline
00276 void
00277 AI_TMatrix44SSE::set(float _m11, float _m12, float _m13, float _m14,
00278 float _m21, float _m22, float _m23, float _m24,
00279 float _m31, float _m32, float _m33, float _m34,
00280 float _m41, float _m42, float _m43, float _m44)
00281 {
00282 m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00283 m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00284 m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00285 m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00286 }
00287
00288
00292 inline
00293 void
00294 AI_TMatrix44SSE::set(const AI_Quaternion& q)
00295 {
00296 float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00297 x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00298 xx = q.x * x2; xy = q.x * y2; xz = q.x * z2;
00299 yy = q.y * y2; yz = q.y * z2; zz = q.z * z2;
00300 wx = q.w * x2; wy = q.w * y2; wz = q.w * z2;
00301
00302 m[0][0] = 1.0f - (yy + zz);
00303 m[1][0] = xy - wz;
00304 m[2][0] = xz + wy;
00305
00306 m[0][1] = xy + wz;
00307 m[1][1] = 1.0f - (xx + zz);
00308 m[2][1] = yz - wx;
00309
00310 m[0][2] = xz - wy;
00311 m[1][2] = yz + wx;
00312 m[2][2] = 1.0f - (xx + yy);
00313
00314 m[3][0] = m[3][1] = m[3][2] = 0.0f;
00315 m[0][3] = m[1][3] = m[2][3] = 0.0f;
00316 m[3][3] = 1.0f;
00317 }
00318
00319
00322 inline
00323 void
00324 AI_TMatrix44SSE::ident()
00325 {
00326 memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00327 }
00328
00329
00332 inline
00333 void
00334 AI_TMatrix44SSE::transpose()
00335 {
00336 _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
00337 }
00338
00339
00343 inline
00344 float
00345 AI_TMatrix44SSE::det()
00346 {
00347 return
00348 (m[0][0] * m[1][1] - m[0][1] * m[1][0]) * (m[2][2] * m[3][3] - m[2][3] * m[3][2])
00349 -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * (m[2][1] * m[3][3] - m[2][3] * m[3][1])
00350 +(m[0][0] * m[1][3] - m[0][3] * m[1][0]) * (m[2][1] * m[3][2] - m[2][2] * m[3][1])
00351 +(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * (m[2][0] * m[3][3] - m[2][3] * m[3][0])
00352 -(m[0][1] * m[1][3] - m[0][3] * m[1][1]) * (m[2][0] * m[3][2] - m[2][2] * m[3][0])
00353 +(m[0][2] * m[1][3] - m[0][3] * m[1][2]) * (m[2][0] * m[3][1] - m[2][1] * m[3][0]);
00354 }
00355
00356
00360 inline
00361 void
00362 AI_TMatrix44SSE::invert()
00363 {
00364 float* src = &(m[0][0]);
00365
00366 __m128 minor0, minor1, minor2, minor3;
00367 __m128 row0, row1, row2, row3;
00368 __m128 det, tmp1;
00369
00370 tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4));
00371 row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12));
00372
00373 row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
00374 row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
00375
00376 tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
00377 row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14));
00378
00379 row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
00380 row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
00381
00382 tmp1 = _mm_mul_ps(row2, row3);
00383 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00384
00385 minor0 = _mm_mul_ps(row1, tmp1);
00386 minor1 = _mm_mul_ps(row0, tmp1);
00387
00388 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00389
00390 minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
00391 minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
00392 minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E);
00393
00394 tmp1 = _mm_mul_ps(row1, row2);
00395 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00396
00397 minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
00398 minor3 = _mm_mul_ps(row0, tmp1);
00399
00400 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00401
00402 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
00403 minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
00404 minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E);
00405
00406 tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
00407 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00408 row2 = _mm_shuffle_ps(row2, row2, 0x4E);
00409
00410 minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
00411 minor2 = _mm_mul_ps(row0, tmp1);
00412
00413 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00414
00415 minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
00416 minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
00417 minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E);
00418
00419 tmp1 = _mm_mul_ps(row0, row1);
00420 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00421
00422 minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
00423 minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
00424
00425 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00426
00427 minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
00428 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
00429
00430 tmp1 = _mm_mul_ps(row0, row3);
00431 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00432
00433 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
00434 minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
00435
00436 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00437
00438 minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
00439 minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
00440
00441 tmp1 = _mm_mul_ps(row0, row2);
00442 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00443
00444 minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
00445 minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
00446
00447 tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00448
00449 minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
00450 minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
00451
00452 det = _mm_mul_ps(row0, minor0);
00453 det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
00454 det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
00455 tmp1 = _mm_rcp_ss(det);
00456
00457 det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
00458 det = _mm_shuffle_ps(det, det, 0x00);
00459
00460 minor0 = _mm_mul_ps(det, minor0);
00461 _mm_storel_pi((__m64*)(src), minor0);
00462 _mm_storeh_pi((__m64*)(src+2), minor0);
00463
00464 minor1 = _mm_mul_ps(det, minor1);
00465 _mm_storel_pi((__m64*)(src+4), minor1);
00466 _mm_storeh_pi((__m64*)(src+6), minor1);
00467
00468 minor2 = _mm_mul_ps(det, minor2);
00469 _mm_storel_pi((__m64*)(src+ 8), minor2);
00470 _mm_storeh_pi((__m64*)(src+10), minor2);
00471
00472 minor3 = _mm_mul_ps(det, minor3);
00473 _mm_storel_pi((__m64*)(src+12), minor3);
00474 _mm_storeh_pi((__m64*)(src+14), minor3);
00475 }
00476
00477
00485 inline
00486 void
00487 AI_TMatrix44SSE::invert_simple()
00488 {
00489 float s = det();
00490 if (s == 0.0f) return;
00491 s = 1.0f/s;
00492 this->set(
00493 s * ((m[1][1] * m[2][2]) - (m[1][2] * m[2][1])),
00494 s * ((m[2][1] * m[0][2]) - (m[2][2] * m[0][1])),
00495 s * ((m[0][1] * m[1][2]) - (m[0][2] * m[1][1])),
00496 0.0f,
00497 s * ((m[1][2] * m[2][0]) - (m[1][0] * m[2][2])),
00498 s * ((m[2][2] * m[0][0]) - (m[2][0] * m[0][2])),
00499 s * ((m[0][2] * m[1][0]) - (m[0][0] * m[1][2])),
00500 0.0f,
00501 s * ((m[1][0] * m[2][1]) - (m[1][1] * m[2][0])),
00502 s * ((m[2][0] * m[0][1]) - (m[2][1] * m[0][0])),
00503 s * ((m[0][0] * m[1][1]) - (m[0][1] * m[1][0])),
00504 0.0f,
00505 s * (m[1][0]*(m[2][2]*m[3][1] - m[2][1]*m[3][2]) + m[1][1]*(m[2][0]*m[3][2] - m[2][2]*m[3][0]) + m[1][2]*(m[2][1]*m[3][0] - m[2][0]*m[3][1])),
00506 s * (m[2][0]*(m[0][2]*m[3][1] - m[0][1]*m[3][2]) + m[2][1]*(m[0][0]*m[3][2] - m[0][2]*m[3][0]) + m[2][2]*(m[0][1]*m[3][0] - m[0][0]*m[3][1])),
00507 s * (m[3][0]*(m[0][2]*m[1][1] - m[0][1]*m[1][2]) + m[3][1]*(m[0][0]*m[1][2] - m[0][2]*m[1][0]) + m[3][2]*(m[0][1]*m[1][0] - m[0][0]*m[1][1])),
00508 1.0f);
00509 }
00510
00511
00519 inline
00520 void
00521 AI_TMatrix44SSE::mult_simple(const AI_TMatrix44SSE& mx)
00522 {
00523 m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00524 m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00525 m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00526 m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00527 }
00528
00529
00535 inline
00536 AI_TVector3SSE
00537 AI_TMatrix44SSE::transform_coord(const AI_TVector3SSE& v) const
00538 {
00539 float d = 1.0f / (m[0][3]*v.x + m[1][3]*v.y + m[2][3]*v.z + m[3][3]);
00540 return AI_TVector3SSE(
00541 (m[0][0]*v.x + m[1][0]*v.y + m[2][0]*v.z + m[3][0]) * d,
00542 (m[0][1]*v.x + m[1][1]*v.y + m[2][1]*v.z + m[3][1]) * d,
00543 (m[0][2]*v.x + m[1][2]*v.y + m[2][2]*v.z + m[3][2]) * d);
00544 }
00545
00546
00549 inline
00550 AI_TVector3SSE
00551 AI_TMatrix44SSE::x_component() const
00552 {
00553 AI_TVector3SSE v(m1);
00554 return v;
00555 }
00556
00557
00560 inline
00561 AI_TVector3SSE
00562 AI_TMatrix44SSE::y_component() const
00563 {
00564 AI_TVector3SSE v(m2);
00565 return v;
00566 }
00567
00568
00571 inline
00572 AI_TVector3SSE
00573 AI_TMatrix44SSE::z_component() const
00574 {
00575 AI_TVector3SSE v(m3);
00576 return v;
00577 }
00578
00579
00582 inline
00583 AI_TVector3SSE
00584 AI_TMatrix44SSE::pos_component() const
00585 {
00586 AI_TVector3SSE v(m[3][0], m[3][1], m[3][2]);
00587 return v;
00588 }
00589
00590
00594 inline
00595 void
00596 AI_TMatrix44SSE::rotate_x(const float a)
00597 {
00598 float c = ai_cos(a);
00599 float s = ai_sin(a);
00600 int i;
00601 for (i=0; i<4; i++) {
00602 float mi1 = m[i][1];
00603 float mi2 = m[i][2];
00604 m[i][1] = mi1*c + mi2*-s;
00605 m[i][2] = mi1*s + mi2*c;
00606 }
00607 }
00608
00609
00613 inline
00614 void
00615 AI_TMatrix44SSE::rotate_y(const float a)
00616 {
00617 float c = ai_cos(a);
00618 float s = ai_sin(a);
00619 int i;
00620 for (i=0; i<4; i++) {
00621 float mi0 = m[i][0];
00622 float mi2 = m[i][2];
00623 m[i][0] = mi0*c + mi2*s;
00624 m[i][2] = mi0*-s + mi2*c;
00625 }
00626 }
00627
00628
00632 inline
00633 void
00634 AI_TMatrix44SSE::rotate_z(const float a)
00635 {
00636 float c = ai_cos(a);
00637 float s = ai_sin(a);
00638 int i;
00639 for (i=0; i<4; i++) {
00640 float mi0 = m[i][0];
00641 float mi1 = m[i][1];
00642 m[i][0] = mi0*c + mi1*-s;
00643 m[i][1] = mi0*s + mi1*c;
00644 }
00645 }
00646
00647
00650 inline
00651 void
00652 AI_TMatrix44SSE::translate(const AI_TVector3SSE& t)
00653 {
00654 m4 = _mm_add_ps(m4, t.m128);
00655 }
00656
00657
00661 inline
00662 void
00663 AI_TMatrix44SSE::set_translation(const AI_TVector3SSE& t)
00664 {
00665 m4 = t.m128;
00666 };
00667
00668
00671 inline
00672 void
00673 AI_TMatrix44SSE::scale(const AI_TVector3SSE& s)
00674 {
00675
00676 __m128 scale = _mm_add_ps(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f), s.m128);
00677 m1 = _mm_mul_ps(m1, scale);
00678 m2 = _mm_mul_ps(m2, scale);
00679 m3 = _mm_mul_ps(m3, scale);
00680 m4 = _mm_mul_ps(m4, scale);
00681 }
00682
00683
00686 inline
00687 void
00688 AI_TMatrix44SSE::lookat(const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00689 {
00690 AI_TVector3SSE from(m[3][0], m[3][1], m[3][2]);
00691 AI_TVector3SSE z(from - to);
00692 z.norm();
00693 AI_TVector3SSE y(up);
00694 AI_TVector3SSE x(y * z);
00695 y = z * x;
00696 x.norm();
00697 y.norm();
00698
00699 m1 = x.m128;
00700 m2 = y.m128;
00701 m3 = z.m128;
00702 }
00703
00704
00707 inline
00708 void
00709 AI_TMatrix44SSE::billboard(const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00710 {
00711 AI_TVector3SSE from(m[3][0], m[3][1], m[3][2]);
00712 AI_TVector3SSE z(from - to);
00713 z.norm();
00714 AI_TVector3SSE y(up);
00715 AI_TVector3SSE x(y * z);
00716 z = x * y;
00717 x.norm();
00718 y.norm();
00719 z.norm();
00720
00721 m1 = x.m128;
00722 m2 = y.m128;
00723 m3 = z.m128;
00724 }
00725
00726
00732 inline
00733 void
00734 AI_TMatrix44SSE::operator *= (const AI_TMatrix44SSE& mx)
00735 {
00736 m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00737 m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00738 m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00739 m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00740 }
00741
00742
00746 inline
00747 void
00748 AI_TMatrix44SSE::rotate(const AI_TVector3SSE& vec, float a)
00749 {
00750 AI_TVector3SSE v(vec);
00751 v.norm();
00752 float sa = (float) ai_sin(a);
00753 float ca = (float) ai_cos(a);
00754
00755 AI_TMatrix44SSE rotM;
00756 rotM.m[0][0] = ca + (1.0f - ca) * v.x * v.x;
00757 rotM.m[0][1] = (1.0f - ca) * v.x * v.y - sa * v.z;
00758 rotM.m[0][2] = (1.0f - ca) * v.z * v.x + sa * v.y;
00759 rotM.m[1][0] = (1.0f - ca) * v.x * v.y + sa * v.z;
00760 rotM.m[1][1] = ca + (1.0f - ca) * v.y * v.y;
00761 rotM.m[1][2] = (1.0f - ca) * v.y * v.z - sa * v.x;
00762 rotM.m[2][0] = (1.0f - ca) * v.z * v.x - sa * v.y;
00763 rotM.m[2][1] = (1.0f - ca) * v.y * v.z + sa * v.x;
00764 rotM.m[2][2] = ca + (1.0f - ca) * v.z * v.z;
00765
00766 (*this) *= rotM;
00767 }
00768
00769
00774 inline
00775 void
00776 AI_TMatrix44SSE::mult(const AI_TVector4SSE& src, AI_TVector4SSE& dst) const
00777 {
00778 dst.m128 = _mm_add_ps(
00779 _mm_add_ps(
00780 _mm_add_ps(
00781 _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00782 _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00783 _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00784 _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00785 }
00786
00787
00792 inline
00793 void
00794 AI_TMatrix44SSE::mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const
00795 {
00796 dst.m128 = _mm_add_ps(
00797 _mm_add_ps(
00798 _mm_add_ps(
00799 _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))),
00800 _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))),
00801 _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))),
00802 _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00803 }
00804
00805
00808 static
00809 inline
00810 AI_TMatrix44SSE
00811 operator * (const AI_TMatrix44SSE& ma, const AI_TMatrix44SSE& mb)
00812 {
00813 return AI_TMatrix44SSE(
00814 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00815 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00816 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00817 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(3,3,3,3)), mb.m4))
00818 );
00819 }
00820
00821
00824 static
00825 inline
00826 AI_TVector3SSE operator * (const AI_TMatrix44SSE& m, const AI_TVector3SSE& v)
00827 {
00828 return AI_TVector3SSE(
00829 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f)))
00830 );
00831 }
00832
00833
00836 static
00837 inline
00838 AI_TVector4SSE operator * (const AI_TMatrix44SSE& m, const AI_TVector4SSE& v)
00839 {
00840 return AI_TVector4SSE(
00841 _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(3,3,3,3))))
00842 );
00843 }
00844
00845
00846 #endif