AI_TMatrix44_sse.h

Go to the documentation of this file.
00001 #ifndef AI__MATRIX44_SSE_H
00002 #define AI__MATRIX44_SSE_H
00003 //------------------------------------------------------------------------------
00012 #include <xmmintrin.h>
00013 #include "AI_TVector4SSE.h"
00014 #include "AI_TVector3SSE.h"
00015 #include "AI_Quaternion.h"
00016 #include "AI_Euler.h"
00017 #include "AI_Matrixdefs.h"
00018 
/// 4x4 identity laid out as 16 consecutive floats (four rows of four).
/// The code in this header only ever reads it, so it is declared const to
/// keep the shared table from being modified by accident.
static const float _matrix44_sse_ident[16] =
{
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f,
    0.0f, 0.0f, 0.0f, 1.0f,
};
00026 
00027 class AI_TMatrix44SSE
00028 {
00029 public:
00031     AI_TMatrix44SSE();
00033     AI_TMatrix44SSE(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4);
00035     AI_TMatrix44SSE(const AI_TMatrix44SSE& m1);
00037     AI_TMatrix44SSE(float _m11, float _m12, float _m13, float _m14,
00038                   float _m21, float _m22, float _m23, float _m24,
00039                   float _m31, float _m32, float _m33, float _m34,
00040                   float _m41, float _m42, float _m43, float _m44);
00042     AI_TMatrix44SSE(const AI_Quaternion& q);
00044     AI_TMatrix44SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4);
00046     AI_Quaternion get_quaternion() const;
00048     void set(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4);
00050     void set(const AI_TMatrix44SSE& m1);
00052     void set(float _m11, float _m12, float _m13, float _m14,
00053              float _m21, float _m22, float _m23, float _m24,
00054              float _m31, float _m32, float _m33, float _m34,
00055              float _m41, float _m42, float _m43, float _m44);
00057     void set(const AI_Quaternion& q);
00059     void ident();
00061     void transpose();
00063     float det();
00065     void invert(void);
00067     void invert_simple(void);
00069     void mult_simple(const AI_TMatrix44SSE& m1);
00071     AI_TVector3SSE transform_coord(const AI_TVector3SSE& v) const;
00073     AI_TVector3SSE x_component() const;
00075     AI_TVector3SSE y_component() const;
00077     AI_TVector3SSE z_component() const;
00079     AI_TVector3SSE pos_component() const;
00081     void rotate_x(const float a);
00083     void rotate_y(const float a);
00085     void rotate_z(const float a);
00087     void rotate(const AI_TVector3SSE& vec, float a);
00089     void translate(const AI_TVector3SSE& t);
00091     void set_translation(const AI_TVector3SSE& t);
00093     void scale(const AI_TVector3SSE& s);
00095     void lookat(const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00097     void billboard(const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00099     void operator *= (const AI_TMatrix44SSE& m1);
00101     void mult(const AI_TVector4SSE& src, AI_TVector4SSE& dst) const;
00103     void mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const;
00104 
00105     union
00106     {
00107         struct
00108         {
00109             __m128 m1;
00110             __m128 m2;
00111             __m128 m3;
00112             __m128 m4;
00113         };
00114         struct
00115         {
00116             float m[4][4];
00117         };
00118     };
00119 };
00120 
00121 //------------------------------------------------------------------------------
00124 inline
00125 AI_TMatrix44SSE::AI_TMatrix44SSE()
00126 {
00127     memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00128 }
00129 
00130 //------------------------------------------------------------------------------
00133 inline
00134 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4) :
00135     m1(v1.m128), m2(v2.m128), m3(v3.m128), m4(v4.m128)
00136 {
00137     // empty
00138 }
00139 
00140 //------------------------------------------------------------------------------
00143 inline
00144 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_TMatrix44SSE& mx) :
00145     m1(mx.m1), m2(mx.m2), m3(mx.m3), m4(mx.m4)
00146 {
00147     // empty
00148 }
00149 
00150 //------------------------------------------------------------------------------
00153 inline
00154 AI_TMatrix44SSE::AI_TMatrix44SSE(float _m11, float _m12, float _m13, float _m14,
00155                              float _m21, float _m22, float _m23, float _m24,
00156                              float _m31, float _m32, float _m33, float _m34,
00157                              float _m41, float _m42, float _m43, float _m44)
00158 {
00159     m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00160     m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00161     m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00162     m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00163 }
00164 
00165 //------------------------------------------------------------------------------
00169 inline
00170 AI_TMatrix44SSE::AI_TMatrix44SSE(const AI_Quaternion& q) 
00171 {
00172     float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00173     x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00174     xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
00175     yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
00176     wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
00177 
00178     m[0][0] = 1.0f - (yy + zz);
00179     m[1][0] = xy - wz;
00180     m[2][0] = xz + wy;
00181 
00182     m[0][1] = xy + wz;
00183     m[1][1] = 1.0f - (xx + zz);
00184     m[2][1] = yz - wx;
00185 
00186     m[0][2] = xz - wy;
00187     m[1][2] = yz + wx;
00188     m[2][2] = 1.0f - (xx + yy);
00189 
00190     m[3][0] = m[3][1] = m[3][2] = 0.0f;
00191     m[0][3] = m[1][3] = m[2][3] = 0.0f;
00192     m[3][3] = 1.0f;
00193 }
00194 
00195 //------------------------------------------------------------------------------
00198 inline
00199 AI_TMatrix44SSE::AI_TMatrix44SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3, const __m128& _m4) :
00200     m1(_m1), m2(_m2), m3(_m3), m4(_m4)
00201 {
00202     // empty
00203 }
00204 
00205 //------------------------------------------------------------------------------
00212 inline
00213 AI_Quaternion 
00214 AI_TMatrix44SSE::get_quaternion() const
00215 {
00216     float qa[4];
00217     float tr = m[0][0] + m[1][1] + m[2][2];
00218     if (tr > 0.0f) 
00219     {
00220         float s = ai_sqrt (tr + 1.0f);
00221         qa[3] = s * 0.5f;
00222         s = 0.5f / s;
00223         qa[0] = (m[1][2] - m[2][1]) * s;
00224         qa[1] = (m[2][0] - m[0][2]) * s;
00225         qa[2] = (m[0][1] - m[1][0]) * s;
00226     } 
00227     else 
00228     {
00229         int i, j, k, nxt[3] = {1,2,0};
00230         i = 0;
00231         if (m[1][1] > m[0][0]) i=1;
00232         if (m[2][2] > m[i][i]) i=2;
00233         j = nxt[i];
00234         k = nxt[j];
00235         float s = ai_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00236         qa[i] = s * 0.5f;
00237         s = 0.5f / s;
00238         qa[3] = (m[j][k] - m[k][j])* s;
00239         qa[j] = (m[i][j] + m[j][i]) * s;
00240         qa[k] = (m[i][k] + m[k][i]) * s;
00241     }
00242     AI_Quaternion q(qa[0],qa[1],qa[2],qa[3]);
00243     return q;
00244 }
00245 
00246 //------------------------------------------------------------------------------
00249 inline
00250 void 
00251 AI_TMatrix44SSE::set(const AI_TVector4SSE& v1, const AI_TVector4SSE& v2, const AI_TVector4SSE& v3, const AI_TVector4SSE& v4) 
00252 {
00253     m1 = v1.m128;
00254     m2 = v2.m128;
00255     m3 = v3.m128;
00256     m4 = v4.m128;
00257 }
00258 
00259 //------------------------------------------------------------------------------
00262 inline
00263 void 
00264 AI_TMatrix44SSE::set(const AI_TMatrix44SSE& mx) 
00265 {
00266     m1 = mx.m1;
00267     m2 = mx.m2;
00268     m3 = mx.m3;
00269     m4 = mx.m4;
00270 }
00271 
00272 //------------------------------------------------------------------------------
00275 inline
00276 void
00277 AI_TMatrix44SSE::set(float _m11, float _m12, float _m13, float _m14,
00278                    float _m21, float _m22, float _m23, float _m24,
00279                    float _m31, float _m32, float _m33, float _m34,
00280                    float _m41, float _m42, float _m43, float _m44)
00281 {
00282     m1 = _mm_set_ps(_m14, _m13, _m12, _m11);
00283     m2 = _mm_set_ps(_m24, _m23, _m22, _m21);
00284     m3 = _mm_set_ps(_m34, _m33, _m32, _m31);
00285     m4 = _mm_set_ps(_m44, _m43, _m42, _m41);
00286 }
00287 
00288 //------------------------------------------------------------------------------
00292 inline
00293 void 
00294 AI_TMatrix44SSE::set(const AI_Quaternion& q) 
00295 {
00296     float wx, wy, wz, xx, yy, yz, xy, xz, zz, x2, y2, z2;
00297     x2 = q.x + q.x; y2 = q.y + q.y; z2 = q.z + q.z;
00298     xx = q.x * x2;   xy = q.x * y2;   xz = q.x * z2;
00299     yy = q.y * y2;   yz = q.y * z2;   zz = q.z * z2;
00300     wx = q.w * x2;   wy = q.w * y2;   wz = q.w * z2;
00301 
00302     m[0][0] = 1.0f - (yy + zz);
00303     m[1][0] = xy - wz;
00304     m[2][0] = xz + wy;
00305 
00306     m[0][1] = xy + wz;
00307     m[1][1] = 1.0f - (xx + zz);
00308     m[2][1] = yz - wx;
00309 
00310     m[0][2] = xz - wy;
00311     m[1][2] = yz + wx;
00312     m[2][2] = 1.0f - (xx + yy);
00313 
00314     m[3][0] = m[3][1] = m[3][2] = 0.0f;
00315     m[0][3] = m[1][3] = m[2][3] = 0.0f;
00316     m[3][3] = 1.0f;
00317 }
00318 
00319 //------------------------------------------------------------------------------
00322 inline
00323 void 
00324 AI_TMatrix44SSE::ident() 
00325 {
00326     memcpy(&(m[0][0]), _matrix44_sse_ident, sizeof(_matrix44_sse_ident));
00327 }
00328 
00329 //------------------------------------------------------------------------------
00332 inline
00333 void 
00334 AI_TMatrix44SSE::transpose() 
00335 {
00336     _MM_TRANSPOSE4_PS(m1, m2, m3, m4);
00337 }
00338 
00339 //------------------------------------------------------------------------------
00343 inline
00344 float 
00345 AI_TMatrix44SSE::det() 
00346 {
00347     return
00348         (m[0][0] * m[1][1] - m[0][1] * m[1][0]) * (m[2][2] * m[3][3] - m[2][3] * m[3][2])
00349        -(m[0][0] * m[1][2] - m[0][2] * m[1][0]) * (m[2][1] * m[3][3] - m[2][3] * m[3][1])
00350        +(m[0][0] * m[1][3] - m[0][3] * m[1][0]) * (m[2][1] * m[3][2] - m[2][2] * m[3][1])
00351        +(m[0][1] * m[1][2] - m[0][2] * m[1][1]) * (m[2][0] * m[3][3] - m[2][3] * m[3][0])
00352        -(m[0][1] * m[1][3] - m[0][3] * m[1][1]) * (m[2][0] * m[3][2] - m[2][2] * m[3][0])
00353        +(m[0][2] * m[1][3] - m[0][3] * m[1][2]) * (m[2][0] * m[3][1] - m[2][1] * m[3][0]);
00354 }
00355 
00356 //------------------------------------------------------------------------------
00360 inline
00361 void
00362 AI_TMatrix44SSE::invert() 
00363 {
00364     float* src = &(m[0][0]);
00365 
00366     __m128 minor0, minor1, minor2, minor3;
00367     __m128 row0, row1, row2, row3;
00368     __m128 det, tmp1;
00369 
00370     tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src)), (__m64*)(src+ 4));
00371     row1 = _mm_loadh_pi(_mm_loadl_pi(row1, (__m64*)(src+8)), (__m64*)(src+12));
00372 
00373     row0 = _mm_shuffle_ps(tmp1, row1, 0x88);
00374     row1 = _mm_shuffle_ps(row1, tmp1, 0xDD);
00375 
00376     tmp1 = _mm_loadh_pi(_mm_loadl_pi(tmp1, (__m64*)(src+ 2)), (__m64*)(src+ 6));
00377     row3 = _mm_loadh_pi(_mm_loadl_pi(row3, (__m64*)(src+10)), (__m64*)(src+14));
00378 
00379     row2 = _mm_shuffle_ps(tmp1, row3, 0x88);
00380     row3 = _mm_shuffle_ps(row3, tmp1, 0xDD);
00381 
00382     tmp1 = _mm_mul_ps(row2, row3);
00383     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00384 
00385     minor0 = _mm_mul_ps(row1, tmp1);
00386     minor1 = _mm_mul_ps(row0, tmp1);
00387 
00388     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00389 
00390     minor0 = _mm_sub_ps(_mm_mul_ps(row1, tmp1), minor0);
00391     minor1 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor1);
00392     minor1 = _mm_shuffle_ps(minor1, minor1, 0x4E);
00393 
00394     tmp1 = _mm_mul_ps(row1, row2);
00395     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00396 
00397     minor0 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor0);
00398     minor3 = _mm_mul_ps(row0, tmp1);
00399 
00400     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00401     
00402     minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row3, tmp1));
00403     minor3 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor3);
00404     minor3 = _mm_shuffle_ps(minor3, minor3, 0x4E);
00405 
00406     tmp1 = _mm_mul_ps(_mm_shuffle_ps(row1, row1, 0x4E), row3);
00407     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00408     row2 = _mm_shuffle_ps(row2, row2, 0x4E);
00409 
00410     minor0 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor0);
00411     minor2 = _mm_mul_ps(row0, tmp1);
00412 
00413     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00414 
00415     minor0 = _mm_sub_ps(minor0, _mm_mul_ps(row2, tmp1));
00416     minor2 = _mm_sub_ps(_mm_mul_ps(row0, tmp1), minor2);
00417     minor2 = _mm_shuffle_ps(minor2, minor2, 0x4E);
00418 
00419     tmp1 = _mm_mul_ps(row0, row1);
00420     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00421 
00422     minor2 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor2);
00423     minor3 = _mm_sub_ps(_mm_mul_ps(row2, tmp1), minor3);
00424 
00425     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00426 
00427     minor2 = _mm_sub_ps(_mm_mul_ps(row3, tmp1), minor2);
00428     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row2, tmp1));
00429 
00430     tmp1 = _mm_mul_ps(row0, row3);
00431     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00432 
00433     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row2, tmp1));
00434     minor2 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor2);
00435 
00436     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00437 
00438     minor1 = _mm_add_ps(_mm_mul_ps(row2, tmp1), minor1);
00439     minor2 = _mm_sub_ps(minor2, _mm_mul_ps(row1, tmp1));
00440 
00441     tmp1 = _mm_mul_ps(row0, row2);
00442     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0xB1);
00443 
00444     minor1 = _mm_add_ps(_mm_mul_ps(row3, tmp1), minor1);
00445     minor3 = _mm_sub_ps(minor3, _mm_mul_ps(row1, tmp1));
00446 
00447     tmp1 = _mm_shuffle_ps(tmp1, tmp1, 0x4E);
00448 
00449     minor1 = _mm_sub_ps(minor1, _mm_mul_ps(row3, tmp1));
00450     minor3 = _mm_add_ps(_mm_mul_ps(row1, tmp1), minor3);
00451 
00452     det = _mm_mul_ps(row0, minor0);
00453     det = _mm_add_ps(_mm_shuffle_ps(det, det, 0x4E), det);
00454     det = _mm_add_ss(_mm_shuffle_ps(det, det, 0xB1), det);
00455     tmp1 = _mm_rcp_ss(det);
00456 
00457     det = _mm_sub_ss(_mm_add_ss(tmp1, tmp1), _mm_mul_ss(det, _mm_mul_ss(tmp1, tmp1)));
00458     det = _mm_shuffle_ps(det, det, 0x00);
00459 
00460     minor0 = _mm_mul_ps(det, minor0);
00461     _mm_storel_pi((__m64*)(src), minor0);
00462     _mm_storeh_pi((__m64*)(src+2), minor0);
00463 
00464     minor1 = _mm_mul_ps(det, minor1);
00465     _mm_storel_pi((__m64*)(src+4), minor1);
00466     _mm_storeh_pi((__m64*)(src+6), minor1);
00467 
00468     minor2 = _mm_mul_ps(det, minor2);
00469     _mm_storel_pi((__m64*)(src+ 8), minor2);
00470     _mm_storeh_pi((__m64*)(src+10), minor2);
00471 
00472     minor3 = _mm_mul_ps(det, minor3);
00473     _mm_storel_pi((__m64*)(src+12), minor3);
00474     _mm_storeh_pi((__m64*)(src+14), minor3);
00475 }
00476 
00477 //------------------------------------------------------------------------------
00485 inline
00486 void 
00487 AI_TMatrix44SSE::invert_simple() 
00488 {
00489     float s = det();
00490     if (s == 0.0f) return;
00491     s = 1.0f/s;
00492     this->set(
00493         s * ((m[1][1] * m[2][2]) - (m[1][2] * m[2][1])),
00494         s * ((m[2][1] * m[0][2]) - (m[2][2] * m[0][1])),
00495         s * ((m[0][1] * m[1][2]) - (m[0][2] * m[1][1])),
00496         0.0f,
00497         s * ((m[1][2] * m[2][0]) - (m[1][0] * m[2][2])),
00498         s * ((m[2][2] * m[0][0]) - (m[2][0] * m[0][2])),
00499         s * ((m[0][2] * m[1][0]) - (m[0][0] * m[1][2])),
00500         0.0f,
00501         s * ((m[1][0] * m[2][1]) - (m[1][1] * m[2][0])),
00502         s * ((m[2][0] * m[0][1]) - (m[2][1] * m[0][0])),
00503         s * ((m[0][0] * m[1][1]) - (m[0][1] * m[1][0])),
00504         0.0f,
00505         s * (m[1][0]*(m[2][2]*m[3][1] - m[2][1]*m[3][2]) + m[1][1]*(m[2][0]*m[3][2] - m[2][2]*m[3][0]) + m[1][2]*(m[2][1]*m[3][0] - m[2][0]*m[3][1])),
00506         s * (m[2][0]*(m[0][2]*m[3][1] - m[0][1]*m[3][2]) + m[2][1]*(m[0][0]*m[3][2] - m[0][2]*m[3][0]) + m[2][2]*(m[0][1]*m[3][0] - m[0][0]*m[3][1])),
00507         s * (m[3][0]*(m[0][2]*m[1][1] - m[0][1]*m[1][2]) + m[3][1]*(m[0][0]*m[1][2] - m[0][2]*m[1][0]) + m[3][2]*(m[0][1]*m[1][0] - m[0][0]*m[1][1])),
00508         1.0f);
00509 }
00510 
00511 //------------------------------------------------------------------------------
00519 inline
00520 void
00521 AI_TMatrix44SSE::mult_simple(const AI_TMatrix44SSE& mx) 
00522 {
00523     m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00524     m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00525     m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00526     m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00527 }
00528 
00529 //------------------------------------------------------------------------------
00535 inline
00536 AI_TVector3SSE
00537 AI_TMatrix44SSE::transform_coord(const AI_TVector3SSE& v) const
00538 {
00539     float d = 1.0f / (m[0][3]*v.x + m[1][3]*v.y + m[2][3]*v.z + m[3][3]);
00540     return AI_TVector3SSE(
00541         (m[0][0]*v.x + m[1][0]*v.y + m[2][0]*v.z + m[3][0]) * d,
00542         (m[0][1]*v.x + m[1][1]*v.y + m[2][1]*v.z + m[3][1]) * d,
00543         (m[0][2]*v.x + m[1][2]*v.y + m[2][2]*v.z + m[3][2]) * d);
00544 }
00545 
00546 //------------------------------------------------------------------------------
00549 inline
00550 AI_TVector3SSE 
00551 AI_TMatrix44SSE::x_component() const
00552 {
00553     AI_TVector3SSE v(m1);
00554     return v;
00555 }
00556 
00557 //------------------------------------------------------------------------------
00560 inline
00561 AI_TVector3SSE 
00562 AI_TMatrix44SSE::y_component() const
00563 {
00564     AI_TVector3SSE v(m2);
00565     return v;
00566 }
00567 
00568 //------------------------------------------------------------------------------
00571 inline
00572 AI_TVector3SSE 
00573 AI_TMatrix44SSE::z_component() const 
00574 {
00575     AI_TVector3SSE v(m3);
00576     return v;
00577 }
00578 
00579 //------------------------------------------------------------------------------
00582 inline
00583 AI_TVector3SSE 
00584 AI_TMatrix44SSE::pos_component() const 
00585 {
00586     AI_TVector3SSE v(m[3][0], m[3][1], m[3][2]);
00587     return v;
00588 }
00589 
00590 //------------------------------------------------------------------------------
00594 inline
00595 void
00596 AI_TMatrix44SSE::rotate_x(const float a) 
00597 {
00598     float c = ai_cos(a);
00599     float s = ai_sin(a);
00600     int i;
00601     for (i=0; i<4; i++) {
00602         float mi1 = m[i][1];
00603         float mi2 = m[i][2];
00604         m[i][1] = mi1*c + mi2*-s;
00605         m[i][2] = mi1*s + mi2*c;
00606     }
00607 }
00608 
00609 //------------------------------------------------------------------------------
00613 inline
00614 void 
00615 AI_TMatrix44SSE::rotate_y(const float a) 
00616 {
00617     float c = ai_cos(a);
00618     float s = ai_sin(a);
00619     int i;
00620     for (i=0; i<4; i++) {
00621         float mi0 = m[i][0];
00622         float mi2 = m[i][2];
00623         m[i][0] = mi0*c + mi2*s;
00624         m[i][2] = mi0*-s + mi2*c;
00625     }
00626 }
00627 
00628 //------------------------------------------------------------------------------
00632 inline
00633 void 
00634 AI_TMatrix44SSE::rotate_z(const float a) 
00635 {
00636     float c = ai_cos(a);
00637     float s = ai_sin(a);
00638     int i;
00639     for (i=0; i<4; i++) {
00640         float mi0 = m[i][0];
00641         float mi1 = m[i][1];
00642         m[i][0] = mi0*c + mi1*-s;
00643         m[i][1] = mi0*s + mi1*c;
00644     }
00645 }
00646 
00647 //------------------------------------------------------------------------------
00650 inline
00651 void 
00652 AI_TMatrix44SSE::translate(const AI_TVector3SSE& t) 
00653 {
00654     m4 = _mm_add_ps(m4, t.m128);
00655 }
00656 
00657 //------------------------------------------------------------------------------
00661 inline
00662 void
00663 AI_TMatrix44SSE::set_translation(const AI_TVector3SSE& t) 
00664 {
00665     m4 = t.m128;
00666 };
00667 
00668 //------------------------------------------------------------------------------
00671 inline
00672 void
00673 AI_TMatrix44SSE::scale(const AI_TVector3SSE& s) 
00674 {
00675     // AI_TVector3SSE have the w element set to zero, we need it at 1...
00676     __m128 scale = _mm_add_ps(_mm_set_ps(1.0f, 0.0f, 0.0f, 0.0f), s.m128);
00677     m1 = _mm_mul_ps(m1, scale);
00678     m2 = _mm_mul_ps(m2, scale);
00679     m3 = _mm_mul_ps(m3, scale);
00680     m4 = _mm_mul_ps(m4, scale);
00681 }
00682 
00683 //------------------------------------------------------------------------------
00686 inline
00687 void 
00688 AI_TMatrix44SSE::lookat(const AI_TVector3SSE& to, const AI_TVector3SSE& up) 
00689 {
00690     AI_TVector3SSE from(m[3][0], m[3][1], m[3][2]);
00691     AI_TVector3SSE z(from - to);
00692     z.norm();
00693     AI_TVector3SSE y(up);
00694     AI_TVector3SSE x(y * z);  // x = y cross z
00695     y = z * x;              // y = z cross x
00696     x.norm();
00697     y.norm();
00698 
00699     m1 = x.m128;
00700     m2 = y.m128;
00701     m3 = z.m128;
00702 }
00703 
00704 //------------------------------------------------------------------------------
00707 inline
00708 void 
00709 AI_TMatrix44SSE::billboard(const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00710 {
00711     AI_TVector3SSE from(m[3][0], m[3][1], m[3][2]);
00712     AI_TVector3SSE z(from - to);
00713     z.norm();
00714     AI_TVector3SSE y(up);
00715     AI_TVector3SSE x(y * z);
00716     z = x * y;       
00717     x.norm();
00718     y.norm();
00719     z.norm();
00720 
00721     m1 = x.m128;
00722     m2 = y.m128;
00723     m3 = z.m128;
00724 }
00725 
00726 //------------------------------------------------------------------------------
00732 inline
00733 void
00734 AI_TMatrix44SSE::operator *= (const AI_TMatrix44SSE& mx) 
00735 {
00736     m1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00737     m2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00738     m3 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00739     m4 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(0,0,0,0)), mx.m1), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(1,1,1,1)), mx.m2)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(2,2,2,2)), mx.m3)), _mm_mul_ps(_mm_shuffle_ps(m4, m4, _MM_SHUFFLE(3,3,3,3)), mx.m4));
00740 }
00741 
00742 //------------------------------------------------------------------------------
00746 inline
00747 void 
00748 AI_TMatrix44SSE::rotate(const AI_TVector3SSE& vec, float a)
00749 {
00750     AI_TVector3SSE v(vec);
00751     v.norm();
00752     float sa = (float) ai_sin(a);
00753     float ca = (float) ai_cos(a);
00754 
00755     AI_TMatrix44SSE rotM;
00756     rotM.m[0][0] = ca + (1.0f - ca) * v.x * v.x;
00757     rotM.m[0][1] = (1.0f - ca) * v.x * v.y - sa * v.z;
00758     rotM.m[0][2] = (1.0f - ca) * v.z * v.x + sa * v.y;
00759     rotM.m[1][0] = (1.0f - ca) * v.x * v.y + sa * v.z;
00760     rotM.m[1][1] = ca + (1.0f - ca) * v.y * v.y;
00761     rotM.m[1][2] = (1.0f - ca) * v.y * v.z - sa * v.x;
00762     rotM.m[2][0] = (1.0f - ca) * v.z * v.x - sa * v.y;
00763     rotM.m[2][1] = (1.0f - ca) * v.y * v.z + sa * v.x;
00764     rotM.m[2][2] = ca + (1.0f - ca) * v.z * v.z;
00765     
00766     (*this) *= rotM;
00767 }
00768 
00769 //------------------------------------------------------------------------------
00774 inline
00775 void
00776 AI_TMatrix44SSE::mult(const AI_TVector4SSE& src, AI_TVector4SSE& dst) const
00777 {
00778     dst.m128 = _mm_add_ps(
00779                _mm_add_ps(
00780                _mm_add_ps(
00781                     _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))), 
00782                     _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))), 
00783                     _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))), 
00784                     _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00785 }
00786 
00787 //------------------------------------------------------------------------------
00792 inline
00793 void
00794 AI_TMatrix44SSE::mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const
00795 {
00796     dst.m128 = _mm_add_ps(
00797                _mm_add_ps(
00798                _mm_add_ps(
00799                     _mm_mul_ps(m1, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0))), 
00800                     _mm_mul_ps(m2, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)))), 
00801                     _mm_mul_ps(m3, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)))), 
00802                     _mm_mul_ps(m4, _mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(3,3,3,3))));
00803 }
00804 
00805 //------------------------------------------------------------------------------
00808 static 
00809 inline 
00810 AI_TMatrix44SSE 
00811 operator * (const AI_TMatrix44SSE& ma, const AI_TMatrix44SSE& mb) 
00812 {
00813     return AI_TMatrix44SSE(
00814         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00815         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00816         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(3,3,3,3)), mb.m4)),
00817         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(0,0,0,0)), mb.m1), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(1,1,1,1)), mb.m2)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(2,2,2,2)), mb.m3)), _mm_mul_ps(_mm_shuffle_ps(ma.m4, ma.m4, _MM_SHUFFLE(3,3,3,3)), mb.m4))
00818     );
00819 }
00820 
00821 //------------------------------------------------------------------------------
00824 static 
00825 inline 
00826 AI_TVector3SSE operator * (const AI_TMatrix44SSE& m, const AI_TVector3SSE& v)
00827 {
00828     return AI_TVector3SSE(
00829         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_set_ps(0.0f, 1.0f, 1.0f, 1.0f)))
00830     );
00831 }
00832 
00833 //------------------------------------------------------------------------------
00836 static 
00837 inline 
00838 AI_TVector4SSE operator * (const AI_TMatrix44SSE& m, const AI_TVector4SSE& v)
00839 {
00840     return AI_TVector4SSE(
00841         _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(m.m1, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0))), _mm_mul_ps(m.m2, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)))), _mm_mul_ps(m.m3, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)))), _mm_mul_ps(m.m4, _mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(3,3,3,3))))
00842     );
00843 }
00844 
00845 //------------------------------------------------------------------------------
00846 #endif