AI_TMatrix33_sse.h

Go to the documentation of this file.
00001 #ifndef AI__MATRIX33_SSE_H
00002 #define AI__MATRIX33_SSE_H
00003 //------------------------------------------------------------------------------
00012 #include "AI_TVector3SSE.h"
00013 #include "AI_Quaternion.h"
00014 #include "AI_Euler.h"
00015 #include "AI_Matrixdefs.h"
00016 
00017 #include <string.h>
00018 
/// Identity matrix template: three rows of four floats (the fourth float of
/// each row is padding so that a row fills one __m128). Read-only; it is only
/// ever used as a memcpy source, so it is declared const.
static const float _matrix33_sse_ident[12] =
{
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f,
};
00025 
00026 class AI_TMatrix33SSE 
00027 {
00028 public:
00030     AI_TMatrix33SSE();
00032     AI_TMatrix33SSE(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3);
00034     AI_TMatrix33SSE(const AI_TMatrix33SSE& mx);
00036     AI_TMatrix33SSE(float _m11, float _m12, float _m13, float _m21, float _m22, float _m23, float _m31, float _m32, float _m33);
00038     AI_TMatrix33SSE(const AI_Quaternion& q);
00040     AI_TMatrix33SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3);
00042     AI_Quaternion get_quaternion() const;
00044     AI_TVector3SSE to_euler() const;
00046     void from_euler(const AI_TVector3SSE& ea);
00048     void lookat(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00050     void billboard(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00052     void set(float m11, float m12, float m13, float m21, float m22, float m23, float m31, float m32, float m33);
00054     void set(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3);
00056     void set(const AI_TMatrix33SSE& mx);
00058     void ident();
00060     void transpose();
00062     bool orthonorm(float limit);
00064     void scale(const AI_TVector3SSE& s);
00066     void rotate_x(const float a);
00068     void rotate_y(const float a);
00070     void rotate_z(const float a);
00072     void rotate_local_x(const float a);
00074     void rotate_local_y(const float a);
00076     void rotate_local_z(const float a);
00078     void rotate(const AI_TVector3SSE& vec, float a);
00080     AI_TVector3SSE x_component(void) const;
00082     AI_TVector3SSE y_component(void) const;
00084     AI_TVector3SSE z_component(void) const;
00085     // inplace matrix multiply
00086     void operator *= (const AI_TMatrix33SSE& m1);
00088     void mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const;
00089 
00090     union
00091     {
00092         struct
00093         {
00094             __m128 m1;
00095             __m128 m2;
00096             __m128 m3;
00097         };
00098         struct
00099         {
00100             float m[3][4];
00101         };
00102     };
00103 };
00104 
00105 //------------------------------------------------------------------------------
00109 static 
00110 inline 
00111 AI_TMatrix33SSE operator * (const AI_TMatrix33SSE& ma, const AI_TMatrix33SSE& mb) 
00112 {
00113     return AI_TMatrix33SSE(
00114         _mm_add_ps(
00115         _mm_add_ps(
00116             _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1), 
00117             _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)), 
00118             _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)),
00119 
00120         _mm_add_ps(
00121         _mm_add_ps(
00122             _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1), 
00123             _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)), 
00124             _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)),
00125 
00126         _mm_add_ps(
00127         _mm_add_ps(
00128             _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1), 
00129             _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)), 
00130             _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)));
00131 }
00132 
00133 //------------------------------------------------------------------------------
00137 static 
00138 inline 
00139 AI_TVector3SSE operator * (const AI_TMatrix33SSE& mx, const AI_TVector3SSE& v)
00140 {
00141     return AI_TVector3SSE(
00142         _mm_add_ps(
00143         _mm_add_ps(
00144             _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0)), mx.m1), 
00145             _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)), mx.m2)), 
00146             _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)), mx.m3)));
00147 }
00148 
00149 //------------------------------------------------------------------------------
00152 inline
00153 AI_TMatrix33SSE::AI_TMatrix33SSE() 
00154 {
00155     memcpy(&(m[0][0]), _matrix33_sse_ident, sizeof(_matrix33_sse_ident));
00156 }
00157 
00158 //------------------------------------------------------------------------------
00161 inline
00162 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3) :
00163     m1(v1.m128),
00164     m2(v2.m128),
00165     m3(v3.m128)
00166 {
00167     // empty
00168 }
00169 
00170 //------------------------------------------------------------------------------
00173 inline
00174 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_TMatrix33SSE& mx) :
00175     m1(mx.m1),
00176     m2(mx.m2),
00177     m3(mx.m3)
00178 {
00179     // empty
00180 }
00181 
00182 //------------------------------------------------------------------------------
00185 inline
00186 AI_TMatrix33SSE::AI_TMatrix33SSE(float _m11, float _m12, float _m13,
00187                              float _m21, float _m22, float _m23,
00188                              float _m31, float _m32, float _m33)
00189 {
00190     m1 = _mm_set_ps(0.0f, _m13, _m12, _m11);
00191     m2 = _mm_set_ps(0.0f, _m23, _m22, _m21);
00192     m3 = _mm_set_ps(0.0f, _m33, _m32, _m31);
00193 }
00194 
00195 //------------------------------------------------------------------------------
00198 inline
00199 AI_TMatrix33SSE::AI_TMatrix33SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3) :
00200     m1(_m1),
00201     m2(_m2),
00202     m3(_m3)
00203 {
00204     // empty
00205 }
00206 
00207 //------------------------------------------------------------------------------
00211 inline
00212 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_Quaternion& q) 
00213 {
00214     float xx = q.x*q.x; float yy = q.y*q.y; float zz = q.z*q.z;
00215     float xy = q.x*q.y; float xz = q.x*q.z; float yz = q.y*q.z;
00216     float wx = q.w*q.x; float wy = q.w*q.y; float wz = q.w*q.z;
00217 
00218     m[0][0] = 1.0f - 2.0f * (yy + zz);
00219     m[1][0] =        2.0f * (xy - wz);
00220     m[2][0] =        2.0f * (xz + wy);
00221 
00222     m[0][1] =        2.0f * (xy + wz);
00223     m[1][1] = 1.0f - 2.0f * (xx + zz);
00224     m[2][1] =        2.0f * (yz - wx);
00225 
00226     m[0][2] =        2.0f * (xz - wy);
00227     m[1][2] =        2.0f * (yz + wx);
00228     m[2][2] = 1.0f - 2.0f * (xx + yy);
00229 }
00230 
00231 //------------------------------------------------------------------------------
00235 inline
00236 AI_Quaternion 
00237 AI_TMatrix33SSE::get_quaternion() const
00238 {
00239     float qa[4];
00240     float tr = m[0][0] + m[1][1] + m[2][2];
00241     if (tr > 0.0f) 
00242     {
00243         float s = ai_sqrt (tr + 1.0f);
00244         qa[3] = s * 0.5f;
00245         s = 0.5f / s;
00246         qa[0] = (m[1][2] - m[2][1]) * s;
00247         qa[1] = (m[2][0] - m[0][2]) * s;
00248         qa[2] = (m[0][1] - m[1][0]) * s;
00249     } 
00250     else 
00251     {
00252         int i, j, k, nxt[3] = {1,2,0};
00253         i = 0;
00254         if (m[1][1] > m[0][0]) i=1;
00255         if (m[2][2] > m[i][i]) i=2;
00256         j = nxt[i];
00257         k = nxt[j];
00258         float s = ai_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00259         qa[i] = s * 0.5f;
00260         s = 0.5f / s;
00261         qa[3] = (m[j][k] - m[k][j])* s;
00262         qa[j] = (m[i][j] + m[j][i]) * s;
00263         qa[k] = (m[i][k] + m[k][i]) * s;
00264     }
00265     AI_Quaternion q(qa[0],qa[1],qa[2],qa[3]);
00266     return q;
00267 }
00268 
00269 //------------------------------------------------------------------------------
00273 inline
00274 AI_TVector3SSE
00275 AI_TMatrix33SSE::to_euler() const
00276 {    
00277     AI_TVector3SSE ea;
00278     
00279     // work on matrix with flipped row/columns
00280     AI_TMatrix33SSE tmp(*this);
00281     tmp.transpose();
00282 
00283     int i,j,k,h,n,s,f;
00284     EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
00285     if (s==EulRepYes) 
00286     {
00287         double sy = (float) sqrt(tmp.m[0][1] * tmp.m[0][1] + tmp.m[0][2] * tmp.m[0][2]);
00288         if (sy > 16*FLT_EPSILON) 
00289         {
00290             ea.x = (float) atan2(tmp.m[0][1], tmp.m[0][2]);
00291             ea.y = (float) atan2(sy, tmp.m[0][0]);
00292             ea.z = (float) atan2(tmp.m[1][0], -tmp.m[2][0]);
00293         } else {
00294             ea.x = (float) atan2(-tmp.m[1][2], tmp.m[1][1]);
00295             ea.y = (float) atan2(sy, tmp.m[0][0]);
00296             ea.z = 0;
00297         }
00298     } 
00299     else 
00300     {
00301         double cy = sqrt(tmp.m[0][0] * tmp.m[0][0] + tmp.m[1][0] * tmp.m[1][0]);
00302         if (cy > 16*FLT_EPSILON) 
00303         {
00304             ea.x = (float) atan2(tmp.m[2][1], tmp.m[2][2]);
00305             ea.y = (float) atan2(-tmp.m[2][0], cy);
00306             ea.z = (float) atan2(tmp.m[1][0], tmp.m[0][0]);
00307         } 
00308         else 
00309         {
00310             ea.x = (float) atan2(-tmp.m[1][2], tmp.m[1][1]);
00311             ea.y = (float) atan2(-tmp.m[2][0], cy);
00312             ea.z = 0;
00313         }
00314     }
00315     if (n==EulParOdd) {ea.x = -ea.x; ea.y = - ea.y; ea.z = -ea.z;}
00316     if (f==EulFrmR) {float t = ea.x; ea.x = ea.z; ea.z = t;}
00317 
00318     return ea;
00319 }
00320 
00321 //------------------------------------------------------------------------------
00325 inline
00326 void 
00327 AI_TMatrix33SSE::from_euler(const AI_TVector3SSE& ea) 
00328 {
00329     AI_TVector3SSE tea = ea;
00330     double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
00331     int i,j,k,h,n,s,f;
00332     EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
00333     if (f==EulFrmR) {float t = ea.x; tea.x = ea.z; tea.z = t;}
00334     if (n==EulParOdd) {tea.x = -ea.x; tea.y = -ea.y; tea.z = -ea.z;}
00335     ti = tea.x;   tj = tea.y;   th = tea.z;
00336     ci = cos(ti); cj = cos(tj); ch = cos(th);
00337     si = sin(ti); sj = sin(tj); sh = sin(th);
00338     cc = ci*ch; cs = ci*sh; sc = si*ch; ss = si*sh;
00339     if (s==EulRepYes) 
00340     {
00341         m[0][0] = (float)(cj);     m[0][1] = (float)(sj*si);     m[0][2] = (float)(sj*ci);
00342         m[1][0] = (float)(sj*sh);  m[1][1] = (float)(-cj*ss+cc); m[1][2] = (float)(-cj*cs-sc);
00343         m[2][0] = (float)(-sj*ch); m[1][2] = (float)( cj*sc+cs); m[2][2] = (float)( cj*cc-ss);
00344     } 
00345     else 
00346     {
00347         m[0][0] = (float)(cj*ch); m[0][1] = (float)(sj*sc-cs); m[0][2] = (float)(sj*cc+ss);
00348         m[1][0] = (float)(cj*sh); m[1][1] = (float)(sj*ss+cc); m[1][2] = (float)(sj*cs-sc);
00349         m[2][0] = (float)(-sj);   m[2][1] = (float)(cj*si);    m[2][2] = (float)(cj*ci);
00350     }
00351 
00352     // flip row/column
00353     this->transpose();
00354 }
00355 
00356 //------------------------------------------------------------------------------
00359 inline
00360 void 
00361 AI_TMatrix33SSE::lookat(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up) 
00362 {
00363     AI_TVector3SSE z(from - to);
00364     z.norm();
00365     AI_TVector3SSE y(up);
00366     AI_TVector3SSE x(y * z);   // x = y cross z
00367     y = z * x;          // y = z cross x
00368     x.norm();
00369     y.norm();
00370 
00371     m1 = x.m128;
00372     m2 = y.m128;
00373     m3 = z.m128;
00374 }
00375 
00376 //------------------------------------------------------------------------------
00379 inline
00380 void 
00381 AI_TMatrix33SSE::billboard(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00382 {
00383     AI_TVector3SSE z(from - to);
00384     z.norm();
00385     AI_TVector3SSE y(up);
00386     AI_TVector3SSE x(y * z);
00387     z = x * y;
00388     x.norm();
00389     y.norm();
00390     z.norm();
00391 
00392     m1 = x.m128;
00393     m2 = y.m128;
00394     m3 = z.m128;
00395 }
00396 
00397 //------------------------------------------------------------------------------
00400 inline
00401 void 
00402 AI_TMatrix33SSE::set(float m11, float m12, float m13,
00403                    float m21, float m22, float m23,
00404                    float m31, float m32, float m33) 
00405 {
00406     m1 = _mm_set_ps(0.0f, m13, m12, m11);
00407     m2 = _mm_set_ps(0.0f, m23, m22, m21);
00408     m3 = _mm_set_ps(0.0f, m33, m32, m31);
00409 }
00410 
00411 //------------------------------------------------------------------------------
00414 inline
00415 void 
00416 AI_TMatrix33SSE::set(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3) 
00417 {
00418     m1 = v1.m128;
00419     m2 = v2.m128;
00420     m3 = v3.m128;
00421 }
00422 
00423 //------------------------------------------------------------------------------
00426 inline
00427 void 
00428 AI_TMatrix33SSE::set(const AI_TMatrix33SSE& mx) 
00429 {
00430     m1 = mx.m1;
00431     m2 = mx.m2;
00432     m3 = mx.m3;
00433 }
00434 
00435 //------------------------------------------------------------------------------
00438 inline
00439 void 
00440 AI_TMatrix33SSE::ident() 
00441 {
00442     memcpy(&(m[0][0]), _matrix33_sse_ident, sizeof(_matrix33_sse_ident));
00443 }
00444 
00445 //------------------------------------------------------------------------------
00449 inline
00450 void 
00451 AI_TMatrix33SSE::transpose() 
00452 {
00453     #undef n_swap
00454     #define n_swap(x,y) { float t=x; x=y; y=t; }
00455     n_swap(m[0][1],m[1][0]);
00456     n_swap(m[0][2],m[2][0]);
00457     n_swap(m[1][2],m[2][1]);
00458 }
00459 
00460 //------------------------------------------------------------------------------
00464 inline
00465 bool 
00466 AI_TMatrix33SSE::orthonorm(float limit) 
00467 {
00468     if (((m[0][0]*m[1][0]+m[0][1]*m[1][1]+m[0][2]*m[1][2])<limit) &&
00469         ((m[0][0]*m[2][0]+m[0][1]*m[2][1]+m[0][2]*m[2][2])<limit) &&
00470         ((m[2][0]*m[1][0]+m[2][1]*m[1][1]+m[2][2]*m[1][2])<limit) &&
00471         ((m[0][0]*m[0][0]+m[0][1]*m[0][1]+m[0][2]*m[0][2])>(1.0-limit)) &&
00472         ((m[0][0]*m[0][0]+m[0][1]*m[0][1]+m[0][2]*m[0][2])<(1.0+limit)) &&
00473         ((m[1][0]*m[1][0]+m[1][1]*m[1][1]+m[1][2]*m[1][2])>(1.0-limit)) &&
00474         ((m[1][0]*m[1][0]+m[1][1]*m[1][1]+m[1][2]*m[1][2])<(1.0+limit)) &&
00475         ((m[2][0]*m[2][0]+m[2][1]*m[2][1]+m[2][2]*m[2][2])>(1.0-limit)) &&
00476         ((m[2][0]*m[2][0]+m[2][1]*m[2][1]+m[2][2]*m[2][2])<(1.0+limit)))
00477         return true;
00478     else
00479         return false;
00480 }
00481 
00482 //------------------------------------------------------------------------------
00485 inline
00486 void
00487 AI_TMatrix33SSE::scale(const AI_TVector3SSE& s)
00488 {
00489     m1 = _mm_mul_ps(m1, s.m128);
00490     m2 = _mm_mul_ps(m2, s.m128);
00491     m3 = _mm_mul_ps(m3, s.m128);
00492 }
00493 
00494 //------------------------------------------------------------------------------
00498 inline
00499 void 
00500 AI_TMatrix33SSE::rotate_x(const float a)
00501 {
00502     float c = ai_cos(a);
00503     float s = ai_sin(a);
00504     int i;
00505     for (i=0; i<3; i++)
00506     {
00507         float mi1 = m[i][1];
00508         float mi2 = m[i][2];
00509         m[i][1] = mi1*c + mi2*-s;
00510         m[i][2] = mi1*s + mi2*c;
00511     }
00512 }
00513 
00514 //------------------------------------------------------------------------------
00518 inline
00519 void 
00520 AI_TMatrix33SSE::rotate_y(const float a)
00521 {
00522     float c = ai_cos(a);
00523     float s = ai_sin(a);
00524     int i;
00525     for (i=0; i<3; i++)
00526     {
00527         float mi0 = m[i][0];
00528         float mi2 = m[i][2];
00529         m[i][0] = mi0*c + mi2*s;
00530         m[i][2] = mi0*-s + mi2*c;
00531     }
00532 }
00533 
00534 //------------------------------------------------------------------------------
00538 inline
00539 void 
00540 AI_TMatrix33SSE::rotate_z(const float a)
00541 {
00542     float c = ai_cos(a);
00543     float s = ai_sin(a);
00544     int i;
00545     for (i=0; i<3; i++)
00546     {
00547         float mi0 = m[i][0];
00548         float mi1 = m[i][1];
00549         m[i][0] = mi0*c + mi1*-s;
00550         m[i][1] = mi0*s + mi1*c;
00551     }
00552 }
00553 
00554 //------------------------------------------------------------------------------
00558 inline
00559 void 
00560 AI_TMatrix33SSE::rotate_local_x(const float a)
00561 {
00562     AI_TMatrix33SSE rotM;  // initialized as identity matrix
00563     rotM.m[1][1] = (float) cos(a); rotM.m[1][2] = -(float) sin(a);
00564     rotM.m[2][1] = (float) sin(a); rotM.m[2][2] =  (float) cos(a);
00565 
00566     (*this) = rotM * (*this); 
00567 }
00568 
00569 //------------------------------------------------------------------------------
00573 inline
00574 void 
00575 AI_TMatrix33SSE::rotate_local_y(const float a)
00576 {
00577     AI_TMatrix33SSE rotM;  // initialized as identity matrix
00578     rotM.m[0][0] = (float) cos(a);  rotM.m[0][2] = (float) sin(a);
00579     rotM.m[2][0] = -(float) sin(a); rotM.m[2][2] = (float) cos(a);
00580 
00581     (*this) = rotM * (*this); 
00582 }
00583 
00584 //------------------------------------------------------------------------------
00588 inline
00589 void 
00590 AI_TMatrix33SSE::rotate_local_z(const float a)
00591 {
00592     AI_TMatrix33SSE rotM;  // initialized as identity matrix
00593     rotM.m[0][0] = (float) cos(a); rotM.m[0][1] = -(float) sin(a);
00594     rotM.m[1][0] = (float) sin(a); rotM.m[1][1] =  (float) cos(a);
00595 
00596     (*this) = rotM * (*this); 
00597 }
00598 
00599 //------------------------------------------------------------------------------
00603 inline
00604 void 
00605 AI_TMatrix33SSE::rotate(const AI_TVector3SSE& vec, float a)
00606 {
00607     AI_TVector3SSE v(vec);
00608     v.norm();
00609     float sa = (float) ai_sin(a);
00610     float ca = (float) ai_cos(a);
00611 
00612     AI_TMatrix33SSE rotM;
00613     rotM.m[0][0] = ca + (1.0f - ca) * v.x * v.x;
00614     rotM.m[0][1] = (1.0f - ca) * v.x * v.y - sa * v.z;
00615     rotM.m[0][2] = (1.0f - ca) * v.z * v.x + sa * v.y;
00616     rotM.m[1][0] = (1.0f - ca) * v.x * v.y + sa * v.z;
00617     rotM.m[1][1] = ca + (1.0f - ca) * v.y * v.y;
00618     rotM.m[1][2] = (1.0f - ca) * v.y * v.z - sa * v.x;
00619     rotM.m[2][0] = (1.0f - ca) * v.z * v.x - sa * v.y;
00620     rotM.m[2][1] = (1.0f - ca) * v.y * v.z + sa * v.x;
00621     rotM.m[2][2] = ca + (1.0f - ca) * v.z * v.z;
00622     
00623     (*this) = (*this) * rotM;
00624 }
00625 
00626 //------------------------------------------------------------------------------
00629 inline
00630 AI_TVector3SSE 
00631 AI_TMatrix33SSE::x_component() const
00632 {
00633     return AI_TVector3SSE(m1);
00634 }
00635 
00636 //------------------------------------------------------------------------------
00639 inline
00640 AI_TVector3SSE 
00641 AI_TMatrix33SSE::y_component(void) const
00642 {
00643     return AI_TVector3SSE(m2);
00644 }
00645 
00646 //------------------------------------------------------------------------------
00649 inline
00650 AI_TVector3SSE 
00651 AI_TMatrix33SSE::z_component(void) const 
00652 {
00653     return AI_TVector3SSE(m3);
00654 };
00655 
00656 //------------------------------------------------------------------------------
00659 inline
00660 void
00661 AI_TMatrix33SSE::operator *= (const AI_TMatrix33SSE& mx) 
00662 {
00663     m1 = _mm_add_ps(
00664          _mm_add_ps(
00665             _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1), 
00666             _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)), 
00667             _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00668 
00669     m2 = _mm_add_ps(
00670          _mm_add_ps(
00671             _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1), 
00672             _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)), 
00673             _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00674 
00675     m3 = _mm_add_ps(
00676          _mm_add_ps(
00677             _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1), 
00678             _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)), 
00679             _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00680 }
00681 
00682 //------------------------------------------------------------------------------
00687 inline
00688 void 
00689 AI_TMatrix33SSE::mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const
00690 {
00691     dst.m128 = _mm_add_ps(
00692                _mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0)), m1), 
00693                           _mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)), m2)), 
00694                           _mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)), m3));
00695 }
00696 
00697 //------------------------------------------------------------------------------
00698 #endif