00001 #ifndef AI__MATRIX33_SSE_H
00002 #define AI__MATRIX33_SSE_H
00003
00012 #include "AI_TVector3SSE.h"
00013 #include "AI_Quaternion.h"
00014 #include "AI_Euler.h"
00015 #include "AI_Matrixdefs.h"
00016
00017 #include <string.h>
00018
/**
    Identity matrix stored as three rows of four floats each. The fourth
    float of every row is padding and is zero. The table is only ever used
    as a copy source, so it is declared const to keep it from being
    modified by accident.
*/
static const float _matrix33_sse_ident[12] =
{
    1.0f, 0.0f, 0.0f, 0.0f,
    0.0f, 1.0f, 0.0f, 0.0f,
    0.0f, 0.0f, 1.0f, 0.0f,
};
00025
00026 class AI_TMatrix33SSE
00027 {
00028 public:
00030 AI_TMatrix33SSE();
00032 AI_TMatrix33SSE(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3);
00034 AI_TMatrix33SSE(const AI_TMatrix33SSE& mx);
00036 AI_TMatrix33SSE(float _m11, float _m12, float _m13, float _m21, float _m22, float _m23, float _m31, float _m32, float _m33);
00038 AI_TMatrix33SSE(const AI_Quaternion& q);
00040 AI_TMatrix33SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3);
00042 AI_Quaternion get_quaternion() const;
00044 AI_TVector3SSE to_euler() const;
00046 void from_euler(const AI_TVector3SSE& ea);
00048 void lookat(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00050 void billboard(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up);
00052 void set(float m11, float m12, float m13, float m21, float m22, float m23, float m31, float m32, float m33);
00054 void set(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3);
00056 void set(const AI_TMatrix33SSE& mx);
00058 void ident();
00060 void transpose();
00062 bool orthonorm(float limit);
00064 void scale(const AI_TVector3SSE& s);
00066 void rotate_x(const float a);
00068 void rotate_y(const float a);
00070 void rotate_z(const float a);
00072 void rotate_local_x(const float a);
00074 void rotate_local_y(const float a);
00076 void rotate_local_z(const float a);
00078 void rotate(const AI_TVector3SSE& vec, float a);
00080 AI_TVector3SSE x_component(void) const;
00082 AI_TVector3SSE y_component(void) const;
00084 AI_TVector3SSE z_component(void) const;
00085
00086 void operator *= (const AI_TMatrix33SSE& m1);
00088 void mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const;
00089
00090 union
00091 {
00092 struct
00093 {
00094 __m128 m1;
00095 __m128 m2;
00096 __m128 m3;
00097 };
00098 struct
00099 {
00100 float m[3][4];
00101 };
00102 };
00103 };
00104
00105
00109 static
00110 inline
00111 AI_TMatrix33SSE operator * (const AI_TMatrix33SSE& ma, const AI_TMatrix33SSE& mb)
00112 {
00113 return AI_TMatrix33SSE(
00114 _mm_add_ps(
00115 _mm_add_ps(
00116 _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(0,0,0,0)), mb.m1),
00117 _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(1,1,1,1)), mb.m2)),
00118 _mm_mul_ps(_mm_shuffle_ps(ma.m1, ma.m1, _MM_SHUFFLE(2,2,2,2)), mb.m3)),
00119
00120 _mm_add_ps(
00121 _mm_add_ps(
00122 _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(0,0,0,0)), mb.m1),
00123 _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(1,1,1,1)), mb.m2)),
00124 _mm_mul_ps(_mm_shuffle_ps(ma.m2, ma.m2, _MM_SHUFFLE(2,2,2,2)), mb.m3)),
00125
00126 _mm_add_ps(
00127 _mm_add_ps(
00128 _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(0,0,0,0)), mb.m1),
00129 _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(1,1,1,1)), mb.m2)),
00130 _mm_mul_ps(_mm_shuffle_ps(ma.m3, ma.m3, _MM_SHUFFLE(2,2,2,2)), mb.m3)));
00131 }
00132
00133
00137 static
00138 inline
00139 AI_TVector3SSE operator * (const AI_TMatrix33SSE& mx, const AI_TVector3SSE& v)
00140 {
00141 return AI_TVector3SSE(
00142 _mm_add_ps(
00143 _mm_add_ps(
00144 _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(0,0,0,0)), mx.m1),
00145 _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(1,1,1,1)), mx.m2)),
00146 _mm_mul_ps(_mm_shuffle_ps(v.m128, v.m128, _MM_SHUFFLE(2,2,2,2)), mx.m3)));
00147 }
00148
00149
00152 inline
00153 AI_TMatrix33SSE::AI_TMatrix33SSE()
00154 {
00155 memcpy(&(m[0][0]), _matrix33_sse_ident, sizeof(_matrix33_sse_ident));
00156 }
00157
00158
00161 inline
00162 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3) :
00163 m1(v1.m128),
00164 m2(v2.m128),
00165 m3(v3.m128)
00166 {
00167
00168 }
00169
00170
00173 inline
00174 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_TMatrix33SSE& mx) :
00175 m1(mx.m1),
00176 m2(mx.m2),
00177 m3(mx.m3)
00178 {
00179
00180 }
00181
00182
00185 inline
00186 AI_TMatrix33SSE::AI_TMatrix33SSE(float _m11, float _m12, float _m13,
00187 float _m21, float _m22, float _m23,
00188 float _m31, float _m32, float _m33)
00189 {
00190 m1 = _mm_set_ps(0.0f, _m13, _m12, _m11);
00191 m2 = _mm_set_ps(0.0f, _m23, _m22, _m21);
00192 m3 = _mm_set_ps(0.0f, _m33, _m32, _m31);
00193 }
00194
00195
00198 inline
00199 AI_TMatrix33SSE::AI_TMatrix33SSE(const __m128& _m1, const __m128& _m2, const __m128& _m3) :
00200 m1(_m1),
00201 m2(_m2),
00202 m3(_m3)
00203 {
00204
00205 }
00206
00207
00211 inline
00212 AI_TMatrix33SSE::AI_TMatrix33SSE(const AI_Quaternion& q)
00213 {
00214 float xx = q.x*q.x; float yy = q.y*q.y; float zz = q.z*q.z;
00215 float xy = q.x*q.y; float xz = q.x*q.z; float yz = q.y*q.z;
00216 float wx = q.w*q.x; float wy = q.w*q.y; float wz = q.w*q.z;
00217
00218 m[0][0] = 1.0f - 2.0f * (yy + zz);
00219 m[1][0] = 2.0f * (xy - wz);
00220 m[2][0] = 2.0f * (xz + wy);
00221
00222 m[0][1] = 2.0f * (xy + wz);
00223 m[1][1] = 1.0f - 2.0f * (xx + zz);
00224 m[2][1] = 2.0f * (yz - wx);
00225
00226 m[0][2] = 2.0f * (xz - wy);
00227 m[1][2] = 2.0f * (yz + wx);
00228 m[2][2] = 1.0f - 2.0f * (xx + yy);
00229 }
00230
00231
00235 inline
00236 AI_Quaternion
00237 AI_TMatrix33SSE::get_quaternion() const
00238 {
00239 float qa[4];
00240 float tr = m[0][0] + m[1][1] + m[2][2];
00241 if (tr > 0.0f)
00242 {
00243 float s = ai_sqrt (tr + 1.0f);
00244 qa[3] = s * 0.5f;
00245 s = 0.5f / s;
00246 qa[0] = (m[1][2] - m[2][1]) * s;
00247 qa[1] = (m[2][0] - m[0][2]) * s;
00248 qa[2] = (m[0][1] - m[1][0]) * s;
00249 }
00250 else
00251 {
00252 int i, j, k, nxt[3] = {1,2,0};
00253 i = 0;
00254 if (m[1][1] > m[0][0]) i=1;
00255 if (m[2][2] > m[i][i]) i=2;
00256 j = nxt[i];
00257 k = nxt[j];
00258 float s = ai_sqrt((m[i][i] - (m[j][j] + m[k][k])) + 1.0f);
00259 qa[i] = s * 0.5f;
00260 s = 0.5f / s;
00261 qa[3] = (m[j][k] - m[k][j])* s;
00262 qa[j] = (m[i][j] + m[j][i]) * s;
00263 qa[k] = (m[i][k] + m[k][i]) * s;
00264 }
00265 AI_Quaternion q(qa[0],qa[1],qa[2],qa[3]);
00266 return q;
00267 }
00268
00269
00273 inline
00274 AI_TVector3SSE
00275 AI_TMatrix33SSE::to_euler() const
00276 {
00277 AI_TVector3SSE ea;
00278
00279
00280 AI_TMatrix33SSE tmp(*this);
00281 tmp.transpose();
00282
00283 int i,j,k,h,n,s,f;
00284 EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
00285 if (s==EulRepYes)
00286 {
00287 double sy = (float) sqrt(tmp.m[0][1] * tmp.m[0][1] + tmp.m[0][2] * tmp.m[0][2]);
00288 if (sy > 16*FLT_EPSILON)
00289 {
00290 ea.x = (float) atan2(tmp.m[0][1], tmp.m[0][2]);
00291 ea.y = (float) atan2(sy, tmp.m[0][0]);
00292 ea.z = (float) atan2(tmp.m[1][0], -tmp.m[2][0]);
00293 } else {
00294 ea.x = (float) atan2(-tmp.m[1][2], tmp.m[1][1]);
00295 ea.y = (float) atan2(sy, tmp.m[0][0]);
00296 ea.z = 0;
00297 }
00298 }
00299 else
00300 {
00301 double cy = sqrt(tmp.m[0][0] * tmp.m[0][0] + tmp.m[1][0] * tmp.m[1][0]);
00302 if (cy > 16*FLT_EPSILON)
00303 {
00304 ea.x = (float) atan2(tmp.m[2][1], tmp.m[2][2]);
00305 ea.y = (float) atan2(-tmp.m[2][0], cy);
00306 ea.z = (float) atan2(tmp.m[1][0], tmp.m[0][0]);
00307 }
00308 else
00309 {
00310 ea.x = (float) atan2(-tmp.m[1][2], tmp.m[1][1]);
00311 ea.y = (float) atan2(-tmp.m[2][0], cy);
00312 ea.z = 0;
00313 }
00314 }
00315 if (n==EulParOdd) {ea.x = -ea.x; ea.y = - ea.y; ea.z = -ea.z;}
00316 if (f==EulFrmR) {float t = ea.x; ea.x = ea.z; ea.z = t;}
00317
00318 return ea;
00319 }
00320
00321
00325 inline
00326 void
00327 AI_TMatrix33SSE::from_euler(const AI_TVector3SSE& ea)
00328 {
00329 AI_TVector3SSE tea = ea;
00330 double ti, tj, th, ci, cj, ch, si, sj, sh, cc, cs, sc, ss;
00331 int i,j,k,h,n,s,f;
00332 EulGetOrd(EulOrdXYZs,i,j,k,h,n,s,f);
00333 if (f==EulFrmR) {float t = ea.x; tea.x = ea.z; tea.z = t;}
00334 if (n==EulParOdd) {tea.x = -ea.x; tea.y = -ea.y; tea.z = -ea.z;}
00335 ti = tea.x; tj = tea.y; th = tea.z;
00336 ci = cos(ti); cj = cos(tj); ch = cos(th);
00337 si = sin(ti); sj = sin(tj); sh = sin(th);
00338 cc = ci*ch; cs = ci*sh; sc = si*ch; ss = si*sh;
00339 if (s==EulRepYes)
00340 {
00341 m[0][0] = (float)(cj); m[0][1] = (float)(sj*si); m[0][2] = (float)(sj*ci);
00342 m[1][0] = (float)(sj*sh); m[1][1] = (float)(-cj*ss+cc); m[1][2] = (float)(-cj*cs-sc);
00343 m[2][0] = (float)(-sj*ch); m[1][2] = (float)( cj*sc+cs); m[2][2] = (float)( cj*cc-ss);
00344 }
00345 else
00346 {
00347 m[0][0] = (float)(cj*ch); m[0][1] = (float)(sj*sc-cs); m[0][2] = (float)(sj*cc+ss);
00348 m[1][0] = (float)(cj*sh); m[1][1] = (float)(sj*ss+cc); m[1][2] = (float)(sj*cs-sc);
00349 m[2][0] = (float)(-sj); m[2][1] = (float)(cj*si); m[2][2] = (float)(cj*ci);
00350 }
00351
00352
00353 this->transpose();
00354 }
00355
00356
00359 inline
00360 void
00361 AI_TMatrix33SSE::lookat(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00362 {
00363 AI_TVector3SSE z(from - to);
00364 z.norm();
00365 AI_TVector3SSE y(up);
00366 AI_TVector3SSE x(y * z);
00367 y = z * x;
00368 x.norm();
00369 y.norm();
00370
00371 m1 = x.m128;
00372 m2 = y.m128;
00373 m3 = z.m128;
00374 }
00375
00376
00379 inline
00380 void
00381 AI_TMatrix33SSE::billboard(const AI_TVector3SSE& from, const AI_TVector3SSE& to, const AI_TVector3SSE& up)
00382 {
00383 AI_TVector3SSE z(from - to);
00384 z.norm();
00385 AI_TVector3SSE y(up);
00386 AI_TVector3SSE x(y * z);
00387 z = x * y;
00388 x.norm();
00389 y.norm();
00390 z.norm();
00391
00392 m1 = x.m128;
00393 m2 = y.m128;
00394 m3 = z.m128;
00395 }
00396
00397
00400 inline
00401 void
00402 AI_TMatrix33SSE::set(float m11, float m12, float m13,
00403 float m21, float m22, float m23,
00404 float m31, float m32, float m33)
00405 {
00406 m1 = _mm_set_ps(0.0f, m13, m12, m11);
00407 m2 = _mm_set_ps(0.0f, m23, m22, m21);
00408 m3 = _mm_set_ps(0.0f, m33, m32, m31);
00409 }
00410
00411
00414 inline
00415 void
00416 AI_TMatrix33SSE::set(const AI_TVector3SSE& v1, const AI_TVector3SSE& v2, const AI_TVector3SSE& v3)
00417 {
00418 m1 = v1.m128;
00419 m2 = v2.m128;
00420 m3 = v3.m128;
00421 }
00422
00423
00426 inline
00427 void
00428 AI_TMatrix33SSE::set(const AI_TMatrix33SSE& mx)
00429 {
00430 m1 = mx.m1;
00431 m2 = mx.m2;
00432 m3 = mx.m3;
00433 }
00434
00435
00438 inline
00439 void
00440 AI_TMatrix33SSE::ident()
00441 {
00442 memcpy(&(m[0][0]), _matrix33_sse_ident, sizeof(_matrix33_sse_ident));
00443 }
00444
00445
00449 inline
00450 void
00451 AI_TMatrix33SSE::transpose()
00452 {
00453 #undef n_swap
00454 #define n_swap(x,y) { float t=x; x=y; y=t; }
00455 n_swap(m[0][1],m[1][0]);
00456 n_swap(m[0][2],m[2][0]);
00457 n_swap(m[1][2],m[2][1]);
00458 }
00459
00460
00464 inline
00465 bool
00466 AI_TMatrix33SSE::orthonorm(float limit)
00467 {
00468 if (((m[0][0]*m[1][0]+m[0][1]*m[1][1]+m[0][2]*m[1][2])<limit) &&
00469 ((m[0][0]*m[2][0]+m[0][1]*m[2][1]+m[0][2]*m[2][2])<limit) &&
00470 ((m[2][0]*m[1][0]+m[2][1]*m[1][1]+m[2][2]*m[1][2])<limit) &&
00471 ((m[0][0]*m[0][0]+m[0][1]*m[0][1]+m[0][2]*m[0][2])>(1.0-limit)) &&
00472 ((m[0][0]*m[0][0]+m[0][1]*m[0][1]+m[0][2]*m[0][2])<(1.0+limit)) &&
00473 ((m[1][0]*m[1][0]+m[1][1]*m[1][1]+m[1][2]*m[1][2])>(1.0-limit)) &&
00474 ((m[1][0]*m[1][0]+m[1][1]*m[1][1]+m[1][2]*m[1][2])<(1.0+limit)) &&
00475 ((m[2][0]*m[2][0]+m[2][1]*m[2][1]+m[2][2]*m[2][2])>(1.0-limit)) &&
00476 ((m[2][0]*m[2][0]+m[2][1]*m[2][1]+m[2][2]*m[2][2])<(1.0+limit)))
00477 return true;
00478 else
00479 return false;
00480 }
00481
00482
00485 inline
00486 void
00487 AI_TMatrix33SSE::scale(const AI_TVector3SSE& s)
00488 {
00489 m1 = _mm_mul_ps(m1, s.m128);
00490 m2 = _mm_mul_ps(m2, s.m128);
00491 m3 = _mm_mul_ps(m3, s.m128);
00492 }
00493
00494
00498 inline
00499 void
00500 AI_TMatrix33SSE::rotate_x(const float a)
00501 {
00502 float c = ai_cos(a);
00503 float s = ai_sin(a);
00504 int i;
00505 for (i=0; i<3; i++)
00506 {
00507 float mi1 = m[i][1];
00508 float mi2 = m[i][2];
00509 m[i][1] = mi1*c + mi2*-s;
00510 m[i][2] = mi1*s + mi2*c;
00511 }
00512 }
00513
00514
00518 inline
00519 void
00520 AI_TMatrix33SSE::rotate_y(const float a)
00521 {
00522 float c = ai_cos(a);
00523 float s = ai_sin(a);
00524 int i;
00525 for (i=0; i<3; i++)
00526 {
00527 float mi0 = m[i][0];
00528 float mi2 = m[i][2];
00529 m[i][0] = mi0*c + mi2*s;
00530 m[i][2] = mi0*-s + mi2*c;
00531 }
00532 }
00533
00534
00538 inline
00539 void
00540 AI_TMatrix33SSE::rotate_z(const float a)
00541 {
00542 float c = ai_cos(a);
00543 float s = ai_sin(a);
00544 int i;
00545 for (i=0; i<3; i++)
00546 {
00547 float mi0 = m[i][0];
00548 float mi1 = m[i][1];
00549 m[i][0] = mi0*c + mi1*-s;
00550 m[i][1] = mi0*s + mi1*c;
00551 }
00552 }
00553
00554
00558 inline
00559 void
00560 AI_TMatrix33SSE::rotate_local_x(const float a)
00561 {
00562 AI_TMatrix33SSE rotM;
00563 rotM.m[1][1] = (float) cos(a); rotM.m[1][2] = -(float) sin(a);
00564 rotM.m[2][1] = (float) sin(a); rotM.m[2][2] = (float) cos(a);
00565
00566 (*this) = rotM * (*this);
00567 }
00568
00569
00573 inline
00574 void
00575 AI_TMatrix33SSE::rotate_local_y(const float a)
00576 {
00577 AI_TMatrix33SSE rotM;
00578 rotM.m[0][0] = (float) cos(a); rotM.m[0][2] = (float) sin(a);
00579 rotM.m[2][0] = -(float) sin(a); rotM.m[2][2] = (float) cos(a);
00580
00581 (*this) = rotM * (*this);
00582 }
00583
00584
00588 inline
00589 void
00590 AI_TMatrix33SSE::rotate_local_z(const float a)
00591 {
00592 AI_TMatrix33SSE rotM;
00593 rotM.m[0][0] = (float) cos(a); rotM.m[0][1] = -(float) sin(a);
00594 rotM.m[1][0] = (float) sin(a); rotM.m[1][1] = (float) cos(a);
00595
00596 (*this) = rotM * (*this);
00597 }
00598
00599
00603 inline
00604 void
00605 AI_TMatrix33SSE::rotate(const AI_TVector3SSE& vec, float a)
00606 {
00607 AI_TVector3SSE v(vec);
00608 v.norm();
00609 float sa = (float) ai_sin(a);
00610 float ca = (float) ai_cos(a);
00611
00612 AI_TMatrix33SSE rotM;
00613 rotM.m[0][0] = ca + (1.0f - ca) * v.x * v.x;
00614 rotM.m[0][1] = (1.0f - ca) * v.x * v.y - sa * v.z;
00615 rotM.m[0][2] = (1.0f - ca) * v.z * v.x + sa * v.y;
00616 rotM.m[1][0] = (1.0f - ca) * v.x * v.y + sa * v.z;
00617 rotM.m[1][1] = ca + (1.0f - ca) * v.y * v.y;
00618 rotM.m[1][2] = (1.0f - ca) * v.y * v.z - sa * v.x;
00619 rotM.m[2][0] = (1.0f - ca) * v.z * v.x - sa * v.y;
00620 rotM.m[2][1] = (1.0f - ca) * v.y * v.z + sa * v.x;
00621 rotM.m[2][2] = ca + (1.0f - ca) * v.z * v.z;
00622
00623 (*this) = (*this) * rotM;
00624 }
00625
00626
00629 inline
00630 AI_TVector3SSE
00631 AI_TMatrix33SSE::x_component() const
00632 {
00633 return AI_TVector3SSE(m1);
00634 }
00635
00636
00639 inline
00640 AI_TVector3SSE
00641 AI_TMatrix33SSE::y_component(void) const
00642 {
00643 return AI_TVector3SSE(m2);
00644 }
00645
00646
00649 inline
00650 AI_TVector3SSE
00651 AI_TMatrix33SSE::z_component(void) const
00652 {
00653 return AI_TVector3SSE(m3);
00654 };
00655
00656
00659 inline
00660 void
00661 AI_TMatrix33SSE::operator *= (const AI_TMatrix33SSE& mx)
00662 {
00663 m1 = _mm_add_ps(
00664 _mm_add_ps(
00665 _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(0,0,0,0)), mx.m1),
00666 _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(1,1,1,1)), mx.m2)),
00667 _mm_mul_ps(_mm_shuffle_ps(m1, m1, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00668
00669 m2 = _mm_add_ps(
00670 _mm_add_ps(
00671 _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(0,0,0,0)), mx.m1),
00672 _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(1,1,1,1)), mx.m2)),
00673 _mm_mul_ps(_mm_shuffle_ps(m2, m2, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00674
00675 m3 = _mm_add_ps(
00676 _mm_add_ps(
00677 _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(0,0,0,0)), mx.m1),
00678 _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(1,1,1,1)), mx.m2)),
00679 _mm_mul_ps(_mm_shuffle_ps(m3, m3, _MM_SHUFFLE(2,2,2,2)), mx.m3));
00680 }
00681
00682
00687 inline
00688 void
00689 AI_TMatrix33SSE::mult(const AI_TVector3SSE& src, AI_TVector3SSE& dst) const
00690 {
00691 dst.m128 = _mm_add_ps(
00692 _mm_add_ps(_mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(0,0,0,0)), m1),
00693 _mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(1,1,1,1)), m2)),
00694 _mm_mul_ps(_mm_shuffle_ps(src.m128, src.m128, _MM_SHUFFLE(2,2,2,2)), m3));
00695 }
00696
00697
00698 #endif