AI_TVector3_sse.h

Go to the documentation of this file.
00001 #ifndef AI__VECTOR3_SSE_H
00002 #define AI__VECTOR3_SSE_H
00003 //------------------------------------------------------------------------------
00012 #include <xmmintrin.h>
00013 #include <math.h>
00014 
/**
    A three-component float vector kept in one SSE register.

    The register is exposed both as the raw __m128 (m128) and, through an
    anonymous union, as the scalar lanes x, y, z plus a fourth lane (pad).
    Every constructor and set() overload in this header writes 0.0f to pad.
*/
class AI_TVector3SSE
{
public:
    /// construct a vector with all lanes zero
    AI_TVector3SSE();
    /// construct from three scalar components (pad becomes 0)
    AI_TVector3SSE(const float _x, const float _y, const float _z);
    /// copy constructor
    AI_TVector3SSE(const AI_TVector3SSE& vec);
    /// construct from p[0], p[1], p[2]; p must point to at least 3 floats
    AI_TVector3SSE(const float* p);
    /// construct from a raw SSE register (all four lanes taken as-is)
    AI_TVector3SSE(const __m128& m);
    /// assign three scalar components (pad becomes 0)
    void set(const float _x, const float _y, const float _z);
    /// assign from another vector
    void set(const AI_TVector3SSE& vec);
    /// assign from p[0], p[1], p[2]; p must point to at least 3 floats
    void set(const float* p);
    /// length of the (x, y, z) part
    float len() const;
    /// squared length of the (x, y, z) part
    float lensquared() const;
    /// scale in place by the reciprocal square root of the squared length
    void norm();
    /// in-place component-wise addition
    void operator +=(const AI_TVector3SSE& v0);
    /// in-place component-wise subtraction
    void operator -=(const AI_TVector3SSE& v0);
    /// in-place multiplication of every lane by s
    void operator *=(float s);
    /// true if no x/y/z component differs from v by more than tol
    bool isequal(const AI_TVector3SSE& v, float tol) const;
    /// 0 if equal within tol, else +1/-1 from the first differing component (+1 when v's is larger)
    int compare(const AI_TVector3SSE& v, float tol) const;
    /// rotate in place around axis by angle (passed to sin/cos, i.e. radians)
    void rotate(const AI_TVector3SSE& axis, float angle);
    /// in place: this = v0 + (this - v0) * lerpVal
    void lerp(const AI_TVector3SSE& v0, float lerpVal);
    /// this = v0 + (v1 - v0) * lerpVal
    void lerp(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1, float lerpVal);
    /// return some vector whose dot product with this one is zero
    AI_TVector3SSE findortho() const;

    union
    {
        __m128 m128;
        struct
        {
            float x, y, z, pad;
        };
    };
};

//------------------------------------------------------------------------------
/**
    Construct from a float array. Exactly three floats (p[0]..p[2]) are read,
    so a plain float[3] is a valid argument; the fourth lane is set to 0.
    This overload was declared but had no definition in this header.
*/
inline
AI_TVector3SSE::AI_TVector3SSE(const float* p)
{
    m128 = _mm_set_ps(0.0f, p[2], p[1], p[0]);
}

//------------------------------------------------------------------------------
/**
    Assign from a float array. Exactly three floats (p[0]..p[2]) are read;
    the fourth lane is set to 0.
    This overload was declared but had no definition in this header.
*/
inline
void
AI_TVector3SSE::set(const float* p)
{
    m128 = _mm_set_ps(0.0f, p[2], p[1], p[0]);
}
00068 
00069 //------------------------------------------------------------------------------
00072 inline
00073 AI_TVector3SSE::AI_TVector3SSE()
00074 {
00075     m128 = _mm_setzero_ps();
00076 }
00077 
00078 //------------------------------------------------------------------------------
00081 inline
00082 AI_TVector3SSE::AI_TVector3SSE(const float _x, const float _y, const float _z)
00083 {
00084     m128 = _mm_set_ps(0.0f, _z, _y, _x);
00085 }
00086 
00087 //------------------------------------------------------------------------------
00090 inline
00091 AI_TVector3SSE::AI_TVector3SSE(const AI_TVector3SSE& vec)
00092 {
00093     m128 = vec.m128;
00094 }
00095 
00096 //------------------------------------------------------------------------------
00099 inline
00100 AI_TVector3SSE::AI_TVector3SSE(const __m128& m)
00101 {
00102     m128 = m;
00103 }
00104 
00105 //------------------------------------------------------------------------------
00108 inline
00109 void
00110 AI_TVector3SSE::set(const float _x, const float _y, const float _z)
00111 {
00112     m128 = _mm_set_ps(0.0f, _z, _y, _x);
00113 }
00114 
00115 //------------------------------------------------------------------------------
00118 inline
00119 void
00120 AI_TVector3SSE::set(const AI_TVector3SSE& vec)
00121 {
00122     m128 = vec.m128;
00123 }
00124 
00125 //------------------------------------------------------------------------------
00128 inline
00129 float
00130 AI_TVector3SSE::len() const
00131 {
00132     static const int X = 0;
00133     static const int Y = 1;
00134     static const int Z = 2;
00135     static const int W = 3;
00136 
00137     __m128 a = _mm_mul_ps(m128, m128);
00138 
00139     // horizontal add
00140     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00141     __m128 l = _mm_sqrt_ss(b);
00142     return l.m128_f32[X];
00143 }
00144 
00145 //------------------------------------------------------------------------------
00148 inline
00149 float 
00150 AI_TVector3SSE::lensquared() const
00151 {
00152     static const int X = 0;
00153     static const int Y = 1;
00154     static const int Z = 2;
00155     static const int W = 3;
00156 
00157     __m128 a = _mm_mul_ps(m128, m128);
00158     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00159     return b.m128_f32[X];
00160 }
00161 
00162 //------------------------------------------------------------------------------
00165 inline
00166 void
00167 AI_TVector3SSE::norm()
00168 {
00169     static const int X = 0;
00170     static const int Y = 1;
00171     static const int Z = 2;
00172     static const int W = 3;
00173 
00174     __m128 a = _mm_mul_ps(m128, m128);
00175 
00176     // horizontal add
00177     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00178 
00179     // get reciprocal of square root of squared length
00180     __m128 f = _mm_rsqrt_ss(b);
00181     __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X,X,X,X));
00182     
00183     m128 = _mm_mul_ps(m128, oneDivLen);
00184 }
00185 
00186 //------------------------------------------------------------------------------
00189 inline
00190 void
00191 AI_TVector3SSE::operator +=(const AI_TVector3SSE& v)
00192 {
00193     m128 = _mm_add_ps(m128, v.m128);
00194 }
00195 
00196 //------------------------------------------------------------------------------
00199 inline
00200 void
00201 AI_TVector3SSE::operator -=(const AI_TVector3SSE& v)
00202 {
00203     m128 = _mm_sub_ps(m128, v.m128);
00204 }
00205 
00206 //------------------------------------------------------------------------------
00209 inline
00210 void
00211 AI_TVector3SSE::operator *=(float s)
00212 {
00213     __m128 packed = _mm_set1_ps(s);
00214     m128 = _mm_mul_ps(m128, packed);
00215 }
00216 
00217 //------------------------------------------------------------------------------
00220 inline
00221 bool
00222 AI_TVector3SSE::isequal(const AI_TVector3SSE& v, float tol) const
00223 {
00224     if (fabs(v.x - x) > tol)      return false;
00225     else if (fabs(v.y - y) > tol) return false;
00226     else if (fabs(v.z - z) > tol) return false;
00227     return true;
00228 }
00229 
00230 //------------------------------------------------------------------------------
00233 inline
00234 int
00235 AI_TVector3SSE::compare(const AI_TVector3SSE& v, float tol) const
00236 {
00237     if (fabs(v.x - x) > tol)      return (v.x > x) ? +1 : -1; 
00238     else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00239     else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00240     else                          return 0;
00241 }
00242 
00243 //------------------------------------------------------------------------------
00246 inline
00247 void
00248 AI_TVector3SSE::rotate(const AI_TVector3SSE& axis, float angle)
00249 {
00250     // rotates this one around given vector. We do
00251     // rotation with matrices, but these aren't defined yet!
00252     float rotM[9];
00253     float sa, ca;
00254 
00255     sa = (float) sin(angle);
00256     ca = (float) cos(angle);
00257 
00258     // build a rotation matrix
00259     rotM[0] = ca + (1 - ca) * axis.x * axis.x;
00260     rotM[1] = (1 - ca) * axis.x * axis.y - sa * axis.z;
00261     rotM[2] = (1 - ca) * axis.z * axis.x + sa * axis.y;
00262     rotM[3] = (1 - ca) * axis.x * axis.y + sa * axis.z;
00263     rotM[4] = ca + (1 - ca) * axis.y * axis.y;
00264     rotM[5] = (1 - ca) * axis.y * axis.z - sa * axis.x;
00265     rotM[6] = (1 - ca) * axis.z * axis.x - sa * axis.y;
00266     rotM[7] = (1 - ca) * axis.y * axis.z + sa * axis.x;
00267     rotM[8] = ca + (1 - ca) * axis.z * axis.z;
00268 
00269     // "handmade" multiplication
00270     AI_TVector3SSE help(rotM[0] * this->x + rotM[1] * this->y + rotM[2] * this->z,
00271                       rotM[3] * this->x + rotM[4] * this->y + rotM[5] * this->z,
00272                       rotM[6] * this->x + rotM[7] * this->y + rotM[8] * this->z);
00273     *this = help;
00274 }
00275 
00276 //------------------------------------------------------------------------------
00279 static 
00280 inline 
00281 AI_TVector3SSE operator +(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1) 
00282 {
00283     return AI_TVector3SSE(_mm_add_ps(v0.m128, v1.m128));
00284 }
00285 
00286 //------------------------------------------------------------------------------
00289 static 
00290 inline 
00291 AI_TVector3SSE operator -(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1) 
00292 {
00293     return AI_TVector3SSE(_mm_sub_ps(v0.m128, v1.m128));
00294 }
00295 
00296 //------------------------------------------------------------------------------
00299 static 
00300 inline 
00301 AI_TVector3SSE operator *(const AI_TVector3SSE& v0, const float s) 
00302 {
00303     __m128 packed = _mm_set1_ps(s);
00304     return AI_TVector3SSE(_mm_mul_ps(v0.m128, packed));
00305 }
00306 
00307 //------------------------------------------------------------------------------
00310 static 
00311 inline 
00312 AI_TVector3SSE operator -(const AI_TVector3SSE& v) 
00313 {
00314     __m128 zero = _mm_setzero_ps();
00315     return AI_TVector3SSE(_mm_sub_ps(zero, v.m128));
00316 }
00317 
00318 //------------------------------------------------------------------------------
00322 static
00323 inline
00324 float operator %(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1)
00325 {
00326     __m128 a = _mm_mul_ps(v0.m128, v1.m128);
00327     __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,0,0)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2,2,2,2))));
00328     return b.m128_f32[0];
00329 }
00330 
00331 //------------------------------------------------------------------------------
00335 static 
00336 inline 
00337 AI_TVector3SSE operator *(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1) 
00338 {
00339     // x = v0.y * v1.z - v0.z * v1.y
00340     // y = v0.z * v1.x - v0.x * v1.z
00341     // z = v0.x * v1.y - v0.y * v1.x
00342     //
00343     // a = v0.y | v0.z | v0.x | xxx
00344     // b = v1.z | v1.x | v1.y | xxx
00345     // c = v0.z | v0.x | v0.y | xxx
00346     // d = v1.y | v1.z | v1.x | xxx
00347     //
00348 
00349     static const int X = 0;
00350     static const int Y = 1;
00351     static const int Z = 2;
00352     static const int W = 3;
00353 
00354     __m128 a = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, X, Z, Y));
00355     __m128 b = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, Y, X, Z));
00356     __m128 c = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, Y, X, Z));
00357     __m128 d = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, X, Z, Y));
00358 
00359     __m128 e = _mm_mul_ps(a, b);
00360     __m128 f = _mm_mul_ps(c, d);
00361 
00362     return AI_TVector3SSE(_mm_sub_ps(e, f));
00363 }
00364 
00365 //------------------------------------------------------------------------------
00368 inline
00369 void
00370 AI_TVector3SSE::lerp(const AI_TVector3SSE& v0, float lerpVal)
00371 {
00372     x = v0.x + ((x - v0.x) * lerpVal);
00373     y = v0.y + ((y - v0.y) * lerpVal);
00374     z = v0.z + ((z - v0.z) * lerpVal);
00375 }
00376 
00377 //------------------------------------------------------------------------------
00380 inline
00381 void
00382 AI_TVector3SSE::lerp(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1, float lerpVal)
00383 {
00384     x = v0.x + ((v1.x - v0.x) * lerpVal);
00385     y = v0.y + ((v1.y - v0.y) * lerpVal);
00386     z = v0.z + ((v1.z - v0.z) * lerpVal);
00387 }
00388 
00389 //------------------------------------------------------------------------------
00394 inline
00395 AI_TVector3SSE
00396 AI_TVector3SSE::findortho() const
00397 {
00398     if (0.0 != x)
00399     {
00400         return AI_TVector3SSE((-y - z) / x, 1.0, 1.0);
00401     } else
00402     if (0.0 != y)
00403     {
00404         return AI_TVector3SSE(1.0, (-x - z) / y, 1.0);
00405     } else
00406     if (0.0 != z)
00407     {
00408         return AI_TVector3SSE(1.0, 1.0, (-x - y) / z);
00409     } else
00410     {
00411         return AI_TVector3SSE(0.0, 0.0, 0.0);
00412     }
00413 }
00414 
00415 //------------------------------------------------------------------------------
00416 #endif