AI_TVector4_sse.h

Go to the documentation of this file.
00001 #ifndef AI__VECTOR4_SSE_H
00002 #define AI__VECTOR4_SSE_H
00003 //------------------------------------------------------------------------------
#include <math.h>
#include <xmmintrin.h>
00013 
/**
    A four-component float vector stored in a single SSE register.

    The same 16 bytes are exposed both as an `__m128` (`m128`) and as the
    named scalars `x`, `y`, `z`, `w` through an anonymous union, so member
    functions may use either view.
*/
class AI_TVector4SSE
{
public:
    // --- construction -------------------------------------------------------
    /// Constructs a vector with all four components set to zero.
    AI_TVector4SSE();
    /// Constructs a vector from four scalar components.
    AI_TVector4SSE(const float _x, const float _y, const float _z, const float _w);
    /// Constructs a copy of another vector.
    AI_TVector4SSE(const AI_TVector4SSE& vec);
    /// Constructs a vector directly from a packed SSE value.
    AI_TVector4SSE(const __m128& m);

    // --- assignment ---------------------------------------------------------
    /// Overwrites all four components from scalars.
    void set(const float _x, const float _y, const float _z, const float _w);
    /// Overwrites all four components from another vector.
    void set(const AI_TVector4SSE& v);

    // --- length -------------------------------------------------------------
    /// Returns the length of the vector.
    float len() const;
    /// Scales the vector in place by the reciprocal of its length.
    void norm();

    // --- in-place arithmetic ------------------------------------------------
    /// Adds v to this vector component-wise.
    void operator +=(const AI_TVector4SSE& v);
    /// Subtracts v from this vector component-wise.
    void operator -=(const AI_TVector4SSE& v);
    /// Multiplies every component by the scalar s.
    void operator *=(const float s);

    // --- comparison ---------------------------------------------------------
    /// Returns true when no component differs from v by more than tol.
    bool isequal(const AI_TVector4SSE& v, float tol) const;
    /// Three-way comparison with tolerance; returns +1, -1 or 0.
    int compare(const AI_TVector4SSE& v, float tol) const;

    // --- component-wise min / max / interpolation ---------------------------
    /// Replaces each component with the smaller of itself and v's component.
    void minimum(const AI_TVector4SSE& v);
    /// Replaces each component with the larger of itself and v's component.
    void maximum(const AI_TVector4SSE& v);
    /// Interpolates in place between v0 and the current value by lerpVal.
    void lerp(const AI_TVector4SSE& v0, float lerpVal);
    /// Sets this vector to the interpolation between v0 and v1 by lerpVal.
    void lerp(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1, float lerpVal);

    /// Horizontal-add helper; declared here, its definition is not part of
    /// the inline definitions that follow this class.
    float addhorizontal(const __m128 &a);

    // --- storage ------------------------------------------------------------
    union
    {
        __m128 m128;            ///< packed view of the four lanes
        struct
        {
            float x, y, z, w;   ///< scalar view of the four lanes
        };
    };
};
00064 
00065 //------------------------------------------------------------------------------
00068 inline
00069 AI_TVector4SSE::AI_TVector4SSE()
00070 {
00071     m128 = _mm_setzero_ps();
00072 }
00073 
00074 //------------------------------------------------------------------------------
00077 inline
00078 AI_TVector4SSE::AI_TVector4SSE(const float _x, const float _y, const float _z, const float _w)
00079 {
00080     m128 = _mm_set_ps(_w, _z, _y, _x);
00081 }
00082 
00083 //------------------------------------------------------------------------------
00086 inline
00087 AI_TVector4SSE::AI_TVector4SSE(const AI_TVector4SSE& v)
00088 {
00089     m128 = v.m128;
00090 }
00091 
00092 //------------------------------------------------------------------------------
00095 inline
00096 AI_TVector4SSE::AI_TVector4SSE(const __m128& m) :
00097     m128(m)
00098 {
00099     // empty
00100 }
00101 
00102 //------------------------------------------------------------------------------
00105 inline
00106 void
00107 AI_TVector4SSE::set(const float _x, const float _y, const float _z, const float _w)
00108 {
00109     m128 = _mm_set_ps(_w, _z, _y, _x);
00110 }
00111 
00112 //------------------------------------------------------------------------------
00115 inline
00116 void
00117 AI_TVector4SSE::set(const AI_TVector4SSE& v)
00118 {
00119     m128 = v.m128;
00120 }
00121 
00122 //------------------------------------------------------------------------------
00125 inline
00126 float
00127 AI_TVector4SSE::len() const
00128 {
00129     const int X = 0;
00130     const int Y = 1;
00131     const int Z = 2;
00132     const int W = 3;
00133 
00134     __m128 a = _mm_mul_ps(m128, m128);
00135 
00136     // horizontal add
00137     __m128 b = _mm_add_ss(a, _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z)))));
00138     __m128 l = _mm_sqrt_ss(b);
00139 
00140     return l.m128_f32[X];
00141 }
00142 
00143 //------------------------------------------------------------------------------
00146 inline
00147 void
00148 AI_TVector4SSE::norm()
00149 {
00150     const int X = 0;
00151     const int Y = 1;
00152     const int Z = 2;
00153     const int W = 3;
00154 
00155     // get len
00156     __m128 a = _mm_mul_ps(m128, m128);
00157     __m128 b = _mm_add_ss(a, _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z)))));
00158     
00159     // get reciprocal of square root of squared length
00160     __m128 f = _mm_rsqrt_ss(b);
00161     __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X, X, X, X));
00162     
00163     m128 = _mm_mul_ps(m128, oneDivLen);
00164 }
00165 
00166 //------------------------------------------------------------------------------
00169 inline
00170 void
00171 AI_TVector4SSE::operator +=(const AI_TVector4SSE& v)
00172 {
00173     m128 = _mm_add_ps(m128, v.m128);
00174 }
00175 
00176 //------------------------------------------------------------------------------
00179 inline
00180 void
00181 AI_TVector4SSE::operator -=(const AI_TVector4SSE& v)
00182 {
00183     m128 = _mm_sub_ps(m128, v.m128);
00184 }
00185 
00186 //------------------------------------------------------------------------------
00189 inline
00190 void
00191 AI_TVector4SSE::operator *=(const float s)
00192 {
00193     __m128 packed = _mm_set1_ps(s);
00194     m128 = _mm_mul_ps(m128, packed);
00195 }
00196 
00197 //------------------------------------------------------------------------------
00200 inline
00201 bool
00202 AI_TVector4SSE::isequal(const AI_TVector4SSE& v, float tol) const
00203 {
00204     if (fabs(v.x - x) > tol)      return false;
00205     else if (fabs(v.y - y) > tol) return false;
00206     else if (fabs(v.z - z) > tol) return false;
00207     else if (fabs(v.w - w) > tol) return false;
00208     return true;
00209 }
00210 
00211 //------------------------------------------------------------------------------
00214 inline
00215 int
00216 AI_TVector4SSE::compare(const AI_TVector4SSE& v, float tol) const
00217 {
00218     if (fabs(v.x - x) > tol)      return (v.x > x) ? +1 : -1; 
00219     else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00220     else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00221     else if (fabs(v.w - w) > tol) return (v.w > w) ? +1 : -1;
00222     else                          return 0;
00223 }
00224 
00225 //------------------------------------------------------------------------------
00228 inline
00229 void
00230 AI_TVector4SSE::minimum(const AI_TVector4SSE& v)
00231 {
00232     if (v.x < x) x = v.x;
00233     if (v.y < y) y = v.y;
00234     if (v.z < z) z = v.z;
00235     if (v.w < w) w = v.w;
00236 }
00237 
00238 //------------------------------------------------------------------------------
00241 inline
00242 void
00243 AI_TVector4SSE::maximum(const AI_TVector4SSE& v)
00244 {
00245     if (v.x > x) x = v.x;
00246     if (v.y > y) y = v.y;
00247     if (v.z > z) z = v.z;
00248     if (v.w > w) w = v.w;
00249 }
00250 
00251 //------------------------------------------------------------------------------
00254 static 
00255 inline 
00256 AI_TVector4SSE operator +(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1) 
00257 {
00258     return AI_TVector4SSE(_mm_add_ps(v0.m128, v1.m128));
00259 }
00260 
00261 //------------------------------------------------------------------------------
00264 static 
00265 inline 
00266 AI_TVector4SSE operator -(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1) 
00267 {
00268     return AI_TVector4SSE(_mm_sub_ps(v0.m128, v1.m128));
00269 }
00270 
00271 //------------------------------------------------------------------------------
00274 static 
00275 inline 
00276 AI_TVector4SSE operator *(const AI_TVector4SSE& v0, const float& s) 
00277 {
00278     __m128 packed = _mm_set1_ps(s);
00279     return AI_TVector4SSE(_mm_mul_ps(v0.m128, packed));
00280 }
00281 
00282 //------------------------------------------------------------------------------
00285 static 
00286 inline 
00287 AI_TVector4SSE operator -(const AI_TVector4SSE& v)
00288 {
00289     __m128 zero = _mm_setzero_ps();
00290     return AI_TVector4SSE(_mm_sub_ps(zero, v.m128));
00291 }
00292 
00293 //------------------------------------------------------------------------------
00296 inline
00297 void
00298 AI_TVector4SSE::lerp(const AI_TVector4SSE& v0, float lerpVal)
00299 {
00300     x = v0.x + ((x - v0.x) * lerpVal);
00301     y = v0.y + ((y - v0.y) * lerpVal);
00302     z = v0.z + ((z - v0.z) * lerpVal);
00303     w = v0.w + ((w - v0.w) * lerpVal);
00304 }
00305 
00306 //------------------------------------------------------------------------------
00309 inline
00310 void
00311 AI_TVector4SSE::lerp(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1, float lerpVal)
00312 {
00313     x = v0.x + ((v1.x - v0.x) * lerpVal);
00314     y = v0.y + ((v1.y - v0.y) * lerpVal);
00315     z = v0.z + ((v1.z - v0.z) * lerpVal);
00316     w = v0.w + ((v1.w - v0.w) * lerpVal);
00317 }
00318 
00319 //------------------------------------------------------------------------------
00320 #endif