00001 #ifndef AI__VECTOR4_SSE_H
00002 #define AI__VECTOR4_SSE_H
00003
00012 #include <xmmintrin.h>
00013
/**
 * Four-component float vector stored in a single SSE value.
 *
 * The anonymous union lets the same 16 bytes be addressed either as one
 * __m128 (m128) or as four named floats (x, y, z, w). All members are public.
 */
class AI_TVector4SSE
{
public:
    // ---- construction -----------------------------------------------------

    /// Creates a vector whose four components are zero.
    AI_TVector4SSE();
    /// Creates a vector from four individual components.
    AI_TVector4SSE(float _x, float _y, float _z, float _w);
    /// Creates a copy of another vector.
    AI_TVector4SSE(const AI_TVector4SSE& vec);
    /// Creates a vector that wraps an existing SSE value.
    AI_TVector4SSE(const __m128& m);

    // ---- assignment -------------------------------------------------------

    /// Overwrites all four components.
    void set(float _x, float _y, float _z, float _w);
    /// Overwrites this vector with the contents of v.
    void set(const AI_TVector4SSE& v);

    // ---- length -----------------------------------------------------------

    /// Intended to return the magnitude of the vector; see the inline
    /// definition for the exact computation.
    float len() const;
    /// Intended to rescale the vector in place to unit length; see the inline
    /// definition for the exact computation.
    void norm();

    // ---- in-place arithmetic (these return nothing, so cannot be chained) --

    /// Adds v component-wise.
    void operator +=(const AI_TVector4SSE& v);
    /// Subtracts v component-wise.
    void operator -=(const AI_TVector4SSE& v);
    /// Multiplies every component by s.
    void operator *=(float s);

    // ---- comparison -------------------------------------------------------

    /// True when no component differs from v's by more than tol.
    bool isequal(const AI_TVector4SSE& v, float tol) const;
    /// Returns 0 when all components are within tol of v's. Otherwise looks at
    /// the first component (in x, y, z, w order) that differs by more than tol
    /// and returns +1 if v's value is the larger one, -1 if not.
    int compare(const AI_TVector4SSE& v, float tol) const;

    // ---- component-wise min / max ------------------------------------------

    /// Replaces each component with v's where v's is smaller.
    void minimum(const AI_TVector4SSE& v);
    /// Replaces each component with v's where v's is larger.
    void maximum(const AI_TVector4SSE& v);

    // ---- interpolation -----------------------------------------------------

    /// In place: this = v0 + (this - v0) * lerpVal.
    void lerp(const AI_TVector4SSE& v0, float lerpVal);
    /// In place: this = v0 + (v1 - v0) * lerpVal.
    void lerp(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1, float lerpVal);

    /// Takes an SSE value and returns a float. No definition is visible in
    /// this header; presumably sums the four lanes of a -- TODO confirm.
    float addhorizontal(const __m128 &a);

    // ---- storage -----------------------------------------------------------

    union
    {
        __m128 m128;            ///< all four components as one SSE value
        struct
        {
            float x, y, z, w;   ///< the same storage, one float per component
        };
    };
};
00064
00065
00068 inline
00069 AI_TVector4SSE::AI_TVector4SSE()
00070 {
00071 m128 = _mm_setzero_ps();
00072 }
00073
00074
00077 inline
00078 AI_TVector4SSE::AI_TVector4SSE(const float _x, const float _y, const float _z, const float _w)
00079 {
00080 m128 = _mm_set_ps(_w, _z, _y, _x);
00081 }
00082
00083
00086 inline
00087 AI_TVector4SSE::AI_TVector4SSE(const AI_TVector4SSE& v)
00088 {
00089 m128 = v.m128;
00090 }
00091
00092
00095 inline
00096 AI_TVector4SSE::AI_TVector4SSE(const __m128& m) :
00097 m128(m)
00098 {
00099
00100 }
00101
00102
00105 inline
00106 void
00107 AI_TVector4SSE::set(const float _x, const float _y, const float _z, const float _w)
00108 {
00109 m128 = _mm_set_ps(_w, _z, _y, _x);
00110 }
00111
00112
00115 inline
00116 void
00117 AI_TVector4SSE::set(const AI_TVector4SSE& v)
00118 {
00119 m128 = v.m128;
00120 }
00121
00122
00125 inline
00126 float
00127 AI_TVector4SSE::len() const
00128 {
00129 const int X = 0;
00130 const int Y = 1;
00131 const int Z = 2;
00132 const int W = 3;
00133
00134 __m128 a = _mm_mul_ps(m128, m128);
00135
00136
00137 __m128 b = _mm_add_ss(a, _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z)))));
00138 __m128 l = _mm_sqrt_ss(b);
00139
00140 return l.m128_f32[X];
00141 }
00142
00143
00146 inline
00147 void
00148 AI_TVector4SSE::norm()
00149 {
00150 const int X = 0;
00151 const int Y = 1;
00152 const int Z = 2;
00153 const int W = 3;
00154
00155
00156 __m128 a = _mm_mul_ps(m128, m128);
00157 __m128 b = _mm_add_ss(a, _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z)))));
00158
00159
00160 __m128 f = _mm_rsqrt_ss(b);
00161 __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X, X, X, X));
00162
00163 m128 = _mm_mul_ps(m128, oneDivLen);
00164 }
00165
00166
00169 inline
00170 void
00171 AI_TVector4SSE::operator +=(const AI_TVector4SSE& v)
00172 {
00173 m128 = _mm_add_ps(m128, v.m128);
00174 }
00175
00176
00179 inline
00180 void
00181 AI_TVector4SSE::operator -=(const AI_TVector4SSE& v)
00182 {
00183 m128 = _mm_sub_ps(m128, v.m128);
00184 }
00185
00186
00189 inline
00190 void
00191 AI_TVector4SSE::operator *=(const float s)
00192 {
00193 __m128 packed = _mm_set1_ps(s);
00194 m128 = _mm_mul_ps(m128, packed);
00195 }
00196
00197
00200 inline
00201 bool
00202 AI_TVector4SSE::isequal(const AI_TVector4SSE& v, float tol) const
00203 {
00204 if (fabs(v.x - x) > tol) return false;
00205 else if (fabs(v.y - y) > tol) return false;
00206 else if (fabs(v.z - z) > tol) return false;
00207 else if (fabs(v.w - w) > tol) return false;
00208 return true;
00209 }
00210
00211
00214 inline
00215 int
00216 AI_TVector4SSE::compare(const AI_TVector4SSE& v, float tol) const
00217 {
00218 if (fabs(v.x - x) > tol) return (v.x > x) ? +1 : -1;
00219 else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00220 else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00221 else if (fabs(v.w - w) > tol) return (v.w > w) ? +1 : -1;
00222 else return 0;
00223 }
00224
00225
00228 inline
00229 void
00230 AI_TVector4SSE::minimum(const AI_TVector4SSE& v)
00231 {
00232 if (v.x < x) x = v.x;
00233 if (v.y < y) y = v.y;
00234 if (v.z < z) z = v.z;
00235 if (v.w < w) w = v.w;
00236 }
00237
00238
00241 inline
00242 void
00243 AI_TVector4SSE::maximum(const AI_TVector4SSE& v)
00244 {
00245 if (v.x > x) x = v.x;
00246 if (v.y > y) y = v.y;
00247 if (v.z > z) z = v.z;
00248 if (v.w > w) w = v.w;
00249 }
00250
00251
00254 static
00255 inline
00256 AI_TVector4SSE operator +(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1)
00257 {
00258 return AI_TVector4SSE(_mm_add_ps(v0.m128, v1.m128));
00259 }
00260
00261
00264 static
00265 inline
00266 AI_TVector4SSE operator -(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1)
00267 {
00268 return AI_TVector4SSE(_mm_sub_ps(v0.m128, v1.m128));
00269 }
00270
00271
00274 static
00275 inline
00276 AI_TVector4SSE operator *(const AI_TVector4SSE& v0, const float& s)
00277 {
00278 __m128 packed = _mm_set1_ps(s);
00279 return AI_TVector4SSE(_mm_mul_ps(v0.m128, packed));
00280 }
00281
00282
00285 static
00286 inline
00287 AI_TVector4SSE operator -(const AI_TVector4SSE& v)
00288 {
00289 __m128 zero = _mm_setzero_ps();
00290 return AI_TVector4SSE(_mm_sub_ps(zero, v.m128));
00291 }
00292
00293
00296 inline
00297 void
00298 AI_TVector4SSE::lerp(const AI_TVector4SSE& v0, float lerpVal)
00299 {
00300 x = v0.x + ((x - v0.x) * lerpVal);
00301 y = v0.y + ((y - v0.y) * lerpVal);
00302 z = v0.z + ((z - v0.z) * lerpVal);
00303 w = v0.w + ((w - v0.w) * lerpVal);
00304 }
00305
00306
00309 inline
00310 void
00311 AI_TVector4SSE::lerp(const AI_TVector4SSE& v0, const AI_TVector4SSE& v1, float lerpVal)
00312 {
00313 x = v0.x + ((v1.x - v0.x) * lerpVal);
00314 y = v0.y + ((v1.y - v0.y) * lerpVal);
00315 z = v0.z + ((v1.z - v0.z) * lerpVal);
00316 w = v0.w + ((v1.w - v0.w) * lerpVal);
00317 }
00318
00319
00320 #endif