00001 #ifndef AI__VECTOR3_SSE_H
00002 #define AI__VECTOR3_SSE_H
00003
00012 #include <xmmintrin.h>
00013 #include <math.h>
00014
/**
    A three-component float vector held in a single SSE register.

    The storage is a union of one __m128 and four floats (x, y, z, pad),
    so the components can be read either as SIMD lanes or by name.
    The constructors defined in this header that take x/y/z write 0.0f
    into the pad lane.
*/
class AI_TVector3SSE
{
public:
    //--- construction ---------------------------------------------------------

    /// Construct with all four lanes set to zero.
    AI_TVector3SSE();
    /// Construct from three components; the pad lane is set to 0.0f.
    AI_TVector3SSE(const float _x, const float _y, const float _z);
    /// Copy constructor (copies all four lanes).
    AI_TVector3SSE(const AI_TVector3SSE& vec);
    /// Construct from a float pointer.
    /// NOTE(review): no definition is visible in this header -- confirm where
    /// it lives and how many floats it reads.
    AI_TVector3SSE(const float* p);
    /// Construct from a raw SSE register (all four lanes are taken as-is).
    AI_TVector3SSE(const __m128& m);

    //--- assignment -----------------------------------------------------------

    /// Set the three components; the pad lane is set to 0.0f.
    void set(const float _x, const float _y, const float _z);
    /// Copy all four lanes from another vector.
    void set(const AI_TVector3SSE& vec);
    /// Set from a float pointer.
    /// NOTE(review): no definition is visible in this header -- confirm.
    void set(const float* p);

    //--- length ---------------------------------------------------------------

    /// Length of (x, y, z).
    float len() const;
    /// Squared length of (x, y, z).
    float lensquared() const;
    /// Scale the vector in place by an approximate 1/length.
    void norm();

    //--- in-place arithmetic --------------------------------------------------

    /// Lane-wise in-place addition.
    void operator +=(const AI_TVector3SSE& v);
    /// Lane-wise in-place subtraction.
    void operator -=(const AI_TVector3SSE& v);
    /// In-place multiplication of every lane by a scalar.
    void operator *=(float s);

    //--- comparison -----------------------------------------------------------

    /// True unless x, y or z differs from v's by more than tol.
    bool isequal(const AI_TVector3SSE& v, float tol) const;
    /// Three-way compare with tolerance: x is checked first, then y, then z.
    int compare(const AI_TVector3SSE& v, float tol) const;

    //--- geometry -------------------------------------------------------------

    /// Rotate this vector in place around an axis by an angle.
    void rotate(const AI_TVector3SSE& axis, float angle);
    /// this = v0 + (this - v0) * lerpVal, applied to x, y and z.
    void lerp(const AI_TVector3SSE& v0, float lerpVal);
    /// this = v0 + (v1 - v0) * lerpVal, applied to x, y and z.
    void lerp(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1, float lerpVal);
    /// Return a vector orthogonal to this one.
    AI_TVector3SSE findortho() const;

    //--- data -----------------------------------------------------------------

    /// The same 16 bytes viewed as an SSE register or as named floats.
    union
    {
        __m128 m128;
        struct
        {
            float x, y, z, pad;
        };
    };
};
00068
00069
00072 inline
00073 AI_TVector3SSE::AI_TVector3SSE()
00074 {
00075 m128 = _mm_setzero_ps();
00076 }
00077
00078
00081 inline
00082 AI_TVector3SSE::AI_TVector3SSE(const float _x, const float _y, const float _z)
00083 {
00084 m128 = _mm_set_ps(0.0f, _z, _y, _x);
00085 }
00086
00087
00090 inline
00091 AI_TVector3SSE::AI_TVector3SSE(const AI_TVector3SSE& vec)
00092 {
00093 m128 = vec.m128;
00094 }
00095
00096
00099 inline
00100 AI_TVector3SSE::AI_TVector3SSE(const __m128& m)
00101 {
00102 m128 = m;
00103 }
00104
00105
00108 inline
00109 void
00110 AI_TVector3SSE::set(const float _x, const float _y, const float _z)
00111 {
00112 m128 = _mm_set_ps(0.0f, _z, _y, _x);
00113 }
00114
00115
00118 inline
00119 void
00120 AI_TVector3SSE::set(const AI_TVector3SSE& vec)
00121 {
00122 m128 = vec.m128;
00123 }
00124
00125
00128 inline
00129 float
00130 AI_TVector3SSE::len() const
00131 {
00132 static const int X = 0;
00133 static const int Y = 1;
00134 static const int Z = 2;
00135 static const int W = 3;
00136
00137 __m128 a = _mm_mul_ps(m128, m128);
00138
00139
00140 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00141 __m128 l = _mm_sqrt_ss(b);
00142 return l.m128_f32[X];
00143 }
00144
00145
00148 inline
00149 float
00150 AI_TVector3SSE::lensquared() const
00151 {
00152 static const int X = 0;
00153 static const int Y = 1;
00154 static const int Z = 2;
00155 static const int W = 3;
00156
00157 __m128 a = _mm_mul_ps(m128, m128);
00158 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00159 return b.m128_f32[X];
00160 }
00161
00162
00165 inline
00166 void
00167 AI_TVector3SSE::norm()
00168 {
00169 static const int X = 0;
00170 static const int Y = 1;
00171 static const int Z = 2;
00172 static const int W = 3;
00173
00174 __m128 a = _mm_mul_ps(m128, m128);
00175
00176
00177 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(X,X,X,X)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(Y,Y,Y,Y)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(Z,Z,Z,Z))));
00178
00179
00180 __m128 f = _mm_rsqrt_ss(b);
00181 __m128 oneDivLen = _mm_shuffle_ps(f, f, _MM_SHUFFLE(X,X,X,X));
00182
00183 m128 = _mm_mul_ps(m128, oneDivLen);
00184 }
00185
00186
00189 inline
00190 void
00191 AI_TVector3SSE::operator +=(const AI_TVector3SSE& v)
00192 {
00193 m128 = _mm_add_ps(m128, v.m128);
00194 }
00195
00196
00199 inline
00200 void
00201 AI_TVector3SSE::operator -=(const AI_TVector3SSE& v)
00202 {
00203 m128 = _mm_sub_ps(m128, v.m128);
00204 }
00205
00206
00209 inline
00210 void
00211 AI_TVector3SSE::operator *=(float s)
00212 {
00213 __m128 packed = _mm_set1_ps(s);
00214 m128 = _mm_mul_ps(m128, packed);
00215 }
00216
00217
00220 inline
00221 bool
00222 AI_TVector3SSE::isequal(const AI_TVector3SSE& v, float tol) const
00223 {
00224 if (fabs(v.x - x) > tol) return false;
00225 else if (fabs(v.y - y) > tol) return false;
00226 else if (fabs(v.z - z) > tol) return false;
00227 return true;
00228 }
00229
00230
00233 inline
00234 int
00235 AI_TVector3SSE::compare(const AI_TVector3SSE& v, float tol) const
00236 {
00237 if (fabs(v.x - x) > tol) return (v.x > x) ? +1 : -1;
00238 else if (fabs(v.y - y) > tol) return (v.y > y) ? +1 : -1;
00239 else if (fabs(v.z - z) > tol) return (v.z > z) ? +1 : -1;
00240 else return 0;
00241 }
00242
00243
00246 inline
00247 void
00248 AI_TVector3SSE::rotate(const AI_TVector3SSE& axis, float angle)
00249 {
00250
00251
00252 float rotM[9];
00253 float sa, ca;
00254
00255 sa = (float) sin(angle);
00256 ca = (float) cos(angle);
00257
00258
00259 rotM[0] = ca + (1 - ca) * axis.x * axis.x;
00260 rotM[1] = (1 - ca) * axis.x * axis.y - sa * axis.z;
00261 rotM[2] = (1 - ca) * axis.z * axis.x + sa * axis.y;
00262 rotM[3] = (1 - ca) * axis.x * axis.y + sa * axis.z;
00263 rotM[4] = ca + (1 - ca) * axis.y * axis.y;
00264 rotM[5] = (1 - ca) * axis.y * axis.z - sa * axis.x;
00265 rotM[6] = (1 - ca) * axis.z * axis.x - sa * axis.y;
00266 rotM[7] = (1 - ca) * axis.y * axis.z + sa * axis.x;
00267 rotM[8] = ca + (1 - ca) * axis.z * axis.z;
00268
00269
00270 AI_TVector3SSE help(rotM[0] * this->x + rotM[1] * this->y + rotM[2] * this->z,
00271 rotM[3] * this->x + rotM[4] * this->y + rotM[5] * this->z,
00272 rotM[6] * this->x + rotM[7] * this->y + rotM[8] * this->z);
00273 *this = help;
00274 }
00275
00276
00279 static
00280 inline
00281 AI_TVector3SSE operator +(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1)
00282 {
00283 return AI_TVector3SSE(_mm_add_ps(v0.m128, v1.m128));
00284 }
00285
00286
00289 static
00290 inline
00291 AI_TVector3SSE operator -(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1)
00292 {
00293 return AI_TVector3SSE(_mm_sub_ps(v0.m128, v1.m128));
00294 }
00295
00296
00299 static
00300 inline
00301 AI_TVector3SSE operator *(const AI_TVector3SSE& v0, const float s)
00302 {
00303 __m128 packed = _mm_set1_ps(s);
00304 return AI_TVector3SSE(_mm_mul_ps(v0.m128, packed));
00305 }
00306
00307
00310 static
00311 inline
00312 AI_TVector3SSE operator -(const AI_TVector3SSE& v)
00313 {
00314 __m128 zero = _mm_setzero_ps();
00315 return AI_TVector3SSE(_mm_sub_ps(zero, v.m128));
00316 }
00317
00318
00322 static
00323 inline
00324 float operator %(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1)
00325 {
00326 __m128 a = _mm_mul_ps(v0.m128, v1.m128);
00327 __m128 b = _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(0,0,0,0)), _mm_add_ss(_mm_shuffle_ps(a, a, _MM_SHUFFLE(1,1,1,1)), _mm_shuffle_ps(a, a, _MM_SHUFFLE(2,2,2,2))));
00328 return b.m128_f32[0];
00329 }
00330
00331
00335 static
00336 inline
00337 AI_TVector3SSE operator *(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1)
00338 {
00339
00340
00341
00342
00343
00344
00345
00346
00347
00348
00349 static const int X = 0;
00350 static const int Y = 1;
00351 static const int Z = 2;
00352 static const int W = 3;
00353
00354 __m128 a = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, X, Z, Y));
00355 __m128 b = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, Y, X, Z));
00356 __m128 c = _mm_shuffle_ps(v0.m128, v0.m128, _MM_SHUFFLE(W, Y, X, Z));
00357 __m128 d = _mm_shuffle_ps(v1.m128, v1.m128, _MM_SHUFFLE(W, X, Z, Y));
00358
00359 __m128 e = _mm_mul_ps(a, b);
00360 __m128 f = _mm_mul_ps(c, d);
00361
00362 return AI_TVector3SSE(_mm_sub_ps(e, f));
00363 }
00364
00365
00368 inline
00369 void
00370 AI_TVector3SSE::lerp(const AI_TVector3SSE& v0, float lerpVal)
00371 {
00372 x = v0.x + ((x - v0.x) * lerpVal);
00373 y = v0.y + ((y - v0.y) * lerpVal);
00374 z = v0.z + ((z - v0.z) * lerpVal);
00375 }
00376
00377
00380 inline
00381 void
00382 AI_TVector3SSE::lerp(const AI_TVector3SSE& v0, const AI_TVector3SSE& v1, float lerpVal)
00383 {
00384 x = v0.x + ((v1.x - v0.x) * lerpVal);
00385 y = v0.y + ((v1.y - v0.y) * lerpVal);
00386 z = v0.z + ((v1.z - v0.z) * lerpVal);
00387 }
00388
00389
00394 inline
00395 AI_TVector3SSE
00396 AI_TVector3SSE::findortho() const
00397 {
00398 if (0.0 != x)
00399 {
00400 return AI_TVector3SSE((-y - z) / x, 1.0, 1.0);
00401 } else
00402 if (0.0 != y)
00403 {
00404 return AI_TVector3SSE(1.0, (-x - z) / y, 1.0);
00405 } else
00406 if (0.0 != z)
00407 {
00408 return AI_TVector3SSE(1.0, 1.0, (-x - y) / z);
00409 } else
00410 {
00411 return AI_TVector3SSE(0.0, 0.0, 0.0);
00412 }
00413 }
00414
00415
00416 #endif