Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1114 - in trunk: Engine/Shadows Interface Model/Materials Model/Primitives
- Date: Fri, 9 Jun 2006 17:47:08 -0600 (MDT)
Author: sparker
Date: Fri Jun 9 17:47:05 2006
New Revision: 1114
Modified:
trunk/Engine/Shadows/HardShadows.cc
trunk/Interface/RayPacket.h
trunk/Model/Materials/Phong.cc
trunk/Model/Primitives/Parallelogram.cc
trunk/Model/Primitives/Sphere.cc
Log:
Fix 64-bit SSE build
Modified: trunk/Engine/Shadows/HardShadows.cc
==============================================================================
--- trunk/Engine/Shadows/HardShadows.cc (original)
+++ trunk/Engine/Shadows/HardShadows.cc Fri Jun 9 17:47:05 2006
@@ -100,7 +100,12 @@
__m128 dir = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx),
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
__m128 mask = _mm_cmple_ps(dir, _mm_setzero_ps());
+#ifdef __x86_64
+ _mm_store_ps((float*)&shadowData->hitMatl[i], _mm_unpacklo_ps(mask,
mask));
+ _mm_store_ps((float*)&shadowData->hitMatl[i+2], _mm_unpackhi_ps(mask,
mask));
+#else
_mm_store_ps((float*)&shadowData->hitMatl[i], mask);
+#endif
if(_mm_movemask_ps(mask) != 0xf){
// Mask is inverted to make our life easier. 1 means do not compute shadow ray
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Fri Jun 9 17:47:05 2006
@@ -376,7 +376,12 @@
data->minT[i] = MAXT;
}
for(;i<e;i+=4){
+#ifdef __x86_64
_mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
+ _mm_store_ps((float*)&data->hitMatl[i+2], _mm_setzero_ps());
+#else
+ _mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
+#endif
_mm_store_ps(&data->minT[i], _mm_set1_ps(MAXT));
}
for(;i<rayEnd;i++){
Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc (original)
+++ trunk/Model/Materials/Phong.cc Fri Jun 9 17:47:05 2006
@@ -184,7 +184,13 @@
RayPacketData* data = rays.data;
RayPacketData* shadowData = shadowRays.data;
for(;i<e;i+=4){
+#ifdef __x86_64
+ __m128 masklo =
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd());
+ __m128 maskhi =
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]),
_mm_setzero_pd());
+ __m128 mask = _mm_shuffle_ps(masklo, maskhi, _MM_SHUFFLE(2, 0, 2, 0));
+#else
__m128 mask =
_mm_cmpeq_ps(_mm_load_ps((float*)&shadowData->hitMatl[i]), _mm_setzero_ps());
+#endif
if(_mm_movemask_ps(mask) == 0)
continue;
// Not in shadow, so compute the direct and specular contributions.
Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc (original)
+++ trunk/Model/Primitives/Parallelogram.cc Fri Jun 9 17:47:05 2006
@@ -135,14 +135,35 @@
if(_mm_movemask_ps(hit) == 15){
_mm_store_ps(&data->minT[i], t);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
_mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
_mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
} else {
_mm_maskmoveu_si128((__m128i)t, (__m128i)hit,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit,
(char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit,
(char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
_mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
_mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
_mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
}
}
for(;i<rays.rayEnd;i++){
@@ -276,10 +297,38 @@
if(_mm_movemask_ps(hit) == 0)
continue;
- _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, (char*)&data->minT[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+ if(_mm_movemask_ps(hit) == 15){
+ _mm_store_ps(&data->minT[i], t);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+ } else {
+ _mm_maskmoveu_si128((__m128i)t, (__m128i)hit,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit,
(char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit,
(char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+ }
}
for(;i<rays.rayEnd;i++){
Vector dir = rays.getDirection(i);
Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc (original)
+++ trunk/Model/Primitives/Sphere.cc Fri Jun 9 17:47:05 2006
@@ -120,17 +120,38 @@
__m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0,
_mm_set1_ps(T_EPSILON)));
if(_mm_movemask_ps(hit0) != 0){
hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0,
_mm_load_ps(&data->minT[i])));
- if(_mm_movemask_ps(hit0) == 15){
- _mm_store_ps(&data->minT[i], t0);
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
- } else if(_mm_movemask_ps(hit0) != 0) {
- _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit0, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit0, (char*)&data->hitTex[i]);
- }
+ if(_mm_movemask_ps(hit0) == 15){
+ _mm_store_ps(&data->minT[i], t0);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+ } else {
+ _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+ }
// Mask off rays that successfully hit at t0
hit = _mm_andnot_ps(hit, hit0);
if(_mm_movemask_ps(hit) == 0)
@@ -140,17 +161,38 @@
__m128 t1 = _mm_sub_ps(r, B);
__m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1,
_mm_set1_ps(T_EPSILON)));
hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1,
_mm_load_ps(&data->minT[i])));
- if(_mm_movemask_ps(hit1) == 15){
- _mm_store_ps(&data->minT[i], t1);
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
- } else if(_mm_movemask_ps(hit1) != 0){
- _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit1, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit1, (char*)&data->hitTex[i]);
- }
+ if(_mm_movemask_ps(hit1) == 15){
+ _mm_store_ps(&data->minT[i], t1);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+ } else {
+ _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit,
(char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit,
(char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+ }
}
for(;i<rays.rayEnd;i++){
Vector D(rays.getDirection(i));
@@ -273,17 +315,38 @@
__m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0,
_mm_set1_ps(T_EPSILON)));
if(_mm_movemask_ps(hit0) != 0){
hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0,
_mm_load_ps(&data->minT[i])));
- if(_mm_movemask_ps(hit0) == 15){
- _mm_store_ps(&data->minT[i], t0);
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
- } else if(_mm_movemask_ps(hit0) != 0) {
- _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit0, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit0, (char*)&data->hitTex[i]);
- }
+ if(_mm_movemask_ps(hit0) == 15){
+ _mm_store_ps(&data->minT[i], t0);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+ } else {
+ _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+ }
// Mask off rays that successfully hit at t0
hit = _mm_andnot_ps(hit, hit0);
if(_mm_movemask_ps(hit) == 0)
@@ -293,17 +356,38 @@
__m128 t1 = _mm_sub_ps(r, B);
__m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1,
_mm_set1_ps(T_EPSILON)));
hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1,
_mm_load_ps(&data->minT[i])));
- if(_mm_movemask_ps(hit1) == 15){
- _mm_store_ps(&data->minT[i], t1);
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
- } else if(_mm_movemask_ps(hit1) != 0){
- _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit1, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit1, (char*)&data->hitTex[i]);
- }
+ if(_mm_movemask_ps(hit1) == 15){
+ _mm_store_ps(&data->minT[i], t1);
+#ifdef __x86_64
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi64x((long)getTexCoordMapper()));
+ _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+ } else {
+ _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
+#ifdef __x86_64
+ __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
+ __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit,
(char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit,
(char*)&data->hitMatl[i+2]);
+
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit,
(char*)&data->hitPrim[i+2]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
+#else
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+ }
}
for(;i<rays.rayEnd;i++){
Vector O(rays.getOrigin(i)-center);
- [MANTA] r1114 - in trunk: Engine/Shadows Interface Model/Materials Model/Primitives, sparker, 06/09/2006
Archive powered by MHonArc 2.6.16.