Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1114 - in trunk: Engine/Shadows Interface Model/Materials Model/Primitives


Chronological Thread 
  • From: sparker@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1114 - in trunk: Engine/Shadows Interface Model/Materials Model/Primitives
  • Date: Fri, 9 Jun 2006 17:47:08 -0600 (MDT)

Author: sparker
Date: Fri Jun  9 17:47:05 2006
New Revision: 1114

Modified:
   trunk/Engine/Shadows/HardShadows.cc
   trunk/Interface/RayPacket.h
   trunk/Model/Materials/Phong.cc
   trunk/Model/Primitives/Parallelogram.cc
   trunk/Model/Primitives/Sphere.cc
Log:
Fix 64-bit SSE build


Modified: trunk/Engine/Shadows/HardShadows.cc
==============================================================================
--- trunk/Engine/Shadows/HardShadows.cc (original)
+++ trunk/Engine/Shadows/HardShadows.cc Fri Jun  9 17:47:05 2006
@@ -100,7 +100,12 @@
         __m128 dir = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx), 
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
 
         __m128 mask = _mm_cmple_ps(dir, _mm_setzero_ps());
+#ifdef __x86_64
+       _mm_store_ps((float*)&shadowData->hitMatl[i], _mm_unpacklo_ps(mask, 
mask));
+       _mm_store_ps((float*)&shadowData->hitMatl[i+2], _mm_unpackhi_ps(mask, 
mask));
+#else
         _mm_store_ps((float*)&shadowData->hitMatl[i], mask);
+#endif
         if(_mm_movemask_ps(mask) != 0xf){
           // Mask is inverted to make our life easier.  1 means do not compute shadow ray
 

Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Fri Jun  9 17:47:05 2006
@@ -376,7 +376,12 @@
           data->minT[i] = MAXT;
         }
         for(;i<e;i+=4){
+#ifdef __x86_64
           _mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
+          _mm_store_ps((float*)&data->hitMatl[i+2], _mm_setzero_ps());
+#else
+          _mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
+#endif
           _mm_store_ps(&data->minT[i], _mm_set1_ps(MAXT));
         }
         for(;i<rayEnd;i++){

Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc      (original)
+++ trunk/Model/Materials/Phong.cc      Fri Jun  9 17:47:05 2006
@@ -184,7 +184,13 @@
       RayPacketData* data = rays.data;
       RayPacketData* shadowData = shadowRays.data;
       for(;i<e;i+=4){
+#ifdef __x86_64
+        __m128 masklo = 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd());
+        __m128 maskhi = 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]), 
_mm_setzero_pd());
+       __m128 mask = _mm_shuffle_ps(masklo, maskhi, _MM_SHUFFLE(2, 0, 2, 0));
+#else
         __m128 mask = 
_mm_cmpeq_ps(_mm_load_ps((float*)&shadowData->hitMatl[i]), _mm_setzero_ps());
+#endif
         if(_mm_movemask_ps(mask) == 0)
           continue;
         // Not in shadow, so compute the direct and specular contributions.

Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc     (original)
+++ trunk/Model/Primitives/Parallelogram.cc     Fri Jun  9 17:47:05 2006
@@ -135,14 +135,35 @@
 
         if(_mm_movemask_ps(hit) == 15){
           _mm_store_ps(&data->minT[i], t);
+#ifdef __x86_64
+          _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+          _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+          _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+          _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+          _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+          _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
           _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
           _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
         } else {
           _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+         __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
+         __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+         _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit, 
(char*)&data->hitMatl[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit, 
(char*)&data->hitMatl[i+2]);
+
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
         }
       }
       for(;i<rays.rayEnd;i++){
@@ -276,10 +297,38 @@
         if(_mm_movemask_ps(hit) == 0)
           continue;
 
-        _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, (char*)&data->minT[i]);
-        _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
-        _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
-        _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+        if(_mm_movemask_ps(hit) == 15){
+          _mm_store_ps(&data->minT[i], t);
+#ifdef __x86_64
+          _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+          _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+          _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+          _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+          _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+          _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+          _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+          _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+          _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+        } else {
+          _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+         __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
+         __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+         _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit, 
(char*)&data->hitMatl[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit, 
(char*)&data->hitMatl[i+2]);
+
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+        }
       }
       for(;i<rays.rayEnd;i++){
         Vector dir = rays.getDirection(i);

Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc    (original)
+++ trunk/Model/Primitives/Sphere.cc    Fri Jun  9 17:47:05 2006
@@ -120,17 +120,38 @@
           __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0, 
_mm_set1_ps(T_EPSILON)));
           if(_mm_movemask_ps(hit0) != 0){
             hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0, 
_mm_load_ps(&data->minT[i])));
-            if(_mm_movemask_ps(hit0) == 15){
-              _mm_store_ps(&data->minT[i], t0);
-              _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-              _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-              _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-            } else if(_mm_movemask_ps(hit0) != 0) {
-              _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit0, (char*)&data->hitMatl[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0, 
(char*)&data->hitPrim[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit0, (char*)&data->hitTex[i]);
-            }
+           if(_mm_movemask_ps(hit0) == 15){
+             _mm_store_ps(&data->minT[i], t0);
+#ifdef __x86_64
+             _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+             _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+             _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+             _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+             _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+             _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+           } else {
+             _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+             __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
+             __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
+         
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+           }
             // Mask off rays that successfully hit at t0
             hit = _mm_andnot_ps(hit, hit0);
             if(_mm_movemask_ps(hit) == 0)
@@ -140,17 +161,38 @@
           __m128 t1 = _mm_sub_ps(r, B);
           __m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1, 
_mm_set1_ps(T_EPSILON)));
           hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1, 
_mm_load_ps(&data->minT[i])));
-          if(_mm_movemask_ps(hit1) == 15){
-            _mm_store_ps(&data->minT[i], t1);
-            _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-            _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-            _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-          } else if(_mm_movemask_ps(hit1) != 0){
-            _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit1, (char*)&data->hitMatl[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1, 
(char*)&data->hitPrim[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit1, (char*)&data->hitTex[i]);
-          }
+         if(_mm_movemask_ps(hit1) == 15){
+           _mm_store_ps(&data->minT[i], t1);
+#ifdef __x86_64
+           _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+           _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+           _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+           _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+         } else {
+           _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+           __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
+           __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit, 
(char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit, 
(char*)&data->hitMatl[i+2]);
+         
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+         }
         }
         for(;i<rays.rayEnd;i++){
           Vector D(rays.getDirection(i));
@@ -273,17 +315,38 @@
           __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0, 
_mm_set1_ps(T_EPSILON)));
           if(_mm_movemask_ps(hit0) != 0){
             hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0, 
_mm_load_ps(&data->minT[i])));
-            if(_mm_movemask_ps(hit0) == 15){
-              _mm_store_ps(&data->minT[i], t0);
-              _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-              _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-              _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-            } else if(_mm_movemask_ps(hit0) != 0) {
-              _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit0, (char*)&data->hitMatl[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0, 
(char*)&data->hitPrim[i]);
-              _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit0, (char*)&data->hitTex[i]);
-            }
+           if(_mm_movemask_ps(hit0) == 15){
+             _mm_store_ps(&data->minT[i], t0);
+#ifdef __x86_64
+             _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+             _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+             _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+             _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+             _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+             _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+             _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+           } else {
+             _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+             __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
+             __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
+         
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+           }
             // Mask off rays that successfully hit at t0
             hit = _mm_andnot_ps(hit, hit0);
             if(_mm_movemask_ps(hit) == 0)
@@ -293,17 +356,38 @@
           __m128 t1 = _mm_sub_ps(r, B);
           __m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1, 
_mm_set1_ps(T_EPSILON)));
           hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1, 
_mm_load_ps(&data->minT[i])));
-          if(_mm_movemask_ps(hit1) == 15){
-            _mm_store_ps(&data->minT[i], t1);
-            _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-            _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-            _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-          } else if(_mm_movemask_ps(hit1) != 0){
-            _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit1, (char*)&data->hitMatl[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1, 
(char*)&data->hitPrim[i]);
-            _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit1, (char*)&data->hitTex[i]);
-          }
+         if(_mm_movemask_ps(hit1) == 15){
+           _mm_store_ps(&data->minT[i], t1);
+#ifdef __x86_64
+           _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi64x((long)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi64x((long)this));
+           _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long)this));
+           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+           _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long)getTexCoordMapper()));
+#else
+           _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+           _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+         } else {
+           _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
+#ifdef __x86_64
+           __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
+           __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), lohit, 
(char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getMaterial()), hihit, 
(char*)&data->hitMatl[i+2]);
+         
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), lohit, 
(char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)this), hihit, 
(char*)&data->hitPrim[i+2]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
lohit, (char*)&data->hitTex[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi64x((long)getTexCoordMapper()), 
hihit, (char*)&data->hitTex[i+2]);
+#else
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+#endif
+         }
         }
         for(;i<rays.rayEnd;i++){
           Vector O(rays.getOrigin(i)-center);




  • [MANTA] r1114 - in trunk: Engine/Shadows Interface Model/Materials Model/Primitives, sparker, 06/09/2006

Archive powered by MHonArc 2.6.16.

Top of page