Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1119 - in trunk: . Core/Math Model/Materials Model/Primitives


Chronological Thread 
  • From: abe@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1119 - in trunk: . Core/Math Model/Materials Model/Primitives
  • Date: Thu, 15 Jun 2006 19:22:44 -0600 (MDT)

Author: abe
Date: Thu Jun 15 19:22:42 2006
New Revision: 1119

Modified:
   trunk/CMakeLists.txt
   trunk/Core/Math/SSEDefs.h
   trunk/Model/Materials/Phong.cc
   trunk/Model/Primitives/Parallelogram.cc
   trunk/Model/Primitives/Sphere.cc
Log:


M    Core/Math/SSEDefs.h
Moved the _mm_set1_epi64x function to this file from Sphere and Parallelogram.

M    Model/Materials/Phong.cc
M    Model/Primitives/Parallelogram.cc
M    Model/Primitives/Sphere.cc
Fixed sse_t casting problems using _mm_castps_si128(...) and similar
intrinsics. These appear to be available on both the gcc and Intel compilers.

M    CMakeLists.txt
Added a check for gcc-only intrinsic functions.

Note: CheckeredTexture doesn't compile due to casting problems, but Sphere and
Parallelogram do on icc 9.0 and 9.1.

Modified: trunk/CMakeLists.txt
==============================================================================
--- trunk/CMakeLists.txt        (original)
+++ trunk/CMakeLists.txt        Thu Jun 15 19:22:42 2006
@@ -66,6 +66,8 @@
   ENDFOREACH(arg ${ARGN})
 ENDMACRO(FORCE_ADD_CXX_FLAGS)
 
+
+
 # This MACRO is designed to set variables to default values only on
 # the first configure.  Subsequent configures will produce no ops.
 MACRO(FIRST_TIME_SET VARIABLE VALUE TYPE COMMENT)
@@ -168,15 +170,28 @@
 
 ##################################################################
 # Check to see if the system supports SSE2.
-FILE(WRITE ${CMAKE_BINARY_DIR}/test/sse_test.c "#include 
<emmintrin.h>\nstatic __m128 foo;\n\n")
-
+FILE(WRITE ${CMAKE_BINARY_DIR}/test/test.c "#include <emmintrin.h>\nstatic 
__m128 foo;\n\n")
 EXEC_PROGRAM(${CMAKE_CXX_COMPILER}
-  ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/sse_test.c
+  ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/test.c
   OUTPUT_VARIABLE OUTPUT
   RETURN_VALUE NOT_MANTA_SSE )
 
 IF(NOT NOT_MANTA_SSE)
   SET(MANTA_SSE TRUE CACHE BOOL "Compile SSE code.")
+
+  # Check to see if the system is using gcc sse intrinsics.
+  FILE(WRITE ${CMAKE_BINARY_DIR}/test/test.c "#include <emmintrin.h>\nstatic 
__m128i foo = _mm_set1_epi64x( (long long)1 );\n\n")
+  EXEC_PROGRAM(${CMAKE_CXX_COMPILER}
+    ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/test.c
+    OUTPUT_VARIABLE OUTPUT
+    RETURN_VALUE NOT_MANTA_SSE_GCC )  
+  
+  IF(NOT NOT_MANTA_SSE_GCC)
+    SET(MANTA_SSE_GCC TRUE CACHE BOOL "Found *epi64x intrinsics")
+  ELSE(NOT NOT_MANTA_SSE_GCC)
+    SET(MANTA_SSE_GCC FALSE CACHE BOOL "Couldn't find *epi64x intrinsics" 
FORCE)
+  ENDIF(NOT NOT_MANTA_SSE_GCC)
+
 ELSE(NOT NOT_MANTA_SSE)
   IF(MANTA_SSE)
     # We tried to turn it on and it isn't available
@@ -186,6 +201,7 @@
     MESSAGE(${OUTPUT})
   ENDIF(MANTA_SSE)
   SET(MANTA_SSE FALSE CACHE BOOL "Couldn't compile SSE code." FORCE)
+  SET(MANTA_SSE_GCC FALSE CACHE BOOL "Couldn't find *epi64x intrinsics." 
FORCE)
 ENDIF(NOT NOT_MANTA_SSE)
 
 ##################################################################

Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h   (original)
+++ trunk/Core/Math/SSEDefs.h   Thu Jun 15 19:22:42 2006
@@ -49,6 +49,23 @@
 
 namespace Manta
 {
+
+#if defined(__x86_64) && defined(__INTEL_COMPILER)
+
+    static inline
+       __m128i _mm_set1_epi64x(long long val)
+       {
+           const int low  = (0xFFFFFFFF00000000L & val) >> 32 ;
+           const int high = (        0xFFFFFFFFL & val);
+           return _mm_set_epi32(low, high, low, high);
+       }
+    
+#endif
+  
+
+
+
+
     static const MANTA_ALIGN(16) sse_t _mm_eps = _mm_set_ps1(1e-5);
     static const MANTA_ALIGN(16) sse_t _mm_minus_eps = _mm_set_ps1(-1e-5);
     static const MANTA_ALIGN(16) sse_t _mm_epsilon = _mm_set_ps1(1e-5);

Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc      (original)
+++ trunk/Model/Materials/Phong.cc      Thu Jun 15 19:22:42 2006
@@ -184,13 +184,15 @@
       RayPacketData* data = rays.data;
       RayPacketData* shadowData = shadowRays.data;
       for(;i<e;i+=4){
+
 #ifdef __x86_64
-        __m128 masklo = 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd());
-        __m128 maskhi = 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]), 
_mm_setzero_pd());
+       __m128 masklo = _mm_castpd_ps( 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd()) 
);
+        __m128 maskhi = _mm_castpd_ps( 
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]), 
_mm_setzero_pd()) );
        __m128 mask = _mm_shuffle_ps(masklo, maskhi, _MM_SHUFFLE(2, 0, 2, 0));
 #else
         __m128 mask = 
_mm_cmpeq_ps(_mm_load_ps((float*)&shadowData->hitMatl[i]), _mm_setzero_ps());
 #endif
+
         if(_mm_movemask_ps(mask) == 0)
           continue;
         // Not in shadow, so compute the direct and specular contributions.
@@ -210,9 +212,9 @@
           _mm_store_ps(&ambientAndDiffuseLight[1][i], 
_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]), _mm_mul_ps(lightg, 
cos_theta)));
           _mm_store_ps(&ambientAndDiffuseLight[2][i], 
_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]), _mm_mul_ps(lightb, 
cos_theta)));
         } else {
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[0][i]),
 _mm_mul_ps(lightr, cos_theta)), (__m128i)mask, 
(char*)&ambientAndDiffuseLight[0][i]);
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]),
 _mm_mul_ps(lightg, cos_theta)), (__m128i)mask, 
(char*)&ambientAndDiffuseLight[1][i]);
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]),
 _mm_mul_ps(lightb, cos_theta)), (__m128i)mask, 
(char*)&ambientAndDiffuseLight[2][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[0][i]), 
_mm_mul_ps(lightr, cos_theta))), (__m128i) _mm_castps_si128(mask), 
(char*)&ambientAndDiffuseLight[0][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]), 
_mm_mul_ps(lightg, cos_theta))), (__m128i) _mm_castps_si128(mask), 
(char*)&ambientAndDiffuseLight[1][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]), 
_mm_mul_ps(lightb, cos_theta))), (__m128i) _mm_castps_si128(mask), 
(char*)&ambientAndDiffuseLight[2][i]);
         }
         
         __m128 Hx = _mm_sub_ps(sdx, _mm_load_ps(&data->direction[0][i]));
@@ -242,9 +244,9 @@
           _mm_store_ps(&specularLight[1][i], 
_mm_add_ps(_mm_load_ps(&specularLight[1][i]), _mm_mul_ps(lightg, scale)));
           _mm_store_ps(&specularLight[2][i], 
_mm_add_ps(_mm_load_ps(&specularLight[2][i]), _mm_mul_ps(lightb, scale)));
         } else {
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[0][i]), 
_mm_mul_ps(lightr, scale)), (__m128i)mask, (char*)&specularLight[0][i]);
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[1][i]), 
_mm_mul_ps(lightg, scale)), (__m128i)mask, (char*)&specularLight[1][i]);
-          
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[2][i]), 
_mm_mul_ps(lightb, scale)), (__m128i)mask, (char*)&specularLight[2][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[0][i]), 
_mm_mul_ps(lightr, scale))), (__m128i) _mm_castps_si128(mask), 
(char*)&specularLight[0][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[1][i]), 
_mm_mul_ps(lightg, scale))), (__m128i) _mm_castps_si128(mask), 
(char*)&specularLight[1][i]);
+          _mm_maskmoveu_si128((__m128i) 
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[2][i]), 
_mm_mul_ps(lightb, scale))), (__m128i) _mm_castps_si128(mask), 
(char*)&specularLight[2][i]);
         }
       }
       for(;i<rays.rayEnd;i++){

Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc     (original)
+++ trunk/Model/Primitives/Parallelogram.cc     Thu Jun 15 19:22:42 2006
@@ -4,22 +4,11 @@
 #include <Core/Geometry/BBox.h>
 #include <Core/Math/MiscMath.h>
 #include <MantaSSE.h>
+#include <Core/Math/SSEDefs.h>
 
 using namespace Manta;
 using SCIRun::Abs;
 
-#ifdef __x86_64
-#  ifndef __GNUC__
-static inline
-__m128i _mm_set1_epi64x(long long val)
-{
-  int low  = (0xFFFFFFFF00000000L & val) >> 32 ;
-  int high = (        0xFFFFFFFFL & val);
-  return _mm_set_epi32(low, high, low, high);
-}
-#  endif
-#endif
-  
 Parallelogram::Parallelogram(Material* material, const Vector& anchor,
                              const Vector& in_v1, const Vector& in_v2)
   : PrimitiveCommon(material, this), anchor(anchor), v1(in_v1), v2(in_v2)
@@ -160,10 +149,10 @@
           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
         } else {
-          _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, 
(char*)&data->minT[i]);
+          _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t), 
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
 #ifdef __x86_64
-         __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
-         __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+         __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit, 
hit));
+         __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit, 
hit));
          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
           _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
 
@@ -172,9 +161,9 @@
           _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
           _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
 #endif
         }
       }
@@ -324,7 +313,7 @@
           _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
         } else {
-          _mm_maskmoveu_si128((__m128i)t, (__m128i)hit, 
(char*)&data->minT[i]);
+          _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t), 
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
 #ifdef __x86_64
          __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
          __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
@@ -336,9 +325,9 @@
           _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
           _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit, (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit, 
(char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit, (char*)&data->hitTex[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
+          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
 #endif
         }
       }

Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc    (original)
+++ trunk/Model/Primitives/Sphere.cc    Thu Jun 15 19:22:42 2006
@@ -6,23 +6,12 @@
 #include <Core/Math/Trig.h>
 #include <Core/Math/Expon.h>
 #include <MantaSSE.h>
+#include <Core/Math/SSEDefs.h>
 
 using namespace Manta;
 using namespace SCIRun;
 using namespace std;
 
-#ifdef __x86_64
-#  ifndef __GNUC__
-static inline
-__m128i _mm_set1_epi64x(long long val)
-{
-  int low  = (0xFFFFFFFF00000000L & val) >> 32 ;
-  int high = (        0xFFFFFFFFL & val);
-  return _mm_set_epi32(low, high, low, high);
-}
-#  endif
-#endif
-  
 Sphere::Sphere(Material* material, const Vector& center, Real radius)
 : PrimitiveCommon(material, this), center(center), radius(radius)
 {
@@ -128,7 +117,7 @@
 
           __m128 r = _mm_sqrt_ps(disc);
           // -(r+B)   The xor negates the value
-          __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B), 
(__m128)_mm_set1_epi32(0x80000000));
+          __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B), 
(__m128)_mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
           __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0, 
_mm_set1_ps(T_EPSILON)));
           if(_mm_movemask_ps(hit0) != 0){
             hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0, 
_mm_load_ps(&data->minT[i])));
@@ -147,10 +136,10 @@
              _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
            } else {
-             _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
+             _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t0), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->minT[i]);
 #ifdef __x86_64
-             __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
-             __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+             __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit0, 
hit0));
+             __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit0, 
hit0));
              _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
              _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
          
@@ -159,9 +148,9 @@
              _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
              _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit0, (char*)&data->hitMatl[i]);
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0, 
(char*)&data->hitPrim[i]);
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit0, (char*)&data->hitTex[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitTex[i]);
 #endif
            }
             // Mask off rays that successfully hit at t0
@@ -188,10 +177,10 @@
            _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
          } else {
-           _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
+           _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t1), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->minT[i]);
 #ifdef __x86_64
-           __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
-           __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+           __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit1, 
hit1));
+           __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit1, 
hit1));
            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
          
@@ -200,9 +189,9 @@
            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit1, (char*)&data->hitMatl[i]);
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1, 
(char*)&data->hitPrim[i]);
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit1, (char*)&data->hitTex[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitTex[i]);
 #endif
          }
         }
@@ -323,7 +312,7 @@
 
           __m128 r = _mm_sqrt_ps(disc);
           // -(r+B)   The xor negates the value
-          __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B), 
(__m128)_mm_set1_epi32(0x80000000));
+          __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B), 
(__m128)_mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
           __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0, 
_mm_set1_ps(T_EPSILON)));
           if(_mm_movemask_ps(hit0) != 0){
             hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0, 
_mm_load_ps(&data->minT[i])));
@@ -342,7 +331,7 @@
              _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
            } else {
-             _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
+             _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t0), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->minT[i]);
 #ifdef __x86_64
              __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
              __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
@@ -354,9 +343,9 @@
              _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
              _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit0, (char*)&data->hitMatl[i]);
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0, 
(char*)&data->hitPrim[i]);
-             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit0, (char*)&data->hitTex[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitMatl[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitPrim[i]);
+             _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitTex[i]);
 #endif
            }
             // Mask off rays that successfully hit at t0
@@ -383,7 +372,7 @@
            _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
 #endif
          } else {
-           _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
+           _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t1), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->minT[i]);
 #ifdef __x86_64
            __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
            __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
@@ -395,9 +384,9 @@
            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
 #else
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit1, (char*)&data->hitMatl[i]);
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1, 
(char*)&data->hitPrim[i]);
-           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit1, (char*)&data->hitTex[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitMatl[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitPrim[i]);
+           _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitTex[i]);
 #endif
          }
         }




  • [MANTA] r1119 - in trunk: . Core/Math Model/Materials Model/Primitives, abe, 06/15/2006

Archive powered by MHonArc 2.6.16.

Top of page