Text archives Help
- From: abe@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1119 - in trunk: . Core/Math Model/Materials Model/Primitives
- Date: Thu, 15 Jun 2006 19:22:44 -0600 (MDT)
Author: abe
Date: Thu Jun 15 19:22:42 2006
New Revision: 1119
Modified:
trunk/CMakeLists.txt
trunk/Core/Math/SSEDefs.h
trunk/Model/Materials/Phong.cc
trunk/Model/Primitives/Parallelogram.cc
trunk/Model/Primitives/Sphere.cc
Log:
M Core/Math/SSEDefs.h
Moved the _mm_set1_epi64x function into this file from Sphere and Parallelogram.
M Model/Materials/Phong.cc
M Model/Primitives/Parallelogram.cc
M Model/Primitives/Sphere.cc
Fixed sse_t casting problems using _mm_castps_si128(...) and similar
intrinsics, which appear to be available on both gcc and intel.
M CMakeLists.txt
Added a check for gcc-only intrinsic functions.
Note: CheckeredTexture doesn't compile due to casting problems, but Sphere and
Parallelogram do on icc 9.0 and 9.1.
Modified: trunk/CMakeLists.txt
==============================================================================
--- trunk/CMakeLists.txt (original)
+++ trunk/CMakeLists.txt Thu Jun 15 19:22:42 2006
@@ -66,6 +66,8 @@
ENDFOREACH(arg ${ARGN})
ENDMACRO(FORCE_ADD_CXX_FLAGS)
+
+
# This MACRO is designed to set variables to default values only on
# the first configure. Subsequent configures will produce no ops.
MACRO(FIRST_TIME_SET VARIABLE VALUE TYPE COMMENT)
@@ -168,15 +170,28 @@
##################################################################
# Check to see if the system supports SSE2.
-FILE(WRITE ${CMAKE_BINARY_DIR}/test/sse_test.c "#include
<emmintrin.h>\nstatic __m128 foo;\n\n")
-
+FILE(WRITE ${CMAKE_BINARY_DIR}/test/test.c "#include <emmintrin.h>\nstatic
__m128 foo;\n\n")
EXEC_PROGRAM(${CMAKE_CXX_COMPILER}
- ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/sse_test.c
+ ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/test.c
OUTPUT_VARIABLE OUTPUT
RETURN_VALUE NOT_MANTA_SSE )
IF(NOT NOT_MANTA_SSE)
SET(MANTA_SSE TRUE CACHE BOOL "Compile SSE code.")
+
+ # Check to see if the system is using gcc sse intrinsics.
+ FILE(WRITE ${CMAKE_BINARY_DIR}/test/test.c "#include <emmintrin.h>\nstatic
__m128i foo = _mm_set1_epi64x( (long long)1 );\n\n")
+ EXEC_PROGRAM(${CMAKE_CXX_COMPILER}
+ ARGS -c ${CMAKE_CXX_FLAGS} -o /dev/null ${CMAKE_BINARY_DIR}/test/test.c
+ OUTPUT_VARIABLE OUTPUT
+ RETURN_VALUE NOT_MANTA_SSE_GCC )
+
+ IF(NOT NOT_MANTA_SSE_GCC)
+ SET(MANTA_SSE_GCC TRUE CACHE BOOL "Found *epi64x intrinsics")
+ ELSE(NOT NOT_MANTA_SSE_GCC)
+ SET(MANTA_SSE_GCC FALSE CACHE BOOL "Couldn't find *epi64x intrinsics"
FORCE)
+ ENDIF(NOT NOT_MANTA_SSE_GCC)
+
ELSE(NOT NOT_MANTA_SSE)
IF(MANTA_SSE)
# We tried to turn it on and it isn't available
@@ -186,6 +201,7 @@
MESSAGE(${OUTPUT})
ENDIF(MANTA_SSE)
SET(MANTA_SSE FALSE CACHE BOOL "Couldn't compile SSE code." FORCE)
+ SET(MANTA_SSE_GCC FALSE CACHE BOOL "Couldn't find *epi64x intrinsics."
FORCE)
ENDIF(NOT NOT_MANTA_SSE)
##################################################################
Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h (original)
+++ trunk/Core/Math/SSEDefs.h Thu Jun 15 19:22:42 2006
@@ -49,6 +49,23 @@
namespace Manta
{
+
+#if defined(__x86_64) && defined(__INTEL_COMPILER)
+
+ static inline
+ __m128i _mm_set1_epi64x(long long val)
+ {
+ const int low = (0xFFFFFFFF00000000L & val) >> 32 ;
+ const int high = ( 0xFFFFFFFFL & val);
+ return _mm_set_epi32(low, high, low, high);
+ }
+
+#endif
+
+
+
+
+
static const MANTA_ALIGN(16) sse_t _mm_eps = _mm_set_ps1(1e-5);
static const MANTA_ALIGN(16) sse_t _mm_minus_eps = _mm_set_ps1(-1e-5);
static const MANTA_ALIGN(16) sse_t _mm_epsilon = _mm_set_ps1(1e-5);
Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc (original)
+++ trunk/Model/Materials/Phong.cc Thu Jun 15 19:22:42 2006
@@ -184,13 +184,15 @@
RayPacketData* data = rays.data;
RayPacketData* shadowData = shadowRays.data;
for(;i<e;i+=4){
+
#ifdef __x86_64
- __m128 masklo =
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd());
- __m128 maskhi =
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]),
_mm_setzero_pd());
+ __m128 masklo = _mm_castpd_ps(
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i]), _mm_setzero_pd())
);
+ __m128 maskhi = _mm_castpd_ps(
_mm_cmpeq_pd(_mm_load_pd((double*)&shadowData->hitMatl[i+2]),
_mm_setzero_pd()) );
__m128 mask = _mm_shuffle_ps(masklo, maskhi, _MM_SHUFFLE(2, 0, 2, 0));
#else
__m128 mask =
_mm_cmpeq_ps(_mm_load_ps((float*)&shadowData->hitMatl[i]), _mm_setzero_ps());
#endif
+
if(_mm_movemask_ps(mask) == 0)
continue;
// Not in shadow, so compute the direct and specular contributions.
@@ -210,9 +212,9 @@
_mm_store_ps(&ambientAndDiffuseLight[1][i],
_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]), _mm_mul_ps(lightg,
cos_theta)));
_mm_store_ps(&ambientAndDiffuseLight[2][i],
_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]), _mm_mul_ps(lightb,
cos_theta)));
} else {
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[0][i]),
_mm_mul_ps(lightr, cos_theta)), (__m128i)mask,
(char*)&ambientAndDiffuseLight[0][i]);
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]),
_mm_mul_ps(lightg, cos_theta)), (__m128i)mask,
(char*)&ambientAndDiffuseLight[1][i]);
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]),
_mm_mul_ps(lightb, cos_theta)), (__m128i)mask,
(char*)&ambientAndDiffuseLight[2][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[0][i]),
_mm_mul_ps(lightr, cos_theta))), (__m128i) _mm_castps_si128(mask),
(char*)&ambientAndDiffuseLight[0][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[1][i]),
_mm_mul_ps(lightg, cos_theta))), (__m128i) _mm_castps_si128(mask),
(char*)&ambientAndDiffuseLight[1][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&ambientAndDiffuseLight[2][i]),
_mm_mul_ps(lightb, cos_theta))), (__m128i) _mm_castps_si128(mask),
(char*)&ambientAndDiffuseLight[2][i]);
}
__m128 Hx = _mm_sub_ps(sdx, _mm_load_ps(&data->direction[0][i]));
@@ -242,9 +244,9 @@
_mm_store_ps(&specularLight[1][i],
_mm_add_ps(_mm_load_ps(&specularLight[1][i]), _mm_mul_ps(lightg, scale)));
_mm_store_ps(&specularLight[2][i],
_mm_add_ps(_mm_load_ps(&specularLight[2][i]), _mm_mul_ps(lightb, scale)));
} else {
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[0][i]),
_mm_mul_ps(lightr, scale)), (__m128i)mask, (char*)&specularLight[0][i]);
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[1][i]),
_mm_mul_ps(lightg, scale)), (__m128i)mask, (char*)&specularLight[1][i]);
-
_mm_maskmoveu_si128((__m128i)_mm_add_ps(_mm_load_ps(&specularLight[2][i]),
_mm_mul_ps(lightb, scale)), (__m128i)mask, (char*)&specularLight[2][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[0][i]),
_mm_mul_ps(lightr, scale))), (__m128i) _mm_castps_si128(mask),
(char*)&specularLight[0][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[1][i]),
_mm_mul_ps(lightg, scale))), (__m128i) _mm_castps_si128(mask),
(char*)&specularLight[1][i]);
+ _mm_maskmoveu_si128((__m128i)
_mm_castps_si128(_mm_add_ps(_mm_load_ps(&specularLight[2][i]),
_mm_mul_ps(lightb, scale))), (__m128i) _mm_castps_si128(mask),
(char*)&specularLight[2][i]);
}
}
for(;i<rays.rayEnd;i++){
Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc (original)
+++ trunk/Model/Primitives/Parallelogram.cc Thu Jun 15 19:22:42 2006
@@ -4,22 +4,11 @@
#include <Core/Geometry/BBox.h>
#include <Core/Math/MiscMath.h>
#include <MantaSSE.h>
+#include <Core/Math/SSEDefs.h>
using namespace Manta;
using SCIRun::Abs;
-#ifdef __x86_64
-# ifndef __GNUC__
-static inline
-__m128i _mm_set1_epi64x(long long val)
-{
- int low = (0xFFFFFFFF00000000L & val) >> 32 ;
- int high = ( 0xFFFFFFFFL & val);
- return _mm_set_epi32(low, high, low, high);
-}
-# endif
-#endif
-
Parallelogram::Parallelogram(Material* material, const Vector& anchor,
const Vector& in_v1, const Vector& in_v2)
: PrimitiveCommon(material, this), anchor(anchor), v1(in_v1), v2(in_v2)
@@ -160,10 +149,10 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t, (__m128i)hit,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t),
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
#ifdef __x86_64
- __m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
- __m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
+ __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit,
hit));
+ __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit,
hit));
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
@@ -172,9 +161,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
#endif
}
}
@@ -324,7 +313,7 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t, (__m128i)hit,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t),
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
#ifdef __x86_64
__m128i lohit = (__m128i)_mm_unpacklo_ps(hit, hit);
__m128i hihit = (__m128i)_mm_unpackhi_ps(hit, hit);
@@ -336,9 +325,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
#endif
}
}
Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc (original)
+++ trunk/Model/Primitives/Sphere.cc Thu Jun 15 19:22:42 2006
@@ -6,23 +6,12 @@
#include <Core/Math/Trig.h>
#include <Core/Math/Expon.h>
#include <MantaSSE.h>
+#include <Core/Math/SSEDefs.h>
using namespace Manta;
using namespace SCIRun;
using namespace std;
-#ifdef __x86_64
-# ifndef __GNUC__
-static inline
-__m128i _mm_set1_epi64x(long long val)
-{
- int low = (0xFFFFFFFF00000000L & val) >> 32 ;
- int high = ( 0xFFFFFFFFL & val);
- return _mm_set_epi32(low, high, low, high);
-}
-# endif
-#endif
-
Sphere::Sphere(Material* material, const Vector& center, Real radius)
: PrimitiveCommon(material, this), center(center), radius(radius)
{
@@ -128,7 +117,7 @@
__m128 r = _mm_sqrt_ps(disc);
// -(r+B) The xor negates the value
- __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B),
(__m128)_mm_set1_epi32(0x80000000));
+ __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B),
(__m128)_mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
__m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0,
_mm_set1_ps(T_EPSILON)));
if(_mm_movemask_ps(hit0) != 0){
hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0,
_mm_load_ps(&data->minT[i])));
@@ -147,10 +136,10 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t0),
(__m128i)_mm_castps_si128(hit0), (char*)&data->minT[i]);
#ifdef __x86_64
- __m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
- __m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
+ __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit0,
hit0));
+ __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit0,
hit0));
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
@@ -159,9 +148,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit0, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit0, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitTex[i]);
#endif
}
// Mask off rays that successfully hit at t0
@@ -188,10 +177,10 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t1),
(__m128i)_mm_castps_si128(hit1), (char*)&data->minT[i]);
#ifdef __x86_64
- __m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
- __m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
+ __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit1,
hit1));
+ __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit1,
hit1));
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
@@ -200,9 +189,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit1, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit1, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitTex[i]);
#endif
}
}
@@ -323,7 +312,7 @@
__m128 r = _mm_sqrt_ps(disc);
// -(r+B) The xor negates the value
- __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B),
(__m128)_mm_set1_epi32(0x80000000));
+ __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B),
(__m128)_mm_castsi128_ps(_mm_set1_epi32(0x80000000)));
__m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0,
_mm_set1_ps(T_EPSILON)));
if(_mm_movemask_ps(hit0) != 0){
hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0,
_mm_load_ps(&data->minT[i])));
@@ -342,7 +331,7 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t0),
(__m128i)_mm_castps_si128(hit0), (char*)&data->minT[i]);
#ifdef __x86_64
__m128i lohit = (__m128i)_mm_unpacklo_ps(hit0, hit0);
__m128i hihit = (__m128i)_mm_unpackhi_ps(hit0, hit0);
@@ -354,9 +343,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit0, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit0, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit0), (char*)&data->hitTex[i]);
#endif
}
// Mask off rays that successfully hit at t0
@@ -383,7 +372,7 @@
_mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
#endif
} else {
- _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t1),
(__m128i)_mm_castps_si128(hit1), (char*)&data->minT[i]);
#ifdef __x86_64
__m128i lohit = (__m128i)_mm_unpacklo_ps(hit1, hit1);
__m128i hihit = (__m128i)_mm_unpackhi_ps(hit1, hit1);
@@ -395,9 +384,9 @@
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
_mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit1, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit1, (char*)&data->hitTex[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit1), (char*)&data->hitTex[i]);
#endif
}
}
- [MANTA] r1119 - in trunk: . Core/Math Model/Materials Model/Primitives, abe, 06/15/2006
Archive powered by MHonArc 2.6.16.