Text archives Help
- From: boulos@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1234 - trunk/Model/Primitives
- Date: Sun, 12 Nov 2006 03:26:45 -0700 (MST)
Author: boulos
Date: Sun Nov 12 03:26:43 2006
New Revision: 1234
Modified:
trunk/Model/Primitives/WaldTriangle.cc
Log:
Remove the old SIMD code paths and make WaldTriangle use the
new RayPacket hit functions in its SSE path. This might fix
bugs on x86_64.
Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc (original)
+++ trunk/Model/Primitives/WaldTriangle.cc Sun Nov 12 03:26:43 2006
@@ -8,28 +8,6 @@
using namespace Manta;
using namespace std;
-
-#ifdef __POWERPC__
-#define USE_ALTIVEC 1
-#define USE_SSE 0
-#else
-#ifdef MANTA_SSE
-#define USE_ALTIVEC 0
-#define USE_SSE 0
-#define USE_SIMD 0
-#else
-#define USE_ALTIVEC 0
-#define USE_SSE 0
-#define USE_SIMD 0
-#endif
-#endif
-
-#ifndef USE_SIMD
-#define USE_SIMD 0 // off by default
-#endif
-
-#define USE_CSIMD (USE_SIMD && !USE_ALTIVEC && !USE_SSE)
-
WaldTriangle::WaldTriangle(Material* mat,
const Vector& _p1, const Vector& _p2, const
Vector& _p3) : PrimitiveCommon(mat)
{
@@ -171,51 +149,7 @@
mask_test = _mm_and_ps(mask_test, _mm_and_ps( _mm_cmpnlt_ps(mue,
_mm_setzero_ps()),
_mm_cmpnlt_ps(_mm_set1_ps(1.f), _mm_add_ps(mue, lambda))));
- int hit_result = _mm_movemask_ps(mask_test);
- switch (hit_result) {
- case 0x0:
- // all miss
- break;
- case 0xf:
- // all hit:
- _mm_store_ps(&data->minT[ray], f);
-#ifdef __x86_64
- _mm_store_si128((__m128i*)&data->hitMatl[ray],
_mm_set1_epi64x((long long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitMatl[ray+2],
_mm_set1_epi64x((long long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[ray],
_mm_set1_epi64x((long long)this));
- _mm_store_si128((__m128i*)&data->hitPrim[ray+2],
_mm_set1_epi64x((long long)this));
- _mm_store_si128((__m128i*)&data->hitTex[ray],
_mm_set1_epi64x((long long)getTexCoordMapper()));
- _mm_store_si128((__m128i*)&data->hitTex[ray+2],
_mm_set1_epi64x((long long)getTexCoordMapper()));
-#else
- _mm_store_si128((__m128i*)&data->hitMatl[ray],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[ray],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[ray],
_mm_set1_epi32((int)getTexCoordMapper()));
-#endif
- break;
- default:
- // mixed, TODO(boulos): are these casts really a good idea?
(shouldn't we do *((__m128i*)&f) instead?)
- _mm_maskmoveu_si128((__m128i)_mm_castps_si128(f),
(__m128i)_mm_castps_si128(mask_test),
- (char*)&data->minT[ray]);
-#ifdef __x86_64
- __m128i lohit =
(__m128i)_mm_castps_si128(_mm_unpacklo_ps(mask_test, mask_test));
- __m128i hihit =
(__m128i)_mm_castps_si128(_mm_unpackhi_ps(mask_test, mask_test));
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[ray]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[ray+2]);
-
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit,
(char*)&data->hitPrim[ray]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit,
(char*)&data->hitPrim[ray+2]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[ray]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[ray+2]);
-#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(mask_test),
- (char*)&data->hitMatl[ray]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(mask_test),
- (char*)&data->hitPrim[ray]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(mask_test),
- (char*)&data->hitTex[ray]);
-#endif
- break;
- }
+ rays.hitWithoutTminCheck(ray, mask_test, f, getMaterial(), this,
getTexCoordMapper());
}
for (int ray = sse_end; ray < ray_end; ++ray) {
@@ -251,7 +185,6 @@
}
}
#else
-#if !(USE_SIMD) // portable C version
void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays)
const
{
const int axis = k;
@@ -311,407 +244,4 @@
rays.hit(i, f, getMaterial(), this, getTexCoordMapper());
}
}
-#else
-// SIMD version
-#include <float.h>
-
-#if USE_ALTIVEC // altivec
-typedef vector float sse_t;
-typedef vector bool sse_mask_t;
-
-inline sse_t set4(float value)
-{
- union { float values[4]; sse_t vec_result; } loader;
- loader.values[0] = value;
- return vec_splat( loader.vec_result, 0 );
-}
-
-inline sse_t set44(float v0, float v1, float v2, float v3)
-{
- union { float values[4]; sse_t vec_result; } loader;
- loader.values[0] = v0;
- loader.values[1] = v1;
- loader.values[2] = v2;
- loader.values[3] = v3;
- return loader.vec_result;
-}
-
-#define load4(p) vec_ld(0, (p))
-#define store4(v, p) vec_st((v), 0, (p))
-
-#define add4 vec_add
-#define sub4 vec_sub
-
-inline sse_t mul4(const sse_t& a, const sse_t& b)
-{
- const sse_t zero = {0.f,0.f,0.f,0.f};
- return vec_madd(a, b, zero);
-}
-
-#define rcp4 vec_re
-#define and4 vec_and
-#define andnot4 vec_andc
-#define or4 vec_or
-
-inline bool none4(const sse_mask_t& mask)
-{
- const sse_mask_t zero = {false, false, false, false};
- return (vec_all_eq(mask, zero) == 1);
-}
-
-#define cmp_gt4 vec_cmpgt
-#define cmp_gte4 vec_cmpge
-#define cmp_lt4 vec_cmplt
-#define cmp_lte4 vec_cmple
-
-inline sse_t mask4(const sse_mask_t &mask, const sse_t& if_true, const sse_t
& if_false)
-{
- return vec_sel(if_false, if_true, mask);
-}
-
-
-#endif
-
-
-#if USE_SSE
-
-#include <xmmintrin.h>
-
-typedef __m128 sse_t;
-typedef __m128 sse_mask_t;
-
-#define set4 _mm_set_ps1
-
-inline sse_t set44(float v0, float v1, float v2, float v3)
-{
- return _mm_set_ps(v3, v2, v1, v0);
-}
-
-#define load4 _mm_load_ps
-#define store4(v,p) _mm_store_ps((p),(v))
-
-#define add4 _mm_add_ps
-#define sub4 _mm_sub_ps
-#define mul4 _mm_mul_ps
-#define rcp4 _mm_rcp_ps
-
-#define and4 _mm_and_ps
-
-inline sse_mask_t andnot4(const sse_t& a, const sse_t& b)
-{
- return _mm_andnot_ps(b, a);
-}
-
-#define or4 _mm_or_ps
-#define none4(mask) (_mm_movemask_ps( (mask) ) == 0)
-#define cmp_gt4 _mm_cmpgt_ps
-#define cmp_gte4 _mm_cmpge_ps
-#define cmp_lt4 _mm_cmplt_ps
-#define cmp_lte4 _mm_cmple_ps
-
-inline sse_t mask4(const sse_mask_t &mask, const sse_t& if_true, const sse_t
& if_false)
-{
- return or4(and4(mask,if_true), andnot4(if_false, mask));
-}
-
-
-#endif
-
-#if USE_CSIMD
-
-typedef struct
-{
- float v0,v1,v2,v3;
-} float4;
-
-typedef float4 sse_t;
-typedef float4 sse_mask_t;
-
-inline sse_t set4(float f)
-{
- sse_t ret;
- ret.v0 = ret.v1 = ret.v2 = ret.v3 = f;
- return ret;
-}
-
-inline sse_t set44(float v0, float v1, float v2, float v3)
-{
- sse_t ret;
- ret.v0 = v0;
- ret.v1 = v1;
- ret.v2 = v2;
- ret.v3 = v3;
- return ret;
-}
-
-// assume perfect alignment
-inline sse_t load4(const float* a)
-{
- sse_t ret = *((const sse_t*)a);
- return ret;
-}
-
-inline void store4(const sse_t& vec, float* addr)
-{
- sse_t* res = (sse_t*)addr;
- *res = vec;
-}
-
-inline sse_t add4(const sse_t& a, const sse_t& b)
-{
- sse_t ret;
- ret.v0 = a.v0 + b.v0;
- ret.v1 = a.v1 + b.v1;
- ret.v2 = a.v2 + b.v2;
- ret.v3 = a.v3 + b.v3;
- return ret;
-}
-
-inline sse_t sub4(const sse_t& a, const sse_t& b)
-{
- sse_t ret;
- ret.v0 = a.v0 - b.v0;
- ret.v1 = a.v1 - b.v1;
- ret.v2 = a.v2 - b.v2;
- ret.v3 = a.v3 - b.v3;
- return ret;
-}
-
-inline sse_t mul4(const sse_t& a, const sse_t& b)
-{
- sse_t ret;
- ret.v0 = a.v0 * b.v0;
- ret.v1 = a.v1 * b.v1;
- ret.v2 = a.v2 * b.v2;
- ret.v3 = a.v3 * b.v3;
- return ret;
-}
-
-inline sse_t rcp4(const sse_t& a)
-{
- sse_t ret;
- ret.v0 = 1.f/a.v0;
- ret.v1 = 1.f/a.v1;
- ret.v2 = 1.f/a.v2;
- ret.v3 = 1.f/a.v3;
- return ret;
-}
-
-inline sse_mask_t and4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- const unsigned int* a_u = (const unsigned int*)(&a.v0);
- const unsigned int* b_u = (const unsigned int*)(&b.v0);
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = a_u[0] & b_u[0];
- ret_u[1] = a_u[1] & b_u[1];
- ret_u[2] = a_u[2] & b_u[2];
- ret_u[3] = a_u[3] & b_u[3];
-
- return ret;
-}
-
-// Each element of the result is the logical AND of the corresponding
-// element of arg1 and the one's complement of the corresponding
-// element of arg2. (from apple)
-inline sse_mask_t andnot4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- const unsigned int* a_u = (const unsigned int*)(&a.v0);
- const unsigned int* b_u = (const unsigned int*)(&b.v0);
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = a_u[0] & (~b_u[0]);
- ret_u[1] = a_u[1] & (~b_u[1]);
- ret_u[2] = a_u[2] & (~b_u[2]);
- ret_u[3] = a_u[3] & (~b_u[3]);
-
- return ret;
-}
-
-inline sse_mask_t or4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- const unsigned int* a_u = (const unsigned int*)(&a.v0);
- const unsigned int* b_u = (const unsigned int*)(&b.v0);
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = a_u[0] | b_u[0];
- ret_u[1] = a_u[1] | b_u[1];
- ret_u[2] = a_u[2] | b_u[2];
- ret_u[3] = a_u[3] | b_u[3];
- return ret;
-}
-
-// if all 4 components == 0
-inline bool none4(const sse_mask_t& mask)
-{
- const unsigned int* mask_u = (const unsigned int*)(&mask.v0);
-
- return (mask_u[0] == 0 &&
- mask_u[1] == 0 &&
- mask_u[2] == 0 &&
- mask_u[3] == 0);
-}
-
-inline sse_mask_t cmp_gt4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = (a.v0 > b.v0) ? 0xffffffff : 0;
- ret_u[1] = (a.v1 > b.v1) ? 0xffffffff : 0;
- ret_u[2] = (a.v2 > b.v2) ? 0xffffffff : 0;
- ret_u[3] = (a.v3 > b.v3) ? 0xffffffff : 0;
- return ret;
-}
-
-inline sse_mask_t cmp_gte4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = (a.v0 >= b.v0) ? 0xffffffff : 0;
- ret_u[1] = (a.v1 >= b.v1) ? 0xffffffff : 0;
- ret_u[2] = (a.v2 >= b.v2) ? 0xffffffff : 0;
- ret_u[3] = (a.v3 >= b.v3) ? 0xffffffff : 0;
- return ret;
-}
-
-inline sse_mask_t cmp_lt4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = (a.v0 < b.v0) ? 0xffffffff : 0;
- ret_u[1] = (a.v1 < b.v1) ? 0xffffffff : 0;
- ret_u[2] = (a.v2 < b.v2) ? 0xffffffff : 0;
- ret_u[3] = (a.v3 < b.v3) ? 0xffffffff : 0;
- return ret;
-}
-
-inline sse_mask_t cmp_lte4(const sse_t& a, const sse_t& b)
-{
- sse_mask_t ret;
- unsigned int* ret_u = (unsigned int*)(&ret.v0);
- ret_u[0] = (a.v0 <= b.v0) ? 0xffffffff : 0;
- ret_u[1] = (a.v1 <= b.v1) ? 0xffffffff : 0;
- ret_u[2] = (a.v2 <= b.v2) ? 0xffffffff : 0;
- ret_u[3] = (a.v3 <= b.v3) ? 0xffffffff : 0;
- return ret;
-}
-
-inline sse_t mask4(const sse_mask_t& mask, const sse_t& if_true, const sse_t
&if_false)
-{
- sse_t ret;
- const unsigned int* mask_u = (const unsigned int*)&mask.v0;
-
- ret.v0 = (mask_u[0] == (0xffffffff)) ? if_true.v0 : if_false.v0;
- ret.v1 = (mask_u[1] == (0xffffffff)) ? if_true.v1 : if_false.v1;
- ret.v2 = (mask_u[2] == (0xffffffff)) ? if_true.v2 : if_false.v2;
- ret.v3 = (mask_u[3] == (0xffffffff)) ? if_true.v3 : if_false.v3;
- return ret;
-}
-
-#endif
-
-inline sse_t accurateRcp4(const sse_t& v)
-{
- const sse_t rcp = rcp4(v);
- return sub4(add4(rcp,rcp),mul4(mul4(rcp,rcp),v));
-}
-
-void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays)
const
-{
- const int kn = k;
- const int ku = (k==2)?0:k+1;
- const int kv = (k==0)?2:k-1;
-
- // what qualifiers go here?
- RayPacketData* data = rays.data;
-
- const sse_t* const dir_kn = (sse_t*)data->direction[kn];
- const sse_t* const dir_ku = (sse_t*)data->direction[ku];
- const sse_t* const dir_kv = (sse_t*)data->direction[kv];
-
- sse_t* minT = (sse_t*)data->minT;
- sse_t org_kn, org_ku, org_kv, f0;
-
- const sse_t sse_nu = set4(n_u);
- const sse_t sse_nv = set4(n_v);
- const sse_t sse_nd = set4(n_d);
-
- const sse_t sse_bnu = set4(b_nu);
- const sse_t sse_bnv = set4(b_nv);
- const sse_t sse_bd = set4(b_d);
-
- const sse_t sse_nk = set4(n_k);
- const sse_t sse_cnu = set4(c_nu);
- const sse_t sse_cnv = set4(c_nv);
- const sse_t sse_cd = set4(c_d);
-
- const sse_t sse_eps = set4(T_EPSILON);
-
- // first SSE aligned ray in the packet of rays:
- const int sse_begin = rays.begin() >> 2; // equivalent to
Floor(rays.begin()/4)
- const int sse_end = ((rays.end()+3) >> 2); // Ceil(rays.end()-1/4)
-
- const int ray_begin = rays.begin();
- const int ray_end = rays.end();
-
- const bool RaysConstantOrigin = rays.getAllFlags() &
RayPacket::ConstantOrigin;
-
- if (RaysConstantOrigin)
- {
- org_kn = set4(data->origin[kn][ray_begin]);
- org_ku = set4(data->origin[ku][ray_begin]);
- org_kv = set4(data->origin[kv][ray_begin]);
-
- f0 = sub4(sse_nd, add4(org_kn, add4(mul4(sse_nu,
org_ku),mul4(sse_nv,org_kv))));
- }
-
- for (int i = sse_begin; i < sse_end; i++ )
- {
- const sse_t nd0 =
add4(mul4(sse_nu,dir_ku[i]),add4(mul4(sse_nv,dir_kv[i]), dir_kn[i]));
- const sse_t nd = accurateRcp4(nd0);
-
- if (!RaysConstantOrigin)
- {
- const int ray_index = i * 4;
- org_kn = load4(&(data->origin[kn][ray_index]));
- org_ku = load4(&(data->origin[ku][ray_index]));
- org_kv = load4(&(data->origin[kv][ray_index]));
-
- f0 = sub4(sse_nd, add4(org_kn, add4(mul4(sse_nu,
org_ku),mul4(sse_nv,org_kv))));
- }
-
- const sse_t f = mul4(f0,nd);
- // plane test
- sse_mask_t mask = and4( cmp_gt4(minT[i], f), cmp_gt4(f, sse_eps));
-
- if (none4(mask))
- continue;
-
- const sse_t hu = add4(org_ku, mul4(f,dir_ku[i]));
- const sse_t hv = add4(org_kv, mul4(f,dir_kv[i]));
- const sse_t lambda = add4(sse_bd,
add4(mul4(hu,sse_bnu),mul4(hv,sse_bnv)));
- const sse_t zero = set4(0.f);
- const sse_t one = set4(1.f);
- // barycentric test (to pass, \lambda must be >= 0)
- mask = and4(mask, cmp_gte4(lambda, zero));
- if (none4(mask))
- continue;
-
- const sse_t mue = add4(sse_cd,
add4(mul4(hu,sse_cnu),mul4(hv,sse_cnv)));
- // barycentric test (to pass, \mue must be >= 0 and \lambda + \mue
must be <= 1.f
- mask = and4(mask, and4(cmp_gte4(mue, zero), cmp_lte4(add4(mue,lambda),
one)));
- if (none4(mask))
- continue;
-
- for ( int r = 4*i; r < (4*(i+1)); r++ )
- {
- const float* hits = (const float*)(&f);
- const float* valid = (const float*)(&mask);
- if ( r >= ray_begin && r < ray_end )
- if ( valid[r-4*i] != 0.f )
- rays.hit(r, hits[r-4*i], getMaterial(), this,
getTexCoordMapper());
- }
- }
-}
-#endif
#endif // MANTA_SSE
- [MANTA] r1234 - trunk/Model/Primitives, boulos, 11/12/2006
Archive powered by MHonArc 2.6.16.