Text archives Help
- From: boulos@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r983 - trunk/Model/Primitives
- Date: Sun, 12 Mar 2006 21:49:43 -0700 (MST)
Author: boulos
Date: Sun Mar 12 21:49:41 2006
New Revision: 983
Modified:
trunk/Model/Primitives/WaldTriangle.cc
trunk/Model/Primitives/WaldTriangle.h
Log:
Adding SSE and Altivec versions of the
intersection test. Committing to see the
altivec image on my laptop (SSE has been
tested, works and is not 4x faster).
Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc (original)
+++ trunk/Model/Primitives/WaldTriangle.cc Sun Mar 12 21:49:41 2006
@@ -62,3 +62,300 @@
n_k = normal[k];
}
+
+#if 1 // portable C version
+// Portable scalar intersection of every ray in [rays.begin(), rays.end())
+// with this triangle.
+//
+// For each ray the distance f along the ray is computed from the precomputed
+// n_u / n_v / n_d terms.  A ray is accepted only when f lies in
+// [T_EPSILON, minT[i]] and the two edge terms lambda and mue are both
+// non-negative with lambda + mue <= 1.  Accepted rays are reported through
+// rays.hit().  The `context` argument is not used by this routine.
+void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays) const
+{
+  const int axis = k;
+  const int ku = (k==2)?0:k+1;  // axis following k, cyclically
+  const int kv = (k==0)?2:k-1;  // axis preceding k, cyclically
+
+  // what qualifiers go here?
+  RayPacketData* data = rays.data;
+
+  const Real* const dir_k = data->direction[axis];
+  const Real* const dir_ku = data->direction[ku];
+  const Real* const dir_kv = data->direction[kv];
+
+  // Always assigned before use: either once below (shared origin) or once
+  // per ray inside the loop.  Initialised to keep compilers quiet.
+  float org_k = 0.f;
+  float org_ku = 0.f;
+  float org_kv = 0.f;
+  float f0 = 0.f;
+
+  const bool RaysConstantOrigin = rays.getAllFlags() & RayPacket::ConstantOrigin;
+
+  if (RaysConstantOrigin)
+  {
+    // All rays share one origin, so the numerator f0 is loop invariant.
+    org_k = data->origin[axis][rays.begin()];
+    org_ku = data->origin[ku][rays.begin()];
+    org_kv = data->origin[kv][rays.begin()];
+    f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+  }
+
+  for (int i = rays.begin(); i < rays.end(); i++ )
+  {
+    const float nd0 = n_u * dir_ku[i] + n_v * dir_kv[i] + dir_k[i];
+    const float nd = 1.f/nd0;
+
+    if (!RaysConstantOrigin)
+    {
+      org_k = data->origin[axis][i];
+      org_ku = data->origin[ku][i];
+      org_kv = data->origin[kv][i];
+
+      f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+    }
+
+    const float f = f0 * nd;
+    // plane test
+    // Written as a negated "inside" test: when f0 == 0 and nd0 == 0 the
+    // product 0 * inf is NaN, and every ordered comparison with NaN is
+    // false, so the form "f < eps || f > minT" would let NaN through.
+    if ( !(f >= T_EPSILON && f <= data->minT[i]) )
+      continue;
+
+    const float hu = org_ku + f*dir_ku[i];
+    const float hv = org_kv + f*dir_kv[i];
+    const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+    // barycentric test (same NaN-rejecting form as the plane test)
+    if ( !(lambda >= 0.f) )
+      continue;
+
+    const float mue = c_d + hu * c_nu + hv * c_nv;
+    if ( !(mue >= 0.f && mue + lambda <= 1.f) )
+      continue;
+
+    rays.hit(i, f, getMaterial(), this, getTexCoordMapper());
+  }
+}
+#else
+// SSE version
+#include <float.h>
+
+#if 1 // altivec
+typedef vector float sse_t;
+typedef vector bool sse_mask_t;
+
+// AltiVec: broadcast a single float into all four lanes of a vector.
+inline sse_t set4(float value)
+{
+  // Place the scalar in lane 0 through a union, then splat lane 0.
+  union { float lanes[4]; sse_t packed; } staging;
+  staging.lanes[0] = value;
+  const sse_t broadcast = vec_splat( staging.packed, 0 );
+  return broadcast;
+}
+
+// AltiVec: build a vector whose lanes 0..3 hold v0..v3, in that order.
+inline sse_t set44(float v0, float v1, float v2, float v3)
+{
+  // The float array is the union's first member, so aggregate
+  // initialisation fills the four lanes directly.
+  union { float lanes[4]; sse_t packed; } staging = { { v0, v1, v2, v3 } };
+  return staging.packed;
+}
+
+#define add4 vec_add
+#define sub4 vec_sub
+
+// AltiVec: lane-wise product a*b, expressed as multiply-add with a zero addend.
+inline sse_t mul4(const sse_t& a, const sse_t& b)
+{
+  const sse_t zero_addend = {0.f,0.f,0.f,0.f};
+  const sse_t product = vec_madd(a, b, zero_addend);
+  return product;
+}
+
+#define rcp4 vec_re
+#define and4 vec_and
+#define andnot4 vec_andc
+#define or4 vec_or
+
+// AltiVec: true when every lane of the mask compares equal to an all-false mask.
+inline bool none4(const sse_mask_t& mask)
+{
+  const sse_mask_t all_clear = {false, false, false, false};
+  if (vec_all_eq(mask, all_clear) == 1)
+    return true;
+  return false;
+}
+
+#define cmp_gt4 vec_cmpgt
+#define cmp_gte4 vec_cmpge
+#define cmp_lt4 vec_cmplt
+#define cmp_lte4 vec_cmple
+
+
+#else
+
+#include <xmmintrin.h>
+
+typedef __m128 sse_t;
+typedef __m128 sse_mask_t;
+
+#define set4 _mm_set_ps1
+//#define set44 _mm_set_ps
+
+// SSE: build a vector whose lanes 0..3 hold v0..v3, in that order.
+inline sse_t set44(float v0, float v1, float v2, float v3)
+{
+  // _mm_setr_ps takes its arguments lowest lane first, which is the same
+  // layout as _mm_set_ps with the arguments reversed.
+  return _mm_setr_ps(v0, v1, v2, v3);
+}
+
+#define add4 _mm_add_ps
+#define sub4 _mm_sub_ps
+#define mul4 _mm_mul_ps
+#define rcp4 _mm_rcp_ps
+
+#define and4 _mm_and_ps
+
+// SSE: bitwise a & ~b.
+inline sse_mask_t andnot4(const sse_t& a, const sse_t& b)
+{
+  // _mm_andnot_ps complements its FIRST operand, so the arguments are
+  // passed swapped to obtain a & ~b.
+  const sse_mask_t a_without_b = _mm_andnot_ps(b, a);
+  return a_without_b;
+}
+
+#define or4 _mm_or_ps
+#define none4(mask) (_mm_movemask_ps( (mask) ) == 0)
+#define cmp_gt4 _mm_cmpgt_ps
+#define cmp_gte4 _mm_cmpge_ps
+#define cmp_lt4 _mm_cmplt_ps
+#define cmp_lte4 _mm_cmple_ps
+
+#endif
+
+// Per-lane bitwise select: lanes whose mask bits are set take if_true, the
+// remaining lanes take if_false.
+inline sse_t mask4(const sse_mask_t &mask, const sse_t& if_true, const sse_t& if_false)
+{
+  // andnot4(x, y) is called here with the value first and the mask second,
+  // i.e. it keeps the bits of if_false where the mask is clear.
+  const sse_t chosen = and4(mask,if_true);
+  const sse_t retained = andnot4(if_false, mask);
+  return or4(chosen, retained);
+}
+
+// Refined lane-wise reciprocal: starts from the rcp4 estimate r and applies
+// one Newton-Raphson step, 2*r - r*r*v.
+inline sse_t accurateRcp4(const sse_t& v)
+{
+  const sse_t estimate = rcp4(v);
+  const sse_t doubled = add4(estimate,estimate);
+  const sse_t correction = mul4(mul4(estimate,estimate),v);
+  return sub4(doubled, correction);
+}
+
+// SIMD intersection of the rays in [rays.begin(), rays.end()) with this
+// triangle, four rays per loop iteration.
+//
+// The math mirrors the scalar version: compute the distance f, require
+// T_EPSILON < f < minT, then require lambda >= 0, mue >= 0 and
+// lambda + mue <= 1.  Instead of calling rays.hit(), the surviving lanes are
+// written straight into minT / hitMatl / hitPrim / hitTex with a bitwise
+// select.
+//
+// Lanes of the first and last vector that fall outside
+// [rays.begin(), rays.end()) are excluded with a per-lane mask, so no data
+// belonging to rays outside that range is modified.
+//
+// NOTE(review): the casts below treat the per-ray arrays as arrays of 4-wide
+// float vectors, which presumes suitable alignment and padding of
+// RayPacketData -- confirm against its declaration.
+// The `context` argument is not used by this routine.
+void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays) const
+{
+  const int kn = k;
+  const int ku = (k==2)?0:k+1;  // axis following k, cyclically
+  const int kv = (k==0)?2:k-1;  // axis preceding k, cyclically
+
+  // what qualifiers go here?
+  RayPacketData* data = rays.data;
+
+  const sse_t* const dir_kn = (sse_t*)data->direction[kn];
+  const sse_t* const dir_ku = (sse_t*)data->direction[ku];
+  const sse_t* const dir_kv = (sse_t*)data->direction[kv];
+
+  sse_t* minT = (sse_t*)data->minT;
+  sse_t* hitMatl = (sse_t*)data->hitMatl;
+  sse_t* hitPrim = (sse_t*)data->hitPrim;
+  sse_t* hitTex = (sse_t*)data->hitTex;
+
+  // The hit-record pointers are carried through float lanes so they can be
+  // blended with the same mask as minT.  This only round-trips when a
+  // pointer occupies exactly as many bytes as a float.
+  union
+  {
+    const void* ptr;
+    float value;
+  } addr_loader;
+
+  addr_loader.ptr = (const void*)getMaterial();
+  const sse_t sse_matl = set4(addr_loader.value);
+  addr_loader.ptr = (const void*)this;
+  const sse_t sse_this = set4(addr_loader.value);
+  addr_loader.ptr = (const void*)getTexCoordMapper();
+  const sse_t sse_tex = set4(addr_loader.value);
+
+  sse_t org_kn, org_ku, org_kv, f0;
+
+  const sse_t sse_nu = set4(n_u);
+  const sse_t sse_nv = set4(n_v);
+  const sse_t sse_nd = set4(n_d);
+
+  const sse_t sse_bnu = set4(b_nu);
+  const sse_t sse_bnv = set4(b_nv);
+  const sse_t sse_bd = set4(b_d);
+
+  const sse_t sse_cnu = set4(c_nu);
+  const sse_t sse_cnv = set4(c_nv);
+  const sse_t sse_cd = set4(c_d);
+
+  const sse_t sse_eps = set4(T_EPSILON);
+  const sse_t sse_zero = set4(0.f);
+  const sse_t sse_one = set4(1.f);
+
+  const int ray_begin = rays.begin();
+  const int ray_end = rays.end();
+
+  // Range of 4-wide vectors overlapping [ray_begin, ray_end).
+  const int sse_begin = ray_begin >> 2;    // Floor(ray_begin/4)
+  const int sse_end = (ray_end + 3) >> 2;  // Ceil(ray_end/4)
+
+  // Used to build, per vector, a mask of the lanes whose ray index lies in
+  // [ray_begin, ray_end).  Ray indices are small integers and therefore
+  // exactly representable as floats.
+  const sse_t lane_offsets = set44(0.f, 1.f, 2.f, 3.f);
+  const sse_t sse_ray_begin = set4((float)ray_begin);
+  const sse_t sse_ray_end = set4((float)ray_end);
+
+  const bool RaysConstantOrigin = rays.getAllFlags() & RayPacket::ConstantOrigin;
+
+  if (RaysConstantOrigin)
+  {
+    // All rays share one origin, so splat it once and hoist f0 out of the loop.
+    org_kn = set4(data->origin[kn][ray_begin]);
+    org_ku = set4(data->origin[ku][ray_begin]);
+    org_kv = set4(data->origin[kv][ray_begin]);
+    // f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+    f0 = sub4(sse_nd, add4(org_kn, add4(mul4(sse_nu, org_ku),mul4(sse_nv,org_kv))));
+  }
+
+  for (int i = sse_begin; i < sse_end; i++ )
+  {
+    const sse_t nd0 = add4(mul4(sse_nu,dir_ku[i]),add4(mul4(sse_nv,dir_kv[i]), dir_kn[i]));
+    const sse_t nd = accurateRcp4(nd0);
+
+    if (!RaysConstantOrigin)
+    {
+      const int ray_index = i * 4;
+      org_kn = set44(data->origin[kn][ray_index+0],
+                     data->origin[kn][ray_index+1],
+                     data->origin[kn][ray_index+2],
+                     data->origin[kn][ray_index+3]);
+
+      org_ku = set44(data->origin[ku][ray_index+0],
+                     data->origin[ku][ray_index+1],
+                     data->origin[ku][ray_index+2],
+                     data->origin[ku][ray_index+3]);
+
+      org_kv = set44(data->origin[kv][ray_index+0],
+                     data->origin[kv][ray_index+1],
+                     data->origin[kv][ray_index+2],
+                     data->origin[kv][ray_index+3]);
+
+      f0 = sub4(sse_nd, add4(org_kn, add4(mul4(sse_nu, org_ku),mul4(sse_nv,org_kv))));
+    }
+
+    const sse_t f = mul4(f0,nd);
+
+    // Lanes of this vector that belong to the requested ray range.
+    const sse_t lane_ids = add4(set4((float)(i * 4)), lane_offsets);
+    const sse_mask_t in_range = and4(cmp_gte4(lane_ids, sse_ray_begin),
+                                     cmp_lt4(lane_ids, sse_ray_end));
+
+    // plane test
+    sse_mask_t mask = and4(in_range,
+                           and4( cmp_gt4(minT[i], f), cmp_gt4(f, sse_eps)));
+
+    if (none4(mask))
+      continue;
+
+    const sse_t hu = add4(org_ku, mul4(f,dir_ku[i]));
+    const sse_t hv = add4(org_kv, mul4(f,dir_kv[i]));
+    const sse_t lambda = add4(sse_bd, add4(mul4(hu,sse_bnu),mul4(hv,sse_bnv)));
+    // barycentric test (to pass, \lambda must be >= 0)
+    mask = and4(mask, cmp_gte4(lambda, sse_zero));
+    if (none4(mask))
+      continue;
+
+    const sse_t mue = add4(sse_cd, add4(mul4(hu,sse_cnu),mul4(hv,sse_cnv)));
+    mask = and4(mask, and4(cmp_gte4(mue, sse_zero),
+                           cmp_lte4(add4(mue,lambda), sse_one)));
+    if (none4(mask))
+      continue;
+
+    // Equivalent of rays.hit(i, f, getMaterial(), this, getTexCoordMapper()):
+    // the mask already holds everything needed to record the hit per lane.
+    minT[i] = mask4(mask, f, minT[i]);
+    hitMatl[i] = mask4(mask, sse_matl, hitMatl[i]);
+    hitPrim[i] = mask4(mask, sse_this, hitPrim[i]);
+    hitTex[i] = mask4(mask, sse_tex, hitTex[i]);
+  }
+}
+#endif
Modified: trunk/Model/Primitives/WaldTriangle.h
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.h (original)
+++ trunk/Model/Primitives/WaldTriangle.h Sun Mar 12 21:49:41 2006
@@ -13,75 +13,17 @@
class WaldTriangle : public PrimitiveCommon
{
public:
- WaldTriangle() { };
- WaldTriangle(Material* mat,
- const Vector& _p1, const Vector& _p2, const Vector& _p3);
+ WaldTriangle() { };
+ WaldTriangle(Material* mat,
+ const Vector& _p1, const Vector& _p2, const Vector& _p3);
+
+ void computeBounds(const PreprocessContext& context,
+ BBox& bbox) const
+ {
+ bbox = box;
+ }
- void computeBounds(const PreprocessContext& context,
- BBox& bbox) const
- {
- bbox = box;
- }
-
- void intersect(const RenderContext& context, RayPacket& rays) const
- {
- const int axis = k;
- const int ku = (k==2)?0:k+1;
- const int kv = (k==0)?2:k-1;
-
- // what qualifiers go here?
- RayPacketData* data = rays.data;
-
- const Real* const dir_k = data->direction[axis];
- const Real* const dir_ku = data->direction[ku];
- const Real* const dir_kv = data->direction[kv];
-
- float org_k, org_ku, org_kv, f0;
-
- const bool RaysConstantOrigin = rays.getAllFlags() &
RayPacket::ConstantOrigin;
-
- if (RaysConstantOrigin)
- {
- org_k = data->origin[axis][rays.begin()];
- org_ku = data->origin[ku][rays.begin()];
- org_kv = data->origin[kv][rays.begin()];
- f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
- }
-
- for (int i = rays.begin(); i < rays.end(); i++ )
- {
- const float nd0 = n_u * dir_ku[i] + n_v * dir_kv[i] + dir_k[i];
- const float nd = 1.f/nd0;
-
- if (!RaysConstantOrigin)
- {
- org_k = data->origin[axis][i];
- org_ku = data->origin[ku][i];
- org_kv = data->origin[kv][i];
-
- f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
- }
-
- const float f = f0 * nd;
- // plane test
- if ( f < T_EPSILON || f > data->minT[i] )
- continue;
-
- const float hu = org_ku + f*dir_ku[i];
- const float hv = org_kv + f*dir_kv[i];
- const float lambda = b_d + hu*b_nu + hv * b_nv;
-
- // barycentric test
- if ( lambda < 0.f )
- continue;
-
- const float mue = c_d + hu * c_nu + hv * c_nv;
- if ( mue < 0.f || mue + lambda > 1.f )
- continue;
-
- rays.hit(i, f, getMaterial(), this, getTexCoordMapper());
- }
- }
+ void intersect(const RenderContext& context, RayPacket& rays) const;
void computeNormal(const RenderContext& context, RayPacket &rays) const
{
- [MANTA] r983 - trunk/Model/Primitives, boulos, 03/12/2006
Archive powered by MHonArc 2.6.16.