Text archives Help
- From: thiago@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1267 - in trunk: Interface Model/Groups/private Model/Primitives
- Date: Tue, 16 Jan 2007 17:58:15 -0700 (MST)
Author: thiago
Date: Tue Jan 16 17:58:14 2007
New Revision: 1267
Modified:
trunk/Interface/RayPacket.h
trunk/Model/Groups/private/CGT.cc
trunk/Model/Groups/private/CGT.h
trunk/Model/Primitives/WaldTriangle.cc
trunk/Model/Primitives/WaldTriangle.h
Log:
Model/Primitives/WaldTriangle.h
Model/Primitives/WaldTriangle.cc : Added frustum culling (if corner rays
have been set) and made the SSE code more SSE-like.
Model/Groups/private/CGT.h
Model/Groups/private/CGT.cc : Removed the internal Wald triangle code; CGT now
uses the WaldTriangle class (although I currently wrap that class in my
own class so that the triangles can be stored in a std::vector...).
Interface/RayPacket.h : Added the HaveCornerRays flag. This is used by the
coherent grid traversal code.
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Tue Jan 16 17:58:14 2007
@@ -148,6 +148,7 @@
HaveInverseDirections = 0x0800,
HaveSigns = 0x1000,
ConstantSigns = 0x2000,
+ HaveCornerRays = 0x4000,
DebugPacket = 0x8000
};
Modified: trunk/Model/Groups/private/CGT.cc
==============================================================================
--- trunk/Model/Groups/private/CGT.cc (original)
+++ trunk/Model/Groups/private/CGT.cc Tue Jan 16 17:58:14 2007
@@ -49,103 +49,8 @@
void Grid::newFrame()
{
-#ifdef COUNT_ISECS
- WaldTriangleIntersector::newFrame();
-#endif
}
-#ifdef COUNT_ISECS
-int WaldTriangleIntersector::numRayIsecs = 0;
-int WaldTriangleIntersector::numFrustumIsecs = 0;
-#endif
-
-
-void WaldTriangleIntersector::preprocess(const PreprocessContext& context)
-{
-#if defined(NUM_TRI_INTERSECT_THREADS)
- if (!accel)
- {
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
- pthread_mutex_lock(&mutex);
- if ( !accel ) {
- cout << "allocating mem for " << getSize() << " triangles" << endl;
- accel = (TriangleAccel*)memalign(16,sizeof(TriangleAccel)*getSize());
- }
- pthread_mutex_unlock(&mutex);
- }
-
- const int startTri = (getSize() * context.proc) / context.numProcs;
- const int endTri = (getSize() * (context.proc+1)) / context.numProcs;
-#else
- if (!accel)
- {
- cout << "allocating mem for " << tri_list->getSize() << " triangles"
<< endl;
- accel = new
TriangleAccel[tri_list->getSize()];//(TriangleAccel*)memalign(16,sizeof(TriangleAccel)*tri_list->getSize());
- }
-
- const int startTri = 0;
- const int endTri = tri_list->getSize();
-#endif // NUM_TRI_INTERSECT_THREADS
-
- for (int i=startTri;i<endTri;i++) {
- //Mesh::Triangle *triangle = mesh->getTriangle(i);
- Triangle *triangle = (Triangle*) tri_list->get(i);
-
-// #ifdef NDEBUG //avoid assert in getPos when past triangle array bounds.
-// prefetch((char*)&mesh->getPos(triangle[2].a),_MM_HINT_T2);
-// prefetch((char*)&mesh->getPos(triangle[2].b),_MM_HINT_T2);
-// prefetch((char*)&mesh->getPos(triangle[2].c),_MM_HINT_T2);
-// #endif
- sse_t a = set44(0, triangle->p1.z(), triangle->p1.y(),
triangle->p1.x());//mesh->getPos(triangle->a);
- sse_t b = set44(0, triangle->p2.z(), triangle->p2.y(),
triangle->p2.x());//mesh->getPos(triangle->b);
- sse_t c = set44(0, triangle->p3.z(), triangle->p3.y(),
triangle->p3.x());//mesh->getPos(triangle->c);
- sse_t normal = calcNormal(a,b,c);
- normalize(normal);
- int k, u, v;
-
- k = 0;
- if (fabsf(((float4&)normal)[1]) > fabsf(((float4&)normal)[k])) k = 1;
- if (fabsf(((float4&)normal)[2]) > fabsf(((float4&)normal)[k])) k = 2;
-
- switch (k)
- {
- case 0: u = 1; v = 2; break;
- case 1: u = 2; v = 0; break;
- default: u = 0; v = 1; break;
- };
- accel[i].k = k;
- accel[i].n_u = ((float4&)normal)[u] / ((float4&)normal)[k];
- accel[i].n_v = ((float4&)normal)[v] / ((float4&)normal)[k];
- accel[i].n_d = (((float4&)a)[u]*accel[i].n_u +
((float4&)a)[v]*accel[i].n_v + ((float4&)a)[k]);
-
- float s;
-
- accel[i].c_nu = + ((float4&)b)[v] - ((float4&)a)[v];
- accel[i].c_nv = - ((float4&)b)[u] + ((float4&)a)[u];
- accel[i].c_d = - (((float4&)a)[u] * accel[i].c_nu +
((float4&)a)[v]*accel[i].c_nv);
-
- s = 1.0f / (((float4&)c)[u] * accel[i].c_nu +
((float4&)c)[v]*accel[i].c_nv + accel[i].c_d);
-
- accel[i].c_nu *= s;
- accel[i].c_nv *= s;
- accel[i].c_d *= s;
-
- accel[i].b_nu = + ((float4&)c)[v] - ((float4&)a)[v];
- accel[i].b_nv = - ((float4&)c)[u] + ((float4&)a)[u];
- accel[i].b_d = - (((float4&)a)[u] * accel[i].b_nu +
((float4&)a)[v]*accel[i].b_nv);
-
- s = 1.0f / (((float4&)b)[u] * accel[i].b_nu +
((float4&)b)[v]*accel[i].b_nv + accel[i].b_d);
-
- accel[i].b_nu *= s;
- accel[i].b_nv *= s;
- accel[i].b_d *= s;
-
- accel[i].n_k = ((float4&)normal)[k];
- accel[i].triangle = triangle;//accel[i].shaderID = triangle->shaderID;
- }
-}
-
-
void Grid::preprocess(const PreprocessContext& context)
{
static bool firstTime = true;
@@ -161,13 +66,12 @@
cout <<"Number of triangles is: " << getSize() <<endl; //DEBUG
- primitives.preprocess(context);
- // primitives.clear();
-// for (int i=0; i<getSize(); ++i) {
-// const Triangle *triangle = (Triangle*) get(i);
-// primitives.push_back(WaldTriangle(triangle->getMaterial(),
-//
triangle->p1,triangle->p2,triangle->p3));
-// }
+ primitives.resize(getSize());
+ for (int i=0; i<getSize(); ++i) {
+ const Triangle *triangle = (Triangle*) get(i);
+ primitives[i].setMaterial(triangle->getMaterial());
+ primitives[i].setPoints(triangle->p1,triangle->p2,triangle->p3);
+ }
firstTimer.start();
@@ -278,18 +182,6 @@
if (firstTime)
cout << "building grid for " << getSize() << " triangles, res = " <<
N[0] << "x" << N[1] << "x" << N[2] << endl << "bounds = " << bounds << endl;
-#ifdef TRI_PREPROCESS
- if (!accel) {
- static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER;
- pthread_mutex_lock(&mutex);
- if ( !accel ) {
- cout << "allocating mem for " << getSize() << " triangles" << endl;
- accel = (TriangleAccel*)memalign(16,sizeof(TriangleAccel)*getSize());
- }
- pthread_mutex_unlock(&mutex);
- }
-#endif
-
firstTimer.stop();
if (firstTime || DISPLAY_BUILD_TIMES)
@@ -492,7 +384,7 @@
cout<<buf;
}
-for (int i=0; i < context.numProcs; ++i)
+ for (int i=0; i < context.numProcs; ++i)
buildQueue_sizes[context.proc][i] = myBuildQueue_sizes[i];
barrier(condition_mutex, condition);
@@ -626,8 +518,9 @@
float minVd;
float maxVd;
- if (SQUARE_PACKETS && COMMON_ORIGIN)
+ if (ray.getFlag(RayPacket::HaveCornerRays) &&
ray.getFlag(RayPacket::ConstantOrigin))
{
+ printf("oops, never expected this to be needed!. Look In CGT.cc...\n");
/*
sse_t dir4[3];
for (int i=0;i<3;i++)
@@ -694,6 +587,7 @@
ray.data->corner_dir[K] = set4(DK);
ray.data->corner_dir[U] = set44(maxDu, maxDu, minDu, minDu);
ray.data->corner_dir[V] = set44(maxDv, minDv, maxDv, minDv);
+ ray.setFlag(RayPacket::HaveCornerRays);
float u_scale = ((float4&)scaleN)[U] * ((float4&)inv_scaleN)[K]*DK;
float v_scale = ((float4&)scaleN)[V] * ((float4&)inv_scaleN)[K]*DK;
@@ -744,7 +638,7 @@
else
s_furthest_k = _mm_infty;
-#if 0
+#if 1
if (DK == +1) {
int b = (ray.begin() + 3) & (~3);
int e = ray.end() & (~3);
@@ -1139,9 +1033,11 @@
mailbox[context.proc][item[i]] = rayID[context.proc].rayID;
#endif
//this->get(item[i])->intersect(context, ray); anyHit = true;
- //anyHit += this->get(item[i])->intersect_and_notify(context,
ray);
- anyHit += primitives.intersectTriangle<false, true,
true>(item[i], ray);
- //anyHit = true; primitives[item[i]].intersect(context, ray);
+
+ //it appears that always setting anyHit to true only causes a
couple
+ //percent performance hit, so for now let's just live with it.
+ //Note: We stil need anyHit since it lets us skip mailboxed hits.
+ anyHit = true; primitives[item[i]].intersect(context, ray);
}
if (K == 0) //if K is 0, then U is Y
@@ -1162,7 +1058,7 @@
s_furthest_k = _mm_minus_infty;
else
s_furthest_k = _mm_infty;
-#if 0
+#if 1
if (DK == +1) {
int b = (ray.begin() + 3) & (~3);
int e = ray.end() & (~3);
Modified: trunk/Model/Groups/private/CGT.h
==============================================================================
--- trunk/Model/Groups/private/CGT.h (original)
+++ trunk/Model/Groups/private/CGT.h Tue Jan 16 17:58:14 2007
@@ -28,304 +28,6 @@
namespace Manta {
//using namespace Manta;
-
-
-/////////////// WALD TRIANGLE INTERSECTOR HACKED IN TO CGT /////////////
-/*! if defined, we will count - and print once per frame - the number
- of actual ray-triangle intersections performed */
-//#define COUNT_ISECS
-
-#if 0
-#define ALL_HIT_HACK
-#endif
-
-#ifdef VISUALIZE_CELLS
-extern int cellID;
-#endif
-
-#ifdef SIMPLE_RCP
-#define isecRCP(a) reciprocal(a)
-#else
-#define isecRCP(a) accurateReciprocal(a)
-#endif
-
-struct WaldTriangleIntersector
-{
- /*! literally copied over from openrt */
- struct TriangleAccel
- {
- // plane:
- float n_u; //!< == normal.u / normal.k
- float n_v; //!< == normal.v / normal.k
- float n_d; //!< constant of plane equation, i.e. distance of plane to
origin
- unsigned int k;
-
- // line equation for line ac
- float b_nu;
- float b_nv;
- float b_d;
- float n_k; // == geomnormal[k];
-
- // line equation for line ab
- float c_nu;
- float c_nv;
- float c_d;
- Triangle *triangle;
- };
- TriangleAccel *accel;
-
-#ifdef COUNT_ISECS
- static int numRayIsecs;
- static int numFrustumIsecs;
- static void newFrame()
- {
- cout << "num ray-tri isecs : " << numRayIsecs << endl;
- cout << "num frustum-tri isecs : " << numFrustumIsecs << endl;
- numRayIsecs = 0;
- numFrustumIsecs = 0;
- };
-
-#endif
-
-
-
- Group *tri_list;
- WaldTriangleIntersector(Group *tri_list)
- : accel(NULL), tri_list(tri_list)
- {
- cout << "Warning: Using WaldTriangleAccelerator: having to preprocess "
<< tri_list->getSize() << " triangles every frame !!!" << endl;
- }
- inline sse_t getGeomNormal(int triID) const
- {
- sse_t normal = zero4();
- const TriangleAccel *const acc = accel + triID;
- const int k = acc->k; // NOTE: make sure those get condmoves, not
'if's...
- const int ku = (k==2)?0:k+1;
- const int kv = (k==0)?2:k-1;
- ((float *)&normal)[k] = 1;
- ((float *)&normal)[ku] = acc->n_u;
- ((float *)&normal)[kv] = acc->n_v;
- return mul4(normal,set4(acc->n_k));
- }
- void preprocess(const PreprocessContext& context);
-
- /*! intersect with packet, but consider only the range of
- ray_begin..ray_end. note: these values are measured in 'SIMD's, not
- in ints. thus, range 1..2 actually means rays 4..7)
-
- \returns true if at least one ray did have a valid interseciton,
- false otherwise
- */
- template<bool SHADOWS_ONLY, bool COMMON_ORIGIN, bool SQUARE_PACKETS>
- inline int intersectTriangle(const int triNum, RayPacket &ray,
- const int pack_begin = 0,
- int pack_end = -1)
- const
- {
- if (pack_end < 0)
- pack_end = ray.end();//curr_packet_simds;
-
-#ifdef COUNT_ISECS
- numFrustumIsecs += 1;
-#endif
-
- const TriangleAccel *const acc = &accel[triNum];
- //const Triangle *triangle = (Triangle*) tri_list->get(triNum);
-
- const int k = acc->k; // NOTE: make sure those get condmoves, not
'if's...
- const int ku = (k==2)?0:k+1;
- const int kv = (k==0)?2:k-1;
-
- RayPacketData *const data = ray.data;
-
- const float * const dir_k = data->direction[k];//ray.dir[k];
- const float * const dir_ku = data->direction[ku];//ray.dir[ku];
- const float * const dir_kv = data->direction[kv];//ray.dir[kv];
-
- int triID = triNum;
- //const sse_int_t triNumSSE = set4int(triID);
-
- const sse_t acc_n_u = set4(acc->n_u);
- const sse_t acc_n_v = set4(acc->n_v);
- const sse_t acc_n_d = set4(acc->n_d);
-
- const sse_t acc_b_nu = set4(acc->b_nu);
- const sse_t acc_b_nv = set4(acc->b_nv);
- const sse_t acc_b_d = set4(acc->b_d);
-
- const sse_t acc_c_nu = set4(acc->c_nu);
- const sse_t acc_c_nv = set4(acc->c_nv);
- const sse_t acc_c_d = set4(acc->c_d);
-
- sse_t org_k, org_ku, org_kv, f0;
- if (COMMON_ORIGIN)
- {
- org_k = load44(data->origin[k]);//org0[k];
- org_ku = load44(data->origin[ku]);//ray.org0[ku];
- org_kv = load44(data->origin[kv]);//ray.org0[kv];
- f0 = sub4(acc_n_d,
- add4(org_k,
- add4(mul4(acc_n_u,org_ku),
- mul4(acc_n_v,org_kv))));
- }
-
-
- if (true || SQUARE_PACKETS && COMMON_ORIGIN)
- {
- const sse_t nd0 = add4(add4(mul4(acc_n_u,data->corner_dir[ku]),
- mul4(acc_n_v,data->corner_dir[kv])),
- data->corner_dir[k]);
- const sse_t nd = isecRCP(nd0);
- const sse_t f = mul4(f0,nd);
- sse_t mask = cmp4_gt(f,_mm_eps);
- if (none4(mask))
- return false;
-
- const sse_t hu = add4(org_ku,mul4(f,data->corner_dir[ku]));
- const sse_t hv = add4(org_kv,mul4(f,data->corner_dir[kv]));
- const sse_t lambda = add4(acc_b_d,
- add4(mul4(hu,acc_b_nu),
- mul4(hv,acc_b_nv)));
- if (all4(lambda))
- return false;
-
- const sse_t mue = add4(acc_c_d,add4(mul4(hu,acc_c_nu),
- mul4(hv,acc_c_nv)));
- if (all4(mue))
- return false;
-
- mask = cmp4_gt(add4(lambda,mue),_mm_one);
- if (all4(mask))
- return false;
- }
-
- int hadHit = 0;
- for (int i=pack_begin;i<pack_end;i+=4) {
-#ifdef COUNT_ISECS
- numRayIsecs += 4;
-#endif
-
- const sse_t nd0 = add4(add4(mul4(acc_n_u,load44(&dir_ku[i])),
- mul4(acc_n_v,load44(&dir_kv[i]))),
- load44(&dir_k[i]));
- const sse_t nd = isecRCP(nd0);
-
-// if (!COMMON_ORIGIN) {
-// org_k = &ray.org[k][i];
-// org_ku = &ray.org[ku][i];
-// org_kv = &ray.org[kv][i];
-
-// f0 = sub4(acc_n_d,
-// add4(org_k,
-// add4(mul4(acc_n_u,org_ku),
-// mul4(acc_n_v,org_kv))));
-
-// }
- const sse_t f = mul4(f0,nd);
- sse_t mask = and4(cmp4_gt(load44(&data->minT[i]),f),
- cmp4_gt(f,_mm_eps));
-
- const sse_t hu = add4(org_ku,mul4(f,load44(&dir_ku[i])));
- const sse_t hv = add4(org_kv,mul4(f,load44(&dir_kv[i])));
- const sse_t lambda = add4(acc_b_d,
- add4(mul4(hu,acc_b_nu),
- mul4(hv,acc_b_nv)));
- mask = and4(mask,cmp4_ge(lambda,zero4()));
- if (none4(mask)) continue;
-
- const sse_t mue = add4(acc_c_d,add4(mul4(hu,acc_c_nu),
- mul4(hv,acc_c_nv)));
- mask = and4(mask,cmp4_ge(mue,zero4()));
- if (none4(mask)) continue;
-
-// union {
- sse_t finalMask;
-// int finalMask_int[4];
-// //sse_int_t finalMask_int;
-// };
-
- finalMask = and4(mask,
- cmp4_le(add4(lambda,mue),_mm_one));
- if (none4(finalMask)) continue;
- if (all4(finalMask)) {
-// for (int k=0; k < 4; ++k) {
-// ray.hit(i+k, ((float4&)f)[k], acc->triangle->getMaterial(),
-// acc->triangle, acc->triangle->getTexCoordMapper());
- store44(&data->minT[i], f);
-
-//#define DYNRT_CMP
-
-#ifdef __x86_64
-#ifndef DYNRT_CMP
- store44i((sse_int_t*)&data->hitMatl[i], set4l((long
long)acc->triangle->getMaterial()));
- store44i((sse_int_t*)&data->hitMatl[i+2], set4l((long
long)acc->triangle->getMaterial()));
- store44i((sse_int_t*)&data->hitPrim[i], set4l((long
long)acc->triangle));
- store44i((sse_int_t*)&data->hitPrim[i+2], set4l((long
long)acc->triangle));
- store44i((sse_int_t*)&data->hitTex[i], set4l((long
long)acc->triangle->getTexCoordMapper()));
- store44i((sse_int_t*)&data->hitTex[i+2], set4l((long
long)acc->triangle->getTexCoordMapper()));
-#endif //DYNRT_CMP
-#else
-#ifndef DYNRT_CMP
- store44i((sse_int_t*)&data->hitPrim[i], set4i((int)acc->triangle));
- store44i((sse_int_t*)&data->hitMatl[i],
set4i((int)acc->triangle->getMaterial()));
- store44i((sse_int_t*)&data->hitTex[i],
set4i((int)acc->triangle->getTexCoordMapper()));
-#endif //DYNRT_CMP
-#endif
-
- if (!SHADOWS_ONLY) {
-// Triangle::TriangleHit& th =
ray.scratchpad<Triangle::TriangleHit>(i+k);
-// th.a = ((float4&)lambda)[k];
-// th.b = ((float4&)mue)[k];
- }
-// }
- hadHit = 1;
- continue;
- }
-
- hadHit = 1;
-// for (int k=0; k < 4; ++k) {
-// if ( finalMask_int[k] ) {
- store44(&data->minT[i], mask4(finalMask, f, load44(&data->minT[i])));
-
-// maskmove4i(cast_f2i(f), cast_f2i(finalMask),
(char*)&data->minT[i]);
-#ifdef __x86_64
- sse_int_t lohit = cast_f2i(unpacklo(finalMask, finalMask));
- sse_int_t hihit = cast_f2i(unpackhi(finalMask, finalMask));
-#ifndef DYNRT_CMP
- maskmove4i(set4l((long long)acc->triangle->getMaterial()),
lohit, (char*)&data->hitMatl[i]);
- maskmove4i(set4l((long long)acc->triangle->getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
-
- maskmove4i(set4l((long long)acc->triangle), lohit,
(char*)&data->hitPrim[i]);
- maskmove4i(set4l((long long)acc->triangle), hihit,
(char*)&data->hitPrim[i+2]);
- maskmove4i(set4l((long long)acc->triangle->getTexCoordMapper()),
lohit, (char*)&data->hitTex[i]);
- maskmove4i(set4l((long long)acc->triangle->getTexCoordMapper()),
hihit, (char*)&data->hitTex[i+2]);
-#endif //DYNRT_CMP
-#else
- maskmove4i(set4i((int)acc->triangle), cast_f2i(finalMask),
(char*)&data->hitPrim[i]);
-#ifndef DYNRT_CMP
- maskmove4i(set4i((int)acc->triangle->getMaterial()),
cast_f2i(finalMask), (char*)&data->hitMatl[i]);
- maskmove4i(set4i((int)acc->triangle->getTexCoordMapper()),
cast_f2i(finalMask), (char*)&data->hitTex[i]);
-#endif //DYNRT_CMP
-#endif
-
-// ray.hit(i+k, ((float4&)f)[k], acc->triangle->getMaterial(),
-// acc->triangle, acc->triangle->getTexCoordMapper());
- if (!SHADOWS_ONLY) {
-// Triangle::TriangleHit& th =
ray.scratchpad<Triangle::TriangleHit>(i+k);
-// th.a = ((float4&)lambda)[k];
-// th.b = ((float4&)mue)[k];
- }
-// }
-// }
- }
- return hadHit;
- }
-};
-
-
-/////////////END wald triangle intersector hack///////////////
-
-
struct MANTA_ALIGN(16) Box4
{
sse_t min, max;
@@ -336,7 +38,23 @@
return o;
}
+ //quick hack to get Wald Triangles inside a vector
+ class WaldTriangle_Copiable : public WaldTriangle
+ {
+ public:
+ WaldTriangle_Copiable() { };
+ //XXXX: This memcpy stuff is ugly and dangerous...
+ WaldTriangle_Copiable( const WaldTriangle_Copiable & wt) {
+ memcpy(this, &wt, sizeof(WaldTriangle_Copiable));
+ }
+ WaldTriangle_Copiable &operator = ( const WaldTriangle_Copiable & wt) {
+ if (this != &wt) {
+ memcpy(this, &wt, sizeof(WaldTriangle_Copiable));
+ }
+ return *this;
+ }
+ };
struct Grid : public Group
{
@@ -376,9 +94,7 @@
#endif //NUM_BUILD_THREADS
vector <CellData> cellVector;
-
- WaldTriangleIntersector primitives; //hardcoded to do triangles for now...
- //vector<WaldTriangle> primitives;
+ vector<WaldTriangle_Copiable> primitives;
static void newFrame();
#ifdef MACRO_CELLS
@@ -443,8 +159,7 @@
return cell;
}
- Grid(
- ) : primitives(this)
+ Grid()
{
#ifdef NUM_BUILD_THREADS
for (int i=0; i < NUM_BUILD_THREADS; ++i)
Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc (original)
+++ trunk/Model/Primitives/WaldTriangle.cc Tue Jan 16 17:58:14 2007
@@ -11,6 +11,11 @@
WaldTriangle::WaldTriangle(Material* mat,
const Vector& _p1, const Vector& _p2, const
Vector& _p3) : PrimitiveCommon(mat)
{
+ setPoints(_p1, _p2, _p3);
+}
+
+void WaldTriangle::setPoints(const Vector& _p1, const Vector& _p2, const
Vector& _p3)
+{
box.reset();
box.extendByPoint(_p1);
box.extendByPoint(_p2);
@@ -64,124 +69,218 @@
}
#ifdef MANTA_SSE
+
+//These should be defined in SSEDefs.h, but aren't...
+#define none4(mask) (getmask4( (mask) ) == 0x0)
+#define all4(mask) (getmask4( (mask) ) == 0xf)
+#define any4(mask) (getmask4( (mask) ) != 0x0)
+
+
void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays)
const {
const int axis = k;
const int ku = (k==2)?0:k+1;
const int kv = (k==0)?2:k-1;
- // what qualifiers go here?
RayPacketData* data = rays.data;
- const bool RaysConstantOrigin = rays.getFlag(RayPacket::ConstantOrigin);
+ const bool HasCommonOrigin = rays.getFlag(RayPacket::ConstantOrigin);
+ const bool HasCornerRays = rays.getFlag(RayPacket::HaveCornerRays);
const int ray_begin = rays.begin();
const int ray_end = rays.end();
const int sse_begin = (ray_begin + 3) & (~3);
const int sse_end = (ray_end) & (~3);
- for (int ray = ray_begin; ray < sse_begin; ++ray) {
- const float nd0 = ( n_u * data->direction[ku][ray] +
- n_v * data->direction[kv][ray] +
- data->direction[k][ray] );
- const float nd = 1.f/nd0;
+ const sse_t sse_n_u = set4(n_u);
+ const sse_t sse_n_v = set4(n_v);
+ const sse_t sse_n_d = set4(n_d);
+
+ const sse_t sse_b_nu = set4(b_nu);
+ const sse_t sse_b_nv = set4(b_nv);
+ const sse_t sse_b_d = set4(b_d);
+
+ const sse_t sse_c_nu = set4(c_nu);
+ const sse_t sse_c_nv = set4(c_nv);
+ const sse_t sse_c_d = set4(c_d);
+
+ sse_t org_k, org_ku, org_kv, f0;
+ if (HasCommonOrigin) {
+ org_k = load44(&data->origin[axis][ray_begin]);
+ org_ku = load44(&data->origin[ku][ray_begin]);
+ org_kv = load44(&data->origin[kv][ray_begin]);
+ f0 = sub4(sse_n_d,
+ add4(org_k,
+ add4(mul4(sse_n_u, org_ku),
+ mul4(sse_n_v, org_kv))));
+ }
+ if (HasCommonOrigin && HasCornerRays) {
+ const sse_t nd0 = add4(add4(mul4(sse_n_u,data->corner_dir[ku]),
+ mul4(sse_n_v,data->corner_dir[kv])),
+ data->corner_dir[k]);
+ const sse_t nd = oneOver(nd0);
+ const sse_t f = mul4(f0,nd);
+ sse_t mask = cmp4_gt(f,_mm_eps);
+ if (none4(mask))
+ return;
+
+ const sse_t hu = add4(org_ku,mul4(f,data->corner_dir[ku]));
+ const sse_t hv = add4(org_kv,mul4(f,data->corner_dir[kv]));
+ const sse_t lambda = add4(sse_b_d,
+ add4(mul4(hu,sse_b_nu),
+ mul4(hv,sse_b_nv)));
+ if (all4(lambda))
+ return;
+
+ const sse_t mue = add4(sse_c_d,add4(mul4(hu,sse_c_nu),
+ mul4(hv,sse_c_nv)));
+ if (all4(mue))
+ return;
+
+ mask = cmp4_gt(add4(lambda,mue),_mm_one);
+ if (all4(mask))
+ return;
+ }
- float org_k = data->origin[axis][ray];
- float org_ku = data->origin[ku][ray];
- float org_kv = data->origin[kv][ray];
+ if (ray_begin < sse_begin) {
+ const int ray_begin_aligned = ray_begin & (~3);
+ const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][ray_begin_aligned])),
+ mul4(sse_n_v,
load44(&data->direction[kv][ray_begin_aligned]))),
+ load44(&data->direction[k][ray_begin_aligned]));
+ const sse_t nd = oneOver(nd0);
+
+ if (!HasCommonOrigin) {
+ org_k = load44(&data->origin[axis][ray_begin_aligned]);
+ org_ku = load44(&data->origin[ku][ray_begin_aligned]);
+ org_kv = load44(&data->origin[kv][ray_begin_aligned]);
+ f0 = sub4(set4(n_d),
+ add4(org_k, add4(mul4(sse_n_u,
+ org_ku),
+ mul4(sse_n_v,
+ org_kv))));
+ }
- float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+ const sse_t f = mul4(f0, nd); // maybe these would be faster as
swizzle after load44
+ // plane test
- const float f = f0 * nd;
- // plane test
- if ( f < T_EPSILON || f > data->minT[ray] )
- continue;
+ const unsigned int active_ray_mask = ~((1<<(4-(sse_begin -
ray_begin))) - 1);
- const float hu = org_ku + f*data->direction[ku][ray];
- const float hv = org_kv + f*data->direction[kv][ray];
- const float lambda = b_d + hu*b_nu + hv * b_nv;
+ sse_t mask_test = and4( _mm_cmpnle_ps(f, set4(T_EPSILON)),
+
_mm_cmpnle_ps(load44(&data->minT[ray_begin_aligned]), f));
+ if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
+
+ const sse_t hu = add4( mul4(f,
load44(&data->direction[ku][ray_begin_aligned])), org_ku);
+ const sse_t hv = add4( mul4(f,
load44(&data->direction[kv][ray_begin_aligned])), org_kv);
+ const sse_t lambda = add4( sse_b_d,
+ add4( mul4(hu, sse_b_nu),
+ mul4(hv, sse_b_nv)));
+ mask_test = and4(mask_test, _mm_cmpnlt_ps(lambda, zero4()));
+ if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
+
+ const sse_t mue = add4( sse_c_d,
+ add4( mul4(hu, sse_c_nu),
+ mul4(hv, sse_c_nv)));
+
+ mask_test = and4(mask_test, and4( _mm_cmpnlt_ps(mue, zero4()),
+ _mm_cmpnlt_ps(set4(1.f), add4(mue,
lambda))));
+
+ const int hit_mask = getmask4(mask_test);
+ for (int ray = ray_begin; ray < sse_begin; ++ray) {
+ if ( hit_mask & (1<<(4 - (ray-ray_begin) )) ) {
+ rays.hit(ray, ((float*)&f)[ray-ray_begin], getMaterial(), this,
getTexCoordMapper());
+ }
+ }
+ }
- // barycentric test
- if ( lambda < 0.f )
- continue;
- const float mue = c_d + hu * c_nu + hv * c_nv;
- if ( mue < 0.f || mue + lambda > 1.f )
- continue;
+ for (int ray = sse_begin; ray < sse_end; ray += 4) {
+ const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][ray])),
+ mul4(sse_n_v,
load44(&data->direction[kv][ray]))),
+ load44(&data->direction[k][ray]));
+ const sse_t nd = oneOver(nd0);
+
+ if (!HasCommonOrigin) {
+ org_k = load44(&data->origin[axis][ray]);
+ org_ku = load44(&data->origin[ku][ray]);
+ org_kv = load44(&data->origin[kv][ray]);
+ f0 = sub4(set4(n_d),
+ add4(org_k, add4(mul4(sse_n_u,
+ org_ku),
+ mul4(sse_n_v,
+ org_kv))));
+ }
+
+ const sse_t f = mul4(f0, nd); // maybe these would be faster as
swizzle after load44
+ // plane test
+ sse_t mask_test = and4( _mm_cmpnle_ps(f, set4(T_EPSILON)),
+ _mm_cmpnle_ps(load44(&data->minT[ray]), f));
+ if (getmask4(mask_test) == 0x0) continue;
+
+ const sse_t hu = add4( mul4(f, load44(&data->direction[ku][ray])),
org_ku);
+ const sse_t hv = add4( mul4(f, load44(&data->direction[kv][ray])),
org_kv);
+ const sse_t lambda = add4( sse_b_d,
+ add4( mul4(hu, sse_b_nu),
+ mul4(hv, sse_b_nv)));
+ mask_test = and4(mask_test, _mm_cmpnlt_ps(lambda, zero4()));
+ if (getmask4(mask_test) == 0x0) continue;
+
+ const sse_t mue = add4( sse_c_d,
+ add4( mul4(hu, sse_c_nu),
+ mul4(hv, sse_c_nv)));
- rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+ mask_test = and4(mask_test, and4( _mm_cmpnlt_ps(mue, zero4()),
+ _mm_cmpnlt_ps(set4(1.f), add4(mue,
lambda))));
+
+ rays.hitWithoutTminCheck(ray, mask_test, f, getMaterial(), this,
getTexCoordMapper());
}
- for (int ray = sse_begin; ray < sse_end; ray += 4) {
- const __m128 nd0 = ( _mm_add_ps( _mm_mul_ps( _mm_set1_ps(n_u),
-
_mm_load_ps(&data->direction[ku][ray])),
- _mm_add_ps(
_mm_mul_ps(_mm_set1_ps(n_v),
-
_mm_load_ps(&data->direction[kv][ray])),
-
_mm_load_ps(&data->direction[k][ray]))));
- const __m128 nd = oneOver(nd0);
-
- const __m128 org_k = _mm_load_ps(&data->origin[axis][ray]);
- const __m128 org_ku = _mm_load_ps(&data->origin[ku][ray]);
- const __m128 org_kv = _mm_load_ps(&data->origin[kv][ray]);
- const __m128 f0 = _mm_sub_ps(_mm_set1_ps(n_d),
- _mm_add_ps(org_k,
_mm_add_ps(_mm_mul_ps(_mm_set1_ps(n_u),
-
org_ku),
-
_mm_mul_ps(_mm_set1_ps(n_v),
-
org_kv))));
-
- const __m128 f = _mm_mul_ps(f0, nd); // maybe these would be faster
as swizzle after _mm_load_ps
- // plane test
- __m128 mask_test = _mm_and_ps( _mm_cmpnle_ps(f,
_mm_set1_ps(T_EPSILON)),
-
_mm_cmpnle_ps(_mm_load_ps(&data->minT[ray]), f));
- if (_mm_movemask_ps(mask_test) == 0x0) continue;
-
- const __m128 hu = _mm_add_ps( _mm_mul_ps(f,
_mm_load_ps(&data->direction[ku][ray])), org_ku);
- const __m128 hv = _mm_add_ps( _mm_mul_ps(f,
_mm_load_ps(&data->direction[kv][ray])), org_kv);
- const __m128 lambda = _mm_add_ps( _mm_set1_ps(b_d),
- _mm_add_ps( _mm_mul_ps(hu,
_mm_set1_ps(b_nu)),
- _mm_mul_ps(hv,
_mm_set1_ps(b_nv))));
- mask_test = _mm_and_ps(mask_test, _mm_cmpnlt_ps(lambda,
_mm_setzero_ps()));
- if (_mm_movemask_ps(mask_test) == 0x0) continue;
-
- const __m128 mue = _mm_add_ps( _mm_set1_ps(c_d),
- _mm_add_ps( _mm_mul_ps(hu,
_mm_set1_ps(c_nu)),
- _mm_mul_ps(hv,
_mm_set1_ps(c_nv))));
+ if (sse_end < ray_end) {
+ const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][sse_end])),
+ mul4(sse_n_v,
load44(&data->direction[kv][sse_end]))),
+ load44(&data->direction[k][sse_end]));
+ const sse_t nd = oneOver(nd0);
+
+ if (!HasCommonOrigin) {
+ org_k = load44(&data->origin[axis][sse_end]);
+ org_ku = load44(&data->origin[ku][sse_end]);
+ org_kv = load44(&data->origin[kv][sse_end]);
+ f0 = sub4(set4(n_d),
+ add4(org_k, add4(mul4(sse_n_u,
+ org_ku),
+ mul4(sse_n_v,
+ org_kv))));
+ }
- mask_test = _mm_and_ps(mask_test, _mm_and_ps( _mm_cmpnlt_ps(mue,
_mm_setzero_ps()),
-
_mm_cmpnlt_ps(_mm_set1_ps(1.f), _mm_add_ps(mue, lambda))));
+ const sse_t f = mul4(f0, nd); // maybe these would be faster as
swizzle after load44
+ // plane test
- rays.hitWithoutTminCheck(ray, mask_test, f, getMaterial(), this,
getTexCoordMapper());
- }
+ const unsigned int active_ray_mask = (1<<(ray_end - sse_end)) - 1;
+
+ sse_t mask_test = and4( _mm_cmpnle_ps(f, set4(T_EPSILON)),
+ _mm_cmpnle_ps(load44(&data->minT[sse_end]),
f));
- for (int ray = sse_end; ray < ray_end; ++ray) {
- const float nd0 = ( n_u * data->direction[ku][ray] +
- n_v * data->direction[kv][ray] +
- data->direction[k][ray] );
- const float nd = 1.f/nd0;
-
- float org_k = data->origin[axis][ray];
- float org_ku = data->origin[ku][ray];
- float org_kv = data->origin[kv][ray];
-
- float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
-
- const float f = f0 * nd;
- // plane test
- if ( f < T_EPSILON || f > data->minT[ray] )
- continue;
-
- const float hu = org_ku + f*data->direction[ku][ray];
- const float hv = org_kv + f*data->direction[kv][ray];
- const float lambda = b_d + hu*b_nu + hv * b_nv;
-
- // barycentric test
- if ( lambda < 0.f )
- continue;
-
- const float mue = c_d + hu * c_nu + hv * c_nv;
- if ( mue < 0.f || mue + lambda > 1.f )
- continue;
+ if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
- rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+ const sse_t hu = add4( mul4(f, load44(&data->direction[ku][sse_end])),
org_ku);
+ const sse_t hv = add4( mul4(f, load44(&data->direction[kv][sse_end])),
org_kv);
+ const sse_t lambda = add4( sse_b_d,
+ add4( mul4(hu, sse_b_nu),
+ mul4(hv, sse_b_nv)));
+ mask_test = and4(mask_test, _mm_cmpnlt_ps(lambda, zero4()));
+ if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
+
+ const sse_t mue = add4( sse_c_d,
+ add4( mul4(hu, sse_c_nu),
+ mul4(hv, sse_c_nv)));
+
+ mask_test = and4(mask_test, and4( _mm_cmpnlt_ps(mue, zero4()),
+ _mm_cmpnlt_ps(set4(1.f), add4(mue,
lambda))));
+
+ const int hit_mask = getmask4(mask_test);
+ for (int ray = sse_end; ray < ray_end; ++ray) {
+ if ( hit_mask & (1<<(ray-sse_end)) ) {
+ rays.hit(ray, ((float*)&f)[ray-sse_end], getMaterial(), this,
getTexCoordMapper());
+ }
+ }
}
}
#else
@@ -245,3 +344,4 @@
}
}
#endif // MANTA_SSE
+
Modified: trunk/Model/Primitives/WaldTriangle.h
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.h (original)
+++ trunk/Model/Primitives/WaldTriangle.h Tue Jan 16 17:58:14 2007
@@ -17,10 +17,12 @@
WaldTriangle(Material* mat,
const Vector& _p1, const Vector& _p2, const Vector& _p3);
+ void setPoints(const Vector& _p1, const Vector& _p2, const Vector& _p3);
+
void computeBounds(const PreprocessContext& context,
BBox& bbox) const
{
- bbox = box;
+ bbox = box;
}
void intersect(const RenderContext& context, RayPacket& rays) const;
- [MANTA] r1267 - in trunk: Interface Model/Groups/private Model/Primitives, thiago, 01/16/2007
Archive powered by MHonArc 2.6.16.