Text archives Help
- From: thiago@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1530 - trunk/Model/Primitives
- Date: Sat, 21 Jul 2007 22:28:40 -0600 (MDT)
Author: thiago
Date: Sat Jul 21 22:28:39 2007
New Revision: 1530
Modified:
trunk/Model/Primitives/WaldTriangle.cc
Log:
There seems to be a bug in the Wald triangle SSE intersection code
that handles leading and trailing rays. For now, here's a hack that
is guaranteed to fix that. It is rather slow, though (completely
non-SSE), and does some redundant computations, so it needs to be
optimized.
Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc (original)
+++ trunk/Model/Primitives/WaldTriangle.cc Sat Jul 21 22:28:39 2007
@@ -248,65 +248,66 @@
}
if (ray_begin < sse_begin) {
- const int ray_begin_aligned = ray_begin & (~3);
- const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][ray_begin_aligned])),
- mul4(sse_n_v,
load44(&data->direction[kv][ray_begin_aligned]))),
- load44(&data->direction[k][ray_begin_aligned]));
- const sse_t nd = oneOver(nd0);
-
- if (!HasCommonOrigin) {
- org_k = load44(&data->origin[axis][ray_begin_aligned]);
- org_ku = load44(&data->origin[ku][ray_begin_aligned]);
- org_kv = load44(&data->origin[kv][ray_begin_aligned]);
- f0 = sub4(set4(n_d),
- add4(org_k, add4(mul4(sse_n_u,
- org_ku),
- mul4(sse_n_v,
- org_kv))));
+ //TODO: the trailing code is just copied over from the non-sse
+ //intersection method. There is some redundant work and plenty
+ //that could still be optimized with sse.
+ const Real* const dir_k = data->direction[axis];
+ const Real* const dir_ku = data->direction[ku];
+ const Real* const dir_kv = data->direction[kv];
+
+ float org_k, org_ku, org_kv, f0;
+
+ const bool RaysConstantOrigin = rays.getAllFlags() &
RayPacket::ConstantOrigin;
+
+ if (RaysConstantOrigin)
+ {
+ org_k = data->origin[axis][rays.begin()];
+ org_ku = data->origin[ku][rays.begin()];
+ org_kv = data->origin[kv][rays.begin()];
+ f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
}
- const sse_t f = mul4(f0, nd); // maybe these would be faster as
swizzle after load44
- // plane test
-
- const unsigned int active_ray_mask = ~((1<<(4-(sse_begin -
ray_begin))) - 1);
-
- sse_t mask_test = and4( _mm_cmpnle_ps(f, set4(T_EPSILON)),
-
_mm_cmpnle_ps(load44(&data->minT[ray_begin_aligned]), f));
- if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
-
- const sse_t hu = add4( mul4(f,
load44(&data->direction[ku][ray_begin_aligned])), org_ku);
- const sse_t hv = add4( mul4(f,
load44(&data->direction[kv][ray_begin_aligned])), org_kv);
- const sse_t lambda = add4( sse_b_d,
- add4( mul4(hu, sse_b_nu),
- mul4(hv, sse_b_nv)));
- mask_test = and4(mask_test, _mm_cmpnlt_ps(lambda, zero4()));
- if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
+ for (int i = ray_begin; i < sse_end; i++ )
+ {
+ const float nd0 = n_u * dir_ku[i] + n_v * dir_kv[i] + dir_k[i];
+ const float nd = 1.f/nd0;
+
+ if (!RaysConstantOrigin)
+ {
+ org_k = data->origin[axis][i];
+ org_ku = data->origin[ku][i];
+ org_kv = data->origin[kv][i];
- const sse_t mue = add4( sse_c_d,
- add4( mul4(hu, sse_c_nu),
- mul4(hv, sse_c_nv)));
-
- mask_test = and4(mask_test, and4( _mm_cmpnlt_ps(mue, zero4()),
- _mm_cmpnlt_ps(set4(1.f), add4(mue,
lambda))));
+ f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+ }
- const int hit_mask = getmask4(mask_test);
- for (int ray = ray_begin; ray < sse_begin; ++ray) {
- if ( hit_mask & (1<<(4 - (ray-ray_begin) )) ) {
- const bool hit = rays.hit(ray, ((float*)&f)[ray-sse_end],
mesh->materials[mesh->face_material[myID]], this, this);
- if (hit) {
-
- float *u = &rays.getScratchpad<float>(0)[ray];
- float *v = &rays.getScratchpad<float>(1)[ray];
-// int *which = rays.getScratchpad<int*>(2)[ray];
- *u = ((float*)&lambda)[ray-sse_end];
- *v = ((float*)&mue)[ray-sse_end];
-// *which = myID;
- }
+ const float f = f0 * nd;
+ // plane test
+ if ( f < T_EPSILON || f > data->minT[i] )
+ continue;
+
+ const float hu = org_ku + f*dir_ku[i];
+ const float hv = org_kv + f*dir_kv[i];
+ const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+ // barycentric test
+ if ( lambda < 0.f )
+ continue;
+
+ const float mue = c_d + hu * c_nu + hv * c_nv;
+ if ( mue < 0.f || mue + lambda > 1.f )
+ continue;
+
+ const bool hit = rays.hit(i, f,
mesh->materials[mesh->face_material[myID]], this, this);
+ if (hit) {
+ float *u = &rays.getScratchpad<float>(0)[i];
+ float *v = &rays.getScratchpad<float>(1)[i];
+ *u = lambda;
+ *v = mue;
}
}
}
-
for (int ray = sse_begin; ray < sse_end; ray += 4) {
const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][ray])),
mul4(sse_n_v,
load44(&data->direction[kv][ray]))),
@@ -357,59 +358,59 @@
}
if (sse_end < ray_end) {
- const sse_t nd0 = add4(add4(mul4(sse_n_u,
load44(&data->direction[ku][sse_end])),
- mul4(sse_n_v,
load44(&data->direction[kv][sse_end]))),
- load44(&data->direction[k][sse_end]));
- const sse_t nd = oneOver(nd0);
-
- if (!HasCommonOrigin) {
- org_k = load44(&data->origin[axis][sse_end]);
- org_ku = load44(&data->origin[ku][sse_end]);
- org_kv = load44(&data->origin[kv][sse_end]);
- f0 = sub4(set4(n_d),
- add4(org_k, add4(mul4(sse_n_u,
- org_ku),
- mul4(sse_n_v,
- org_kv))));
+ const Real* const dir_k = data->direction[axis];
+ const Real* const dir_ku = data->direction[ku];
+ const Real* const dir_kv = data->direction[kv];
+
+ float org_k, org_ku, org_kv, f0;
+
+ const bool RaysConstantOrigin = rays.getAllFlags() &
RayPacket::ConstantOrigin;
+
+ if (RaysConstantOrigin)
+ {
+ org_k = data->origin[axis][rays.begin()];
+ org_ku = data->origin[ku][rays.begin()];
+ org_kv = data->origin[kv][rays.begin()];
+ f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
}
- const sse_t f = mul4(f0, nd); // maybe these would be faster as
swizzle after load44
- // plane test
-
- const unsigned int active_ray_mask = (1<<(ray_end - sse_end)) - 1;
-
- sse_t mask_test = and4( _mm_cmpnle_ps(f, set4(T_EPSILON)),
- _mm_cmpnle_ps(load44(&data->minT[sse_end]),
f));
-
- if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
+ for (int i = sse_end; i < ray_end; i++ )
+ {
+ const float nd0 = n_u * dir_ku[i] + n_v * dir_kv[i] + dir_k[i];
+ const float nd = 1.f/nd0;
+
+ if (!RaysConstantOrigin)
+ {
+ org_k = data->origin[axis][i];
+ org_ku = data->origin[ku][i];
+ org_kv = data->origin[kv][i];
- const sse_t hu = add4( mul4(f, load44(&data->direction[ku][sse_end])),
org_ku);
- const sse_t hv = add4( mul4(f, load44(&data->direction[kv][sse_end])),
org_kv);
- const sse_t lambda = add4( sse_b_d,
- add4( mul4(hu, sse_b_nu),
- mul4(hv, sse_b_nv)));
- mask_test = and4(mask_test, _mm_cmpnlt_ps(lambda, zero4()));
- if ( (getmask4(mask_test) & active_ray_mask) == 0x0) return;
-
- const sse_t mue = add4( sse_c_d,
- add4( mul4(hu, sse_c_nu),
- mul4(hv, sse_c_nv)));
-
- mask_test = and4(mask_test, and4( _mm_cmpnlt_ps(mue, zero4()),
- _mm_cmpnlt_ps(set4(1.f), add4(mue,
lambda))));
+ f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+ }
- const int hit_mask = getmask4(mask_test);
- for (int ray = sse_end; ray < ray_end; ++ray) {
- if ( hit_mask & (1<<(ray-sse_end)) ) {
- const bool hit = rays.hit(ray, ((float*)&f)[ray-sse_end],
mesh->materials[mesh->face_material[myID]], this, this);
- if (hit) {
- float *u = &rays.getScratchpad<float>(0)[ray];
- float *v = &rays.getScratchpad<float>(1)[ray];
-// int *which = rays.getScratchpad<int*>(2)[ray];
- *u = ((float*)&lambda)[ray-sse_end];
- *v = ((float*)&mue)[ray-sse_end];
-// *which = myID;
- }
+ const float f = f0 * nd;
+ // plane test
+ if ( f < T_EPSILON || f > data->minT[i] )
+ continue;
+
+ const float hu = org_ku + f*dir_ku[i];
+ const float hv = org_kv + f*dir_kv[i];
+ const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+ // barycentric test
+ if ( lambda < 0.f )
+ continue;
+
+ const float mue = c_d + hu * c_nu + hv * c_nv;
+ if ( mue < 0.f || mue + lambda > 1.f )
+ continue;
+
+ const bool hit = rays.hit(i, f,
mesh->materials[mesh->face_material[myID]], this, this);
+ if (hit) {
+ float *u = &rays.getScratchpad<float>(0)[i];
+ float *v = &rays.getScratchpad<float>(1)[i];
+ *u = lambda;
+ *v = mue;
}
}
}
- [MANTA] r1530 - trunk/Model/Primitives, thiago, 07/22/2007
Archive powered by MHonArc 2.6.16.