Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1109 - in trunk/Model: Intersections Primitives
- Date: Fri, 9 Jun 2006 07:55:40 -0600 (MDT)
Author: knolla
Date: Fri Jun 9 07:55:37 2006
New Revision: 1109
Modified:
trunk/Model/Intersections/IsosurfaceImplicit.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
Implemented leaf node traversal for SSE octree isosurface volumes.
Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc Fri Jun 9 07:55:37 2006
@@ -164,9 +164,7 @@
int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
if (int_thisvoxelmask == 0) //if none of them hit, don't bother
iterating any more
- {
continue;
- }
#define NEUBAUER_ITERATIONS 3
#pragma unroll(NEUBAUER_ITERATIONS)
@@ -193,8 +191,8 @@
sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd],
tenter[smd])));
//the mask should only include rays that are active
+ sse_thisvoxelmask = and4(sse_thisvoxelmask, cmp4_ge(hit_t, zero4()));
sse_thisvoxelmask = and4(sse_thisvoxelmask, srp.activeMask[smd]);
-
sse_thisvoxelmask = and4(sse_thisvoxelmask,
cmp4_lt(hit_t,srp.minT[smd]));
srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
@@ -207,11 +205,12 @@
srp.normal[axis][smd] = mask4(sse_thisvoxelmask,
normal[axis], srp.normal[axis][smd]);
#pragma unroll(4);
+ int sse_ray = smd << 2;
for(int ray=0; ray<4; ray++)
{
if (int_thisvoxelmask & (1<<ray))
{
- int realray=(smd<<2)+ray;
+ int realray=sse_ray+ray;
srp.rp->data->hitMatl[realray] = matl;
srp.rp->data->hitPrim[realray] = prim;
}
@@ -220,7 +219,7 @@
//int nonzeros = count_nonzeros(sse_thisvoxelmask);
//cerr << "nonzeros=" << nonzeros << endl;
- srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
+ //srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
//active rays in this smd are ones that were active before, and
did NOT intersect.
srp.activeMask[smd] = andnot4(sse_thisvoxelmask,
srp.activeMask[smd]);
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Fri Jun 9 07:55:37 2006
@@ -770,44 +770,55 @@
}
srp.rp = &rays;
srp.minT = (sse_t*)(data->minT);
+ srp.activeRays = 0;
const int sse_begin = rays.begin() >> 2; // equivalent to
Floor(rays.begin()/4)
const int sse_end = ((rays.end()-1+3) >> 2); // Ceil(rays.end()-1/4)
- srp.activeRays = (sse_end - sse_begin) << 2;
+
char first = RayPacket::SSE_MaxSize;
char last = -1;
- #pragma unroll(RayPacket::SSE_MaxSize)
+ sse_t octdims[3];
+ for(int axis=0; axis<3; axis++)
+ octdims[axis] = set4(octdata->dims[axis]);
+
for(int smd=sse_begin; smd<sse_end; smd++)
- {
- sse_t dgt0[3];
- sse_t tnear[3];
- sse_t tfar[3];
- sse_t tnear_unpadded[3];
- sse_t tfar_unpadded[3];
-
- #pragma unroll(3)
- for(int axis=0; axis<3; axis++)
- {
- dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());
- sse_t t0 = mul4(sub4(zero4(), srp.orig[axis][smd]),
srp.inv_dir[axis][smd]);
- sse_t t1 = mul4(sub4(set4(octdata->dims[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
- sse_t t1p = mul4(sub4(set4(octdata->padded_dims[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-
- tnear_unpadded[axis] = mask4(dgt0[axis], t0, t1);
- tfar_unpadded[axis] = mask4(dgt0[axis], t1, t0);
- tnear[axis] = mask4(dgt0[axis], t0, t1p);
- tfar[axis] = mask4(dgt0[axis], t1p, t0);
- }
+ {
+ sse_t t0 = zero4();
+ sse_t t1 = srp.minT[smd];
- sse_t tenter_unpadded = max4(max4(tnear_unpadded[0],
tnear_unpadded[1]), tnear_unpadded[2]);
- sse_t texit_unpadded = min4(min4(tfar_unpadded[0],
tfar_unpadded[1]), tfar_unpadded[2]);
+ sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+ const sse_t b0_x = mask4(signs, zero4(), octdims[0]);
+ const sse_t b1_x = mask4(signs, octdims[0], zero4());
+ signs = cmp4_ge(srp.dir[1][smd],zero4());
+ const sse_t b0_y = mask4(signs, zero4(), octdims[1]);
+ const sse_t b1_y = mask4(signs, octdims[1], zero4());
+ signs = cmp4_ge(srp.dir[2][smd],zero4());
+ const sse_t b0_z = mask4(signs, zero4(), octdims[2]);
+ const sse_t b1_z = mask4(signs, octdims[2], zero4());
- srp.activeMask[smd] = cmp4_lt(tenter_unpadded, texit_unpadded);
- if (_mm_movemask_ps(srp.activeMask[smd])) //if any active
+ const sse_t tBoxNearX = mul4(sub4(b0_x, srp.orig[0][smd]),
srp.inv_dir[0][smd]);
+ const sse_t tBoxNearY = mul4(sub4(b0_y, srp.orig[1][smd]),
srp.inv_dir[1][smd]);
+ const sse_t tBoxNearZ = mul4(sub4(b0_z, srp.orig[2][smd]),
srp.inv_dir[2][smd]);
+
+ t0 = max4(t0,tBoxNearX);
+ t0 = max4(t0,tBoxNearY);
+ t0 = max4(t0,tBoxNearZ);
+
+ const sse_t tBoxFarX = mul4(sub4(b1_x, srp.orig[0][smd]),
srp.inv_dir[0][smd]);
+ const sse_t tBoxFarY = mul4(sub4(b1_y, srp.orig[1][smd]),
srp.inv_dir[1][smd]);
+ const sse_t tBoxFarZ = mul4(sub4(b1_z, srp.orig[2][smd]),
srp.inv_dir[2][smd]);
+
+ t1 = min4(t1,tBoxFarX);
+ t1 = min4(t1,tBoxFarY);
+ t1 = min4(t1,tBoxFarZ);
+
+ srp.activeMask[smd] = cmp4_le(t0,t1);
+ if (_mm_movemask_ps(srp.activeMask[smd])) //if any hit
{
first = MIN(first, smd);
last = smd;
}
- srp.activeRays -= count_zeros(srp.activeMask[smd]);
+
+ srp.activeRays += count_nonzeros(srp.activeMask[smd]);
}
if (first > last)
@@ -825,7 +836,7 @@
{
//cerr << "octnode " << (int)depth << ", " << index << "; first=" <<
(int)first << ",last=" << (int)last << endl;
OctNode& node = octdata->get_node(depth, index);
- Vec3i child_cell = cell;
+ Vec3i child_cell;
int child_bit = octdata->get_child_bit_depth(depth);
index_trace[depth] = index;
int smd_first = first << 2;
@@ -876,9 +887,9 @@
if (octdata->get_isovalue() >= node.mins[target_child] &&
octdata->get_isovalue() <= node.maxs[target_child])
{
- Vector pmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
- Vector pmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
- char newfirst = first_intersects_node_octant(srp, first,
last, pmin, pmax);
+ Vector cmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector cmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+ char newfirst = first_intersects(srp, first, last, cmin,
cmax);
//cerr << "newfirst=" << (int)newfirst << ", last=" <<
(int)last << endl;
@@ -886,7 +897,8 @@
{
if (node.offsets[target_child]==-1)
{
- bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
+ bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth+1,
+ depth, node.values[target_child],
child_cell, index_trace);
}
else
{
@@ -900,12 +912,10 @@
bvh_octnode(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
}
}
+ if (srp.activeRays==0)
+ return;
}
- if (srp.activeRays==0)
- {
- //cerr << "early termination" << endl;
- return;
- }
+
}
}
}
@@ -913,8 +923,207 @@
}
void IsosurfaceOctreeVolume::bvh_octleaf(SSERayPacket& srp, char first, char
last,
- const Vec3i& cell, int stop_depth, int depth, ST value, unsigned
int index_trace[]) const
+ const Vec3i& cell, int stop_depth, int depth,
+ int leaf_depth, ST leaf_value, const Vec3i& leaf_base_cell,
+ unsigned int index_trace[]) const
{
+ int child_bit = octdata->get_child_bit_depth(depth);
+ int unsafe_zone = octdata->get_child_bit_depth(depth-1) -
octdata->get_child_bit_depth(octdata->get_cap_depth());
+ int smd_first = first << 2;
+ Vec3i child_cell;
+
+ //intersect all children in order
+ #pragma unroll(2)
+ for(int midplane_x=0; midplane_x!=2; midplane_x++)
+ {
+ int target_x;
+ if (midplane_x - srp.rp->getSign(smd_first,0))
+ {
+ target_x = 4;
+ child_cell.data[0] = cell.data[0] | child_bit;
+ }
+ else
+ {
+ target_x = 0;
+ child_cell.data[0] = cell.data[0];
+ }
+ #pragma unroll(2)
+ for(int midplane_y=0; midplane_y!=2; midplane_y++)
+ {
+ int target_xy;
+ if (midplane_y - srp.rp->getSign(smd_first,1))
+ {
+ target_xy = target_x | 2;
+ child_cell.data[1] = cell.data[1] | child_bit;
+ }
+ else
+ {
+ target_xy = target_x;
+ child_cell.data[1] = cell.data[1];
+ }
+ #pragma unroll(2)
+ for(int midplane_z=0; midplane_z!=2; midplane_z++)
+ {
+ int target_child;
+ if (midplane_z - srp.rp->getSign(smd_first,2))
+ {
+ target_child = target_xy | 1;
+ child_cell.data[2] = cell.data[2] | child_bit;
+ }
+ else
+ {
+ target_child = target_xy;
+ child_cell.data[2] = cell.data[2];
+ }
+
+ Vec3i local_child_cell = child_cell - leaf_base_cell;
+ if (local_child_cell.data[0] & unsafe_zone ||
local_child_cell.data[1] & unsafe_zone || local_child_cell.data[2] &
unsafe_zone)
+ {
+ if (depth == stop_depth)
+ {
+ sse_t child_tenter[RayPacket::SSE_MaxSize];
+ sse_t child_texit[RayPacket::SSE_MaxSize];
+ sse_t hitmask[RayPacket::SSE_MaxSize];
+ Vector cmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector cmax(child_cell.data[0] + child_bit,
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
+ char newfirst, newlast;
+ intersect_cap_octant(srp, first, last, newfirst,
newlast, cmin, cmax, child_tenter, child_texit, hitmask);
+
+ if (newfirst > newlast)
+ continue;
+#ifdef USE_OCTREE_DATA
+ //use octree data
+ float rho[2][2][2];
+ ST min_rho, max_rho, this_rho;
+ min_rho = max_rho = this_rho = leaf_value;
+ rho[0][0][0] = static_cast<float>(this_rho);
+ Vec3i offset(0,0,child_bit);
+
+ //0,0,1
+ if (target_child & 1)
+ {
+ this_rho =
octdata->lookup_neighbor<0,0,1>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[0][0][1] = static_cast<float>(this_rho);
+
+ //0,1,1
+ offset.data[1] = child_bit;
+ if (target_child & 3)
+ {
+ this_rho =
octdata->lookup_neighbor<0,1,1>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[0][1][1] = static_cast<float>(this_rho);
+
+ //1,1,1
+ offset.data[0] = child_bit;
+ if (target_child & 7)
+ {
+ this_rho =
octdata->lookup_neighbor<1,1,1>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[1][1][1] = static_cast<float>(this_rho);
+
+ //1,1,0
+ offset.data[2] = 0;
+ if (target_child & 6)
+ {
+ this_rho =
octdata->lookup_neighbor<1,1,0>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[1][1][0] = static_cast<float>(this_rho);
+
+ //1,0,0
+ offset.data[1] = 0;
+ if (target_child & 4)
+ {
+ this_rho =
octdata->lookup_neighbor<1,0,0>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[1][0][0] = static_cast<float>(this_rho);
+
+ //1,0,1
+ offset.data[2] = child_bit;
+ if (target_child & 5)
+ {
+ this_rho =
octdata->lookup_neighbor<1,0,1>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[1][0][1] = static_cast<float>(this_rho);
+
+ //0,1,0
+ offset.data[0] = 0;
+ offset.data[1] = child_bit;
+ offset.data[2] = 0;
+ if (target_child & 2)
+ {
+ this_rho =
octdata->lookup_neighbor<0,1,0>(child_cell, offset, stop_depth, leaf_depth,
index_trace);
+ min_rho = MIN(min_rho, this_rho);
+ max_rho = MAX(max_rho, this_rho);
+ }
+ else
+ this_rho = leaf_value;
+ rho[0][1][0] = static_cast<float>(this_rho);
+#else
+ //use original grid data
+ float rho[2][2][2];
+ ST min_rho, max_rho;
+#define MYDATA octdata->indata //toggle this to octdata if you want to test
pure point location (no neighbor finding)
+ min_rho = max_rho = lookup_safe(MYDATA,
child_cell.data[0], child_cell.data[1], child_cell.data[2]);
+ rho[0][0][0] = static_cast<float>(min_rho);
+ for(int c=1; c<8; c++)
+ {
+ Vec3i offset((c&4)!=0, (c&2)!=0, c&1);
+ Vec3i neighboridx = child_cell + offset;
+ ST this_rho = lookup_safe(MYDATA,
neighboridx.data[0], neighboridx.data[1], neighboridx.data[2]);
+
rho[offset.data[0]][offset.data[1]][offset.data[2]] =
static_cast<float>(this_rho);
+ min_rho = MIN(this_rho, min_rho);
+ max_rho = MAX(this_rho, max_rho);
+ }
+#endif
+
+ if (octdata->get_isovalue() >= min_rho &&
octdata->get_isovalue() <= max_rho)
+ {
+ IsosurfaceImplicit::sse_intersect(srp, newfirst,
newlast, cmin, cmax, rho,
+ octdata->get_isovalue(), child_tenter,
child_texit, hitmask, this, PrimitiveCommon::getMaterial());
+ }
+ }
+ else //not at stop depth
+ {
+ Vector cmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector cmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+ char newfirst = first_intersects(srp, first, last,
cmin, cmax);
+
+ bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth+1,
+ leaf_depth, leaf_value, leaf_base_cell,
index_trace);
+ }
+ if (srp.activeRays==0)
+ return;
+ }
+ }
+ }
+ }
+
+
}
void IsosurfaceOctreeVolume::bvh_octcap(SSERayPacket& srp, char first, char
last,
@@ -922,7 +1131,7 @@
{
//cerr << "octcap " << index << ", first=" << (int)first << ",last=" <<
(int)last << endl;
OctCap& cap = octdata->get_cap(index);
- Vec3i child_cell = cell;
+ Vec3i child_cell;
index_trace[depth] = index;
int smd_first = first << 2;
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Fri Jun 9 07:55:37 2006
@@ -60,15 +60,16 @@
const Vec3i& cell, int stop_depth, int depth,
unsigned int index,
unsigned int index_trace[]) const;
- void bvh_octleaf(SSERayPacket& srp, char first, char last,
- const Vec3i& cell, int stop_depth,
int depth, ST value,
- unsigned int index_trace[]) const;
+ void bvh_octleaf(SSERayPacket& srp, char first, char last,
+ const Vec3i& cell, int stop_depth, int depth,
+ int leaf_depth, ST leaf_value, const Vec3i&
leaf_base_cell,
+ unsigned int index_trace[]) const;
void bvh_octcap(SSERayPacket& srp, char first, char last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index,
unsigned int index_trace[]) const;
- inline char first_intersects_node_octant(SSERayPacket& srp, char
first, char last,
+ inline char first_intersects(SSERayPacket& srp, char first, char
last,
const Vector& min, const Vector& max) const
{
sse_t boxmin[3];
@@ -116,6 +117,55 @@
}
return last+1;
}
+
+ inline char last_intersects(SSERayPacket& srp, char first, char
last,
+ const Vector& min, const Vector& max) const
+ {
+ sse_t boxmin[3];
+ sse_t boxmax[3];
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ {
+ boxmin[axis] = set4(min[axis]);
+ boxmax[axis] = set4(max[axis]);
+ }
+
+ for(char smd=last; smd>=first; smd--)
+ {
+ sse_t t0 = zero4();
+ sse_t t1 = srp.minT[smd];
+
+ sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+ const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+ const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+ signs = cmp4_ge(srp.dir[1][smd],zero4());
+ const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+ const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+ signs = cmp4_ge(srp.dir[2][smd],zero4());
+ const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+ const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
+
+ const sse_t tBoxNearX = mul4(sub4(b0_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxNearY = mul4(sub4(b0_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxNearZ = mul4(sub4(b0_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ t0 = max4(t0,tBoxNearX);
+ t0 = max4(t0,tBoxNearY);
+ t0 = max4(t0,tBoxNearZ);
+
+ const sse_t tBoxFarX = mul4(sub4(b1_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxFarY = mul4(sub4(b1_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxFarZ = mul4(sub4(b1_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ t1 = min4(t1,tBoxFarX);
+ t1 = min4(t1,tBoxFarY);
+ t1 = min4(t1,tBoxFarZ);
+
+ if (_mm_movemask_ps(cmp4_le(t0,t1))) //if any hit
+ return smd;
+ }
+ return -1;
+ }
inline void intersect_cap_octant(SSERayPacket& srp, char first,
char last,
char& newfirst, char& newlast, const Vector& min, const
Vector& max,
- [MANTA] r1109 - in trunk/Model: Intersections Primitives, knolla, 06/09/2006
Archive powered by MHonArc 2.6.16.