Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1108 - in trunk: Interface Model/Primitives
- Date: Fri, 9 Jun 2006 06:15:12 -0600 (MDT)
Author: knolla
Date: Fri Jun 9 06:15:10 2006
New Revision: 1108
Modified:
trunk/Interface/RayPacket.h
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
Implicit SSE packets are now about as fast as single rays... still needs work
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Fri Jun 9 06:15:10 2006
@@ -73,7 +73,7 @@
// Int-based arrays
int whichEye[MaxSize];
- int signs[3][MaxSize]; // 1=negative, 0=zero, positive
+ MANTA_ALIGN(16) int signs[3][MaxSize]; // 1=negative, 0=zero, positive
// Char-based arrays
char scratchpad_data[MaxSize][MaxScratchpadSize];
@@ -88,6 +88,7 @@
sse_t* orig[3];
sse_t* dir[3];
sse_t* inv_dir[3];
+ sse_t* signs[3];
sse_t* normal[3];
sse_t* minT;
RayPacket* rp;
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Fri Jun 9 06:15:10 2006
@@ -765,6 +765,7 @@
srp.orig[axis] = (sse_t*)(data->origin[axis]);
srp.dir[axis] = (sse_t*)(data->direction[axis]);
srp.inv_dir[axis] = (sse_t*)(data->inverseDirection[axis]);
+ srp.signs[axis] = (sse_t*)(data->signs[axis]);
srp.normal[axis] = (sse_t*)(data->normal[axis]);
}
srp.rp = &rays;
@@ -827,13 +828,14 @@
Vec3i child_cell = cell;
int child_bit = octdata->get_child_bit_depth(depth);
index_trace[depth] = index;
+ int smd_first = first << 2;
//intersect all children in order
#pragma unroll(2)
for(int midplane_x=0; midplane_x!=2; midplane_x++)
{
int target_x;
- if (midplane_x - srp.rp->getSign(0,0))
+ if (midplane_x - srp.rp->getSign(smd_first,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | child_bit;
@@ -847,7 +849,7 @@
for(int midplane_y=0; midplane_y!=2; midplane_y++)
{
int target_xy;
- if (midplane_y - srp.rp->getSign(0,1))
+ if (midplane_y - srp.rp->getSign(smd_first,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | child_bit;
@@ -861,7 +863,7 @@
for(int midplane_z=0; midplane_z!=2; midplane_z++)
{
int target_child;
- if (midplane_z - srp.rp->getSign(0,2))
+ if (midplane_z - srp.rp->getSign(smd_first,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | child_bit;
@@ -871,36 +873,39 @@
target_child = target_xy;
child_cell.data[2] = cell.data[2];
}
-
- Vector pmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
- Vector pmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
- char newfirst = first_intersects_node_octant(srp, first,
last, pmin, pmax);
- //cerr << "newfirst=" << (int)newfirst << ", newlast=" <<
(int)newlast << endl;
-
- if (newfirst <= last && octdata->get_isovalue() >=
node.mins[target_child] && octdata->get_isovalue() <= node.maxs[target_child])
+ if (octdata->get_isovalue() >= node.mins[target_child] &&
octdata->get_isovalue() <= node.maxs[target_child])
{
- if (node.offsets[target_child]==-1)
- {
- bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
- }
- else
+ Vector pmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector pmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+ char newfirst = first_intersects_node_octant(srp, first,
last, pmin, pmax);
+
+ //cerr << "newfirst=" << (int)newfirst << ", last=" <<
(int)last << endl;
+
+ if (newfirst <= last)
{
- unsigned int child_idx = node.children_start +
node.offsets[target_child];
- if (depth == octdata->get_pre_cap_depth()) //cap
+ if (node.offsets[target_child]==-1)
{
- bvh_octcap(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
+ bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
}
else
{
- bvh_octnode(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
+ unsigned int child_idx = node.children_start +
node.offsets[target_child];
+ if (depth == octdata->get_pre_cap_depth()) //cap
+ {
+ bvh_octcap(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
+ }
+ else
+ {
+ bvh_octnode(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
+ }
}
}
- }
- if (srp.activeRays==0)
- {
- //cerr << "early termination" << endl;
- return;
+ if (srp.activeRays==0)
+ {
+ //cerr << "early termination" << endl;
+ return;
+ }
}
}
}
@@ -919,13 +924,14 @@
OctCap& cap = octdata->get_cap(index);
Vec3i child_cell = cell;
index_trace[depth] = index;
+ int smd_first = first << 2;
//intersect all children in order
#pragma unroll(2)
for(int midplane_x=0; midplane_x<2; midplane_x++)
{
int target_x;
- if (midplane_x - srp.rp->getSign(0,0))
+ if (midplane_x - srp.rp->getSign(smd_first,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | 1;
@@ -939,7 +945,7 @@
for(int midplane_y=0; midplane_y<2; midplane_y++)
{
int target_xy;
- if (midplane_y - srp.rp->getSign(0,1))
+ if (midplane_y - srp.rp->getSign(smd_first,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | 1;
@@ -953,7 +959,7 @@
for(int midplane_z=0; midplane_z<2; midplane_z++)
{
int target_child;
- if (midplane_z - srp.rp->getSign(0,2))
+ if (midplane_z - srp.rp->getSign(smd_first,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | 1;
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Fri Jun 9 06:15:10 2006
@@ -71,62 +71,100 @@
inline char first_intersects_node_octant(SSERayPacket& srp, char
first, char last,
const Vector& min, const Vector& max) const
{
+ sse_t boxmin[3];
+ sse_t boxmax[3];
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ {
+ boxmin[axis] = set4(min[axis]);
+ boxmax[axis] = set4(max[axis]);
+ }
+
for(char smd=first; smd<=last; smd++)
{
- sse_t dgt0[3];
- sse_t tnear[3];
- sse_t tfar[3];
-
- #pragma unroll(3)
- for(int axis=0; axis<3; axis++)
- {
- dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());
//use signs?
- sse_t t0 = mul4(sub4(set4(min[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
- sse_t t1 = mul4(sub4(set4(max[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
- tnear[axis] = mask4(dgt0[axis], t0, t1);
- tfar[axis] = mask4(dgt0[axis], t1, t0);
- }
+ sse_t t0 = zero4();
+ sse_t t1 = srp.minT[smd];
- sse_t tenter = max4(max4(tnear[0], tnear[1]), tnear[2]);
- sse_t texit = min4(min4(tfar[0], tfar[1]), tfar[2]);
-
- if (_mm_movemask_ps(cmp4_le(tenter, texit))) //if
any hit
+ sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+ const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+ const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+ signs = cmp4_ge(srp.dir[1][smd],zero4());
+ const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+ const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+ signs = cmp4_ge(srp.dir[2][smd],zero4());
+ const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+ const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
+
+ const sse_t tBoxNearX = mul4(sub4(b0_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxNearY = mul4(sub4(b0_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxNearZ = mul4(sub4(b0_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ t0 = max4(t0,tBoxNearX);
+ t0 = max4(t0,tBoxNearY);
+ t0 = max4(t0,tBoxNearZ);
+
+ const sse_t tBoxFarX = mul4(sub4(b1_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxFarY = mul4(sub4(b1_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxFarZ = mul4(sub4(b1_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ t1 = min4(t1,tBoxFarX);
+ t1 = min4(t1,tBoxFarY);
+ t1 = min4(t1,tBoxFarZ);
+
+ if (_mm_movemask_ps(cmp4_le(t0,t1))) //if any hit
return smd;
}
return last+1;
- }
+ }
inline void intersect_cap_octant(SSERayPacket& srp, char first,
char last,
char& newfirst, char& newlast, const Vector& min, const
Vector& max,
sse_t tenter[], sse_t texit[], sse_t hitmask[]) const
{
- #pragma unroll(RayPacket::SSE_MaxSize)
- for(char smd=first; smd<=last; smd++)
- hitmask[smd] = zero4();
-
+ sse_t boxmin[3];
+ sse_t boxmax[3];
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ {
+ boxmin[axis] = set4(min[axis]);
+ boxmax[axis] = set4(max[axis]);
+ }
+
newlast = first;
newfirst = last+1;
for(char smd=first; smd<=last; smd++)
{
- sse_t dgt0[3];
- sse_t tnear[3];
- sse_t tfar[3];
-
- #pragma unroll(3)
- for(int axis=0; axis<3; axis++)
- {
- dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());
//use signs?
- sse_t t0 = mul4(sub4(set4(min[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
- sse_t t1 = mul4(sub4(set4(max[axis]),
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
- tnear[axis] = mask4(dgt0[axis], t0, t1);
- tfar[axis] = mask4(dgt0[axis], t1, t0);
- }
+ tenter[smd] = zero4();
+ texit[smd] = srp.minT[smd];
- tenter[smd] = max4(max4(tnear[0], tnear[1]), tnear[2]);
- texit[smd] = min4(min4(tfar[0], tfar[1]), tfar[2]);
+ sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+ const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+ const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+ signs = cmp4_ge(srp.dir[1][smd],zero4());
+ const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+ const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+ signs = cmp4_ge(srp.dir[2][smd],zero4());
+ const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+ const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
- hitmask[smd] = cmp4_le(tenter[smd], texit[smd]);
- if (_mm_movemask_ps(hitmask[smd])) //if any hit
+ const sse_t tBoxNearX = mul4(sub4(b0_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxNearY = mul4(sub4(b0_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxNearZ = mul4(sub4(b0_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ tenter[smd] = max4(tenter[smd],tBoxNearX);
+ tenter[smd] = max4(tenter[smd],tBoxNearY);
+ tenter[smd] = max4(tenter[smd],tBoxNearZ);
+
+ const sse_t tBoxFarX = mul4(sub4(b1_x,
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ const sse_t tBoxFarY = mul4(sub4(b1_y,
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ const sse_t tBoxFarZ = mul4(sub4(b1_z,
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ texit[smd] = min4(texit[smd],tBoxFarX);
+ texit[smd] = min4(texit[smd],tBoxFarY);
+ texit[smd] = min4(texit[smd],tBoxFarZ);
+
+ hitmask[smd] = cmp4_lt(tenter[smd], texit[smd]);
+ if (_mm_movemask_ps(hitmask[smd]))
{
newfirst = MIN(newfirst, smd);
newlast = smd;
@@ -164,8 +202,9 @@
#endif
}
- }
-#endif
+ }
+
+#endif //MANTA_SSE
};
};
- [MANTA] r1108 - in trunk: Interface Model/Primitives, knolla, 06/09/2006
Archive powered by MHonArc 2.6.16.