Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1105 - in trunk: Core/Math Interface Model/Intersections Model/Primitives
- Date: Thu, 8 Jun 2006 14:12:24 -0600 (MDT)
Author: knolla
Date: Thu Jun 8 14:12:19 2006
New Revision: 1105
Modified:
trunk/Core/Math/SSEDefs.h
trunk/Interface/RayPacket.h
trunk/Model/Intersections/IsosurfaceImplicit.cc
trunk/Model/Intersections/IsosurfaceImplicit.h
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
partially working early termination. Faster, but not enough.
Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h (original)
+++ trunk/Core/Math/SSEDefs.h Thu Jun 8 14:12:19 2006
@@ -246,6 +246,26 @@
_mm_store_ps(f,t);
return Vector(f[2], f[1], f[0]);
}
+
+ // Returns how many of the four lanes of t have their sign bit set, i.e. how
+ // many lanes are "true" in an SSE comparison mask. Note that a lane holding
+ // an ordinary positive float is NOT counted, despite the function's name.
+ // _mm_movemask_ps packs the four sign bits into bits 0..3 of its result,
+ // so lane i must be tested with (1 << i), not with a nibble-sized stride.
+ inline int count_nonzeros(sse_t t)
+ {
+ int mask = _mm_movemask_ps(t);
+ int nonzeros = 0;
+ #pragma unroll(4)
+ for(int i=0; i<4; i++)
+ nonzeros += (mask & (1 << i)) ? 1 : 0;
+ return nonzeros;
+ }
+
+ // Returns how many of the four lanes of t have their sign bit clear, i.e.
+ // how many lanes are "false" in an SSE comparison mask.
+ // _mm_movemask_ps packs the four sign bits into bits 0..3 of its result,
+ // so lane i must be tested with (1 << i), not with a nibble-sized stride.
+ inline int count_zeros(sse_t t)
+ {
+ int mask = _mm_movemask_ps(t);
+ int zeros = 0;
+ #pragma unroll(4)
+ for(int i=0; i<4; i++)
+ zeros += (mask & (1 << i)) ? 0 : 1;
+ return zeros;
+ }
};
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Thu Jun 8 14:12:19 2006
@@ -80,13 +80,17 @@
};
#ifdef MANTA_SSE
+ //A wrapper structure for a typical SSE/packet traversal, such as a BVH
struct MANTA_ALIGN(16) SSERayPacket
{
+ sse_t activeMask[RayPacketData::SSE_MaxSize]; // per-SIMD-group lane mask of rays still active: set from the bounding-box test, cleared for lanes that record a hit
+ int activeRays; // running count of active rays; the octree traversal returns early once this reaches 0
sse_t* orig[3];
sse_t* dir[3];
sse_t* inv_dir[3];
sse_t* normal[3];
sse_t* minT;
+ RayPacket* rp; // the RayPacket this wrapper was built from (assigned as srp.rp = &rays); used for getSign() and hitMatl/hitPrim writes
};
#endif
Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc Thu Jun 8 14:12:19 2006
@@ -119,7 +119,7 @@
//SSE packet implementation
//Based on Marmitt et al. 04, Wald 05 SSE intersections (OpenRT)
// as well as Knoll DynRT-vol implementation
-void IsosurfaceImplicit::sse_intersect(RayPacket& rays, SSERayPacket& srp,
+void IsosurfaceImplicit::sse_intersect(SSERayPacket& srp,
char first, char last, const Vector& pmin, const Vector& pmax,
float rho[2][2][2],
float isovalue, sse_t tenter[], sse_t texit[], sse_t hitmask[],
const Manta::Primitive* prim, const Manta::Material* matl)
@@ -191,14 +191,17 @@
const sse_t denom = accurateReciprocal(sub4(D0,D1));
sse_t t = add4(t0, mul4(mul4(D0,denom), sub4(t1,t0)));
sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd],
tenter[smd])));
-
+
+ //the mask should only include rays that are active
+ sse_thisvoxelmask = and4(sse_thisvoxelmask, srp.activeMask[smd]);
+
sse_thisvoxelmask = and4(sse_thisvoxelmask,
cmp4_lt(hit_t,srp.minT[smd]));
srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
if (int_thisvoxelmask)
{
sse_t normal[3];
- sse_normal(rays, srp, smd, normal, pmin, pmax, rho);
+ sse_normal(srp, smd, normal, pmin, pmax, rho);
#pragma unroll(3)
for(int axis=0; axis<3; axis++)
srp.normal[axis][smd] = mask4(sse_thisvoxelmask,
normal[axis], srp.normal[axis][smd]);
@@ -209,15 +212,23 @@
if (int_thisvoxelmask & (1<<ray))
{
int realray=(smd<<2)+ray;
- rays.data->hitMatl[realray] = matl;
- rays.data->hitPrim[realray] = prim;
+ srp.rp->data->hitMatl[realray] = matl;
+ srp.rp->data->hitPrim[realray] = prim;
}
}
+
+ //int nonzeros = count_nonzeros(sse_thisvoxelmask);
+ //cerr << "nonzeros=" << nonzeros << endl;
+
+ srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
+
+ //active rays in this smd are ones that were active before, and
did NOT intersect.
+ srp.activeMask[smd] = andnot4(sse_thisvoxelmask,
srp.activeMask[smd]);
}
}
}
-void IsosurfaceImplicit::sse_normal(RayPacket &ray, SSERayPacket& srp, int
smd,
+void IsosurfaceImplicit::sse_normal(SSERayPacket& srp, int smd,
sse_t normal[], const Vector& pmin, const Vector& pmax,
const float rho[2][2][2])
{
Modified: trunk/Model/Intersections/IsosurfaceImplicit.h
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.h (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.h Thu Jun 8 14:12:19 2006
@@ -26,12 +26,12 @@
//TODO - non-SSE packet intersection
#ifdef MANTA_SSE
- static void sse_intersect(RayPacket& rays, SSERayPacket& srp,
+ static void sse_intersect(SSERayPacket& srp,
char first, char last, const Vector& pmin, const Vector&
pmax, float rho[2][2][2],
float isovalue, sse_t tenter[], sse_t texit[], sse_t
hitmask[],
const Manta::Primitive* prim, const Manta::Material*
matl);
- static void sse_normal(RayPacket &ray, SSERayPacket& srp, int smd,
+ static void sse_normal(SSERayPacket& srp, int smd,
sse_t normal[], const Vector& pmin, const Vector& pmax,
const float rho[2][2][2]);
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Thu Jun 8 14:12:19 2006
@@ -759,7 +759,6 @@
//intersect the global bounding box: find first, last
// this will require a special-case AABB intersection
-
#pragma unroll(3)
for(int axis=0; axis<3; axis++)
{
@@ -768,12 +767,15 @@
srp.inv_dir[axis] = (sse_t*)(data->inverseDirection[axis]);
srp.normal[axis] = (sse_t*)(data->normal[axis]);
}
+ srp.rp = &rays;
srp.minT = (sse_t*)(data->minT);
-
- int first = RayPacket::SSE_MaxSize;
- int last = -1;
+ const int sse_begin = rays.begin() >> 2; // equivalent to
Floor(rays.begin()/4)
+ const int sse_end = ((rays.end()-1+3) >> 2); // Ceil(rays.end()-1/4)
+ srp.activeRays = (sse_end - sse_begin) << 2;
+ char first = RayPacket::SSE_MaxSize;
+ char last = -1;
#pragma unroll(RayPacket::SSE_MaxSize)
- for(int smd=0; smd<RayPacket::SSE_MaxSize; smd++)
+ for(int smd=sse_begin; smd<sse_end; smd++)
{
sse_t dgt0[3];
sse_t tnear[3];
@@ -798,11 +800,13 @@
sse_t tenter_unpadded = max4(max4(tnear_unpadded[0],
tnear_unpadded[1]), tnear_unpadded[2]);
sse_t texit_unpadded = min4(min4(tfar_unpadded[0],
tfar_unpadded[1]), tfar_unpadded[2]);
- if (_mm_movemask_ps(cmp4_lt(tenter_unpadded, texit_unpadded))==0)
//if none hit
- continue;
-
- first = MIN(first, smd);
- last = smd;
+ srp.activeMask[smd] = cmp4_lt(tenter_unpadded, texit_unpadded);
+ if (_mm_movemask_ps(srp.activeMask[smd])) //if any active
+ {
+ first = MIN(first, smd);
+ last = smd;
+ }
+ srp.activeRays -= count_zeros(srp.activeMask[smd]);
}
if (first > last)
@@ -812,10 +816,10 @@
unsigned int index_trace[octdata->get_max_depth() + 1];
Vec3i cell(0,0,0);
- bvh_octnode(rays, srp, first, last, cell, octdata->get_cap_depth(), 0,
0, index_trace);
+ bvh_octnode(srp, first, last, cell, octdata->get_cap_depth(), 0, 0,
index_trace);
}
-void IsosurfaceOctreeVolume::bvh_octnode(RayPacket& rays, SSERayPacket& srp,
char first, char last,
+void IsosurfaceOctreeVolume::bvh_octnode(SSERayPacket& srp, char first, char
last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
{
//cerr << "octnode " << (int)depth << ", " << index << "; first=" <<
(int)first << ",last=" << (int)last << endl;
@@ -829,7 +833,7 @@
for(int midplane_x=0; midplane_x!=2; midplane_x++)
{
int target_x;
- if (midplane_x - rays.getSign(0,0))
+ if (midplane_x - srp.rp->getSign(0,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | child_bit;
@@ -843,7 +847,7 @@
for(int midplane_y=0; midplane_y!=2; midplane_y++)
{
int target_xy;
- if (midplane_y - rays.getSign(0,1))
+ if (midplane_y - srp.rp->getSign(0,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | child_bit;
@@ -857,7 +861,7 @@
for(int midplane_z=0; midplane_z!=2; midplane_z++)
{
int target_child;
- if (midplane_z - rays.getSign(0,2))
+ if (midplane_z - srp.rp->getSign(0,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | child_bit;
@@ -878,32 +882,37 @@
{
if (node.offsets[target_child]==-1)
{
- bvh_octleaf(rays, srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
+ bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
}
else
{
unsigned int child_idx = node.children_start +
node.offsets[target_child];
if (depth == octdata->get_pre_cap_depth()) //cap
{
- bvh_octcap(rays, srp, newfirst, last,
child_cell, stop_depth, depth+1, child_idx, index_trace);
+ bvh_octcap(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
}
else
{
- bvh_octnode(rays, srp, newfirst, last,
child_cell, stop_depth, depth+1, child_idx, index_trace);
+ bvh_octnode(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
}
}
}
+ if (srp.activeRays==0)
+ {
+ //cerr << "early termination" << endl;
+ return;
+ }
}
}
}
}
-void IsosurfaceOctreeVolume::bvh_octleaf(RayPacket& rays, SSERayPacket& srp,
char first, char last,
+void IsosurfaceOctreeVolume::bvh_octleaf(SSERayPacket& srp, char first, char
last,
const Vec3i& cell, int stop_depth, int depth, ST value, unsigned
int index_trace[]) const
{
}
-void IsosurfaceOctreeVolume::bvh_octcap(RayPacket& rays, SSERayPacket& srp,
char first, char last,
+void IsosurfaceOctreeVolume::bvh_octcap(SSERayPacket& srp, char first, char
last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
{
//cerr << "octcap " << index << ", first=" << (int)first << ",last=" <<
(int)last << endl;
@@ -916,7 +925,7 @@
for(int midplane_x=0; midplane_x<2; midplane_x++)
{
int target_x;
- if (midplane_x - rays.getSign(0,0))
+ if (midplane_x - srp.rp->getSign(0,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | 1;
@@ -930,7 +939,7 @@
for(int midplane_y=0; midplane_y<2; midplane_y++)
{
int target_xy;
- if (midplane_y - rays.getSign(0,1))
+ if (midplane_y - srp.rp->getSign(0,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | 1;
@@ -944,7 +953,7 @@
for(int midplane_z=0; midplane_z<2; midplane_z++)
{
int target_child;
- if (midplane_z - rays.getSign(0,2))
+ if (midplane_z - srp.rp->getSign(0,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | 1;
@@ -962,7 +971,7 @@
Vector cmax(child_cell.data[0]+1, child_cell.data[1]+1,
child_cell.data[2]+1);
char newfirst, newlast;
intersect_cap_octant(srp, first, last, newfirst, newlast,
cmin, cmax, child_tenter, child_texit, hitmask);
-
+#if 1
if (newfirst > newlast)
continue;
@@ -995,9 +1004,12 @@
if (octdata->get_isovalue() >= min_rho &&
octdata->get_isovalue() <= max_rho)
{
//cerr << "in cap " << (unsigned long)(&cap) << ",
octant " << target_child << endl;
- IsosurfaceImplicit::sse_intersect(rays, srp, newfirst,
newlast, cmin, cmax, rho,
+ IsosurfaceImplicit::sse_intersect(srp, newfirst,
newlast, cmin, cmax, rho,
octdata->get_isovalue(), child_tenter, child_texit,
hitmask, this, PrimitiveCommon::getMaterial());
+ if (srp.activeRays==0)
+ return;
}
+#endif
}
}
}
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Thu Jun 8 14:12:19 2006
@@ -56,15 +56,15 @@
#ifdef MANTA_SSE
void packet_intersect_implicit_bvh(RayPacket& rays)
const;
- void bvh_octnode(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ void bvh_octnode(SSERayPacket& srp, char first, char last,
const Vec3i& cell, int stop_depth, int depth,
unsigned int index,
unsigned int index_trace[]) const;
- void bvh_octleaf(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ void bvh_octleaf(SSERayPacket& srp, char first, char last,
const Vec3i& cell, int stop_depth,
int depth, ST value,
unsigned int index_trace[]) const;
- void bvh_octcap(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ void bvh_octcap(SSERayPacket& srp, char first, char last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index,
unsigned int index_trace[]) const;
@@ -130,7 +130,39 @@
{
newfirst = MIN(newfirst, smd);
newlast = smd;
- }
+ }
+
+#if 0
+ sse_t hitmask2 = and4(hitmask[smd], cmp4_lt(tenter[smd],
srp.minT[smd]));
+ srp.minT[smd] = mask4(hitmask2, tenter[smd],
srp.minT[smd]);
+ if (_mm_movemask_ps(hitmask2))
+ {
+ sse_t normal[3];
+ for(int axis=0; axis<3; axis++)
+ {
+ normal[axis] = mask4(cmp4_eq(tenter[smd],
tnear[axis]), mask4(dgt0[axis], set4(-1.0f), _mm_one), zero4());
+ srp.normal[axis][smd] = mask4(hitmask2,
normal[axis], srp.normal[axis][smd]);
+ }
+
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ srp.normal[axis][smd] = mask4(hitmask2,
normal[axis], srp.normal[axis][smd]);
+
+ int int_hitmask2 = _mm_movemask_ps(hitmask2);
+
+ #pragma unroll(4);
+ for(int ray=0; ray<4; ray++)
+ {
+ if (int_hitmask2 & (1<<ray))
+ {
+ int realray=(smd<<2)+ray;
+ srp.rp->data->hitMatl[realray] =
PrimitiveCommon::getMaterial();
+ srp.rp->data->hitPrim[realray] = this;
+ }
+ }
+ }
+
+#endif
}
}
#endif
- [MANTA] r1105 - in trunk: Core/Math Interface Model/Intersections Model/Primitives, knolla, 06/08/2006
Archive powered by MHonArc 2.6.16.