Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1103 - in trunk/Model: Intersections Primitives
- Date: Thu, 8 Jun 2006 07:54:33 -0600 (MDT)
Author: knolla
Date: Thu Jun 8 07:54:30 2006
New Revision: 1103
Modified:
trunk/Model/Intersections/IsosurfaceImplicit.cc
trunk/Model/Intersections/IsosurfaceImplicit.h
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
resolved bugs in SSE IsosurfaceOctreeVolume, but still very slow.
Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc Thu Jun 8 07:54:30 2006
@@ -121,18 +121,20 @@
// as well as Knoll DynRT-vol implementation
void IsosurfaceImplicit::sse_intersect(RayPacket& rays, SSERayPacket& srp,
char first, char last, const Vector& pmin, const Vector& pmax,
float rho[2][2][2],
- float isovalue, sse_t tenter[], sse_t texit[], sse_t hitmask[],
sse_t validmask[],
+ float isovalue, sse_t tenter[], sse_t texit[], sse_t hitmask[],
const Manta::Primitive* prim, const Manta::Material* matl)
{
//cerr << "sse_intersect: first=" << (int)first << ",last=" << (int)last
<< endl;
- for(int smd=first; smd<last; smd++)
+ for(int smd=first; smd<=last; smd++)
{
- sse_t sse_thisvoxelmask = and4(hitmask[smd], validmask[smd]);
+ sse_t sse_thisvoxelmask = hitmask[smd];
int int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
if (int_thisvoxelmask==0)
+ {
continue;
+ }
//compute p0, p1
sse_t p0[3];
@@ -162,7 +164,9 @@
int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
if (int_thisvoxelmask == 0) //if none of them hit, don't bother
iterating any more
+ {
continue;
+ }
#define NEUBAUER_ITERATIONS 3
#pragma unroll(NEUBAUER_ITERATIONS)
@@ -188,7 +192,7 @@
sse_t t = add4(t0, mul4(mul4(D0,denom), sub4(t1,t0)));
sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd],
tenter[smd])));
- //sse_thisvoxelmask = and4(sse_thisvoxelmask,
cmp4_lt(hit_t,srp.minT[smd]));
+ sse_thisvoxelmask = and4(sse_thisvoxelmask,
cmp4_lt(hit_t,srp.minT[smd]));
srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
if (int_thisvoxelmask)
@@ -209,9 +213,6 @@
rays.data->hitPrim[realray] = prim;
}
}
-
- //set "validmask" to 0 for each ray that hit here.
- validmask[smd] = andnot4(validmask[smd], sse_thisvoxelmask);
}
}
}
Modified: trunk/Model/Intersections/IsosurfaceImplicit.h
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.h (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.h Thu Jun 8 07:54:30 2006
@@ -28,7 +28,7 @@
#ifdef MANTA_SSE
static void sse_intersect(RayPacket& rays, SSERayPacket& srp,
char first, char last, const Vector& pmin, const Vector&
pmax, float rho[2][2][2],
- float isovalue, sse_t tenter[], sse_t texit[], sse_t
hitmask[], sse_t validmask[],
+ float isovalue, sse_t tenter[], sse_t texit[], sse_t
hitmask[],
const Manta::Primitive* prim, const Manta::Material*
matl);
static void sse_normal(RayPacket &ray, SSERayPacket& srp, int smd,
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Thu Jun 8 07:54:30 2006
@@ -752,11 +752,10 @@
{
rays.computeInverseDirections();
rays.computeSigns();
+ rays.resetHits();
+
RayPacketData* data = rays.data;
SSERayPacket srp;
-
- //tells us whether this ray is still worth computing; this is checked
off if rays miss volume entirely or hit.
- sse_t validmask[RayPacket::SSE_MaxSize];
//intersect the global bounding box: find first, last
// this will require a special-case AABB intersection
@@ -798,34 +797,31 @@
sse_t tenter_unpadded = max4(max4(tnear_unpadded[0],
tnear_unpadded[1]), tnear_unpadded[2]);
sse_t texit_unpadded = min4(min4(tfar_unpadded[0],
tfar_unpadded[1]), tfar_unpadded[2]);
-
- validmask[smd] = cmp4_le(tenter_unpadded, texit_unpadded);
- if (_mm_movemask_ps(validmask[smd]) == 0) //if none of them
were valid
+
+ if (_mm_movemask_ps(cmp4_lt(tenter_unpadded, texit_unpadded))==0)
//if none hit
continue;
first = MIN(first, smd);
last = smd;
}
- last++;
- if (first >= last)
+ if (first > last)
return;
//cerr << "root node: first = " << (int)first << ", last = " <<
(int)last << endl;
unsigned int index_trace[octdata->get_max_depth() + 1];
Vec3i cell(0,0,0);
- bvh_octnode(rays, srp, first, last, validmask, cell,
octdata->get_cap_depth(), 0, 0, index_trace);
+ bvh_octnode(rays, srp, first, last, cell, octdata->get_cap_depth(), 0,
0, index_trace);
}
-bool IsosurfaceOctreeVolume::bvh_octnode(RayPacket& rays, SSERayPacket& srp,
char first, char last, sse_t validmask[],
+void IsosurfaceOctreeVolume::bvh_octnode(RayPacket& rays, SSERayPacket& srp,
char first, char last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
{
//cerr << "octnode " << (int)depth << ", " << index << "; first=" <<
(int)first << ",last=" << (int)last << endl;
OctNode& node = octdata->get_node(depth, index);
Vec3i child_cell = cell;
int child_bit = octdata->get_child_bit_depth(depth);
-
index_trace[depth] = index;
//intersect all children in order
@@ -833,7 +829,7 @@
for(int midplane_x=0; midplane_x!=2; midplane_x++)
{
int target_x;
- if (midplane_x - rays.getSign(first,0))
+ if (midplane_x - rays.getSign(0,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | child_bit;
@@ -847,7 +843,7 @@
for(int midplane_y=0; midplane_y!=2; midplane_y++)
{
int target_xy;
- if (midplane_y - rays.getSign(first,1))
+ if (midplane_y - rays.getSign(0,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | child_bit;
@@ -861,7 +857,7 @@
for(int midplane_z=0; midplane_z!=2; midplane_z++)
{
int target_child;
- if (midplane_z - rays.getSign(first,2))
+ if (midplane_z - rays.getSign(0,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | child_bit;
@@ -878,40 +874,36 @@
//cerr << "newfirst=" << (int)newfirst << ", newlast=" <<
(int)newlast << endl;
- if (newfirst < last && octdata->get_isovalue() >=
node.mins[target_child] && octdata->get_isovalue() <= node.maxs[target_child])
+ if (newfirst <= last && octdata->get_isovalue() >=
node.mins[target_child] && octdata->get_isovalue() <= node.maxs[target_child])
{
if (node.offsets[target_child]==-1)
{
- if (bvh_octleaf(rays, srp, newfirst, last,
validmask, child_cell, stop_depth, depth, node.values[target_child],
index_trace))
- return true;
+ bvh_octleaf(rays, srp, newfirst, last, child_cell,
stop_depth, depth, node.values[target_child], index_trace);
}
else
{
unsigned int child_idx = node.children_start +
node.offsets[target_child];
if (depth == octdata->get_pre_cap_depth()) //cap
{
- if (bvh_octcap(rays, srp, newfirst, last,
validmask, child_cell, stop_depth, depth+1, child_idx, index_trace))
- return true;
+ bvh_octcap(rays, srp, newfirst, last,
child_cell, stop_depth, depth+1, child_idx, index_trace);
}
else
{
- if (bvh_octnode(rays, srp, newfirst, last,
validmask, child_cell, stop_depth, depth+1, child_idx, index_trace))
- return true;
+ bvh_octnode(rays, srp, newfirst, last,
child_cell, stop_depth, depth+1, child_idx, index_trace);
}
}
}
}
}
}
- return false;
}
-bool IsosurfaceOctreeVolume::bvh_octleaf(RayPacket& rays, SSERayPacket& srp,
char first, char last, sse_t validmask[],
+void IsosurfaceOctreeVolume::bvh_octleaf(RayPacket& rays, SSERayPacket& srp,
char first, char last,
const Vec3i& cell, int stop_depth, int depth, ST value, unsigned
int index_trace[]) const
{
}
-bool IsosurfaceOctreeVolume::bvh_octcap(RayPacket& rays, SSERayPacket& srp,
char first, char last, sse_t validmask[],
+void IsosurfaceOctreeVolume::bvh_octcap(RayPacket& rays, SSERayPacket& srp,
char first, char last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
{
//cerr << "octcap " << index << ", first=" << (int)first << ",last=" <<
(int)last << endl;
@@ -924,7 +916,7 @@
for(int midplane_x=0; midplane_x<2; midplane_x++)
{
int target_x;
- if (midplane_x - rays.getSign(first,0))
+ if (midplane_x - rays.getSign(0,0))
{
target_x = 4;
child_cell.data[0] = cell.data[0] | 1;
@@ -938,7 +930,7 @@
for(int midplane_y=0; midplane_y<2; midplane_y++)
{
int target_xy;
- if (midplane_y - rays.getSign(first,1))
+ if (midplane_y - rays.getSign(0,1))
{
target_xy = target_x | 2;
child_cell.data[1] = cell.data[1] | 1;
@@ -952,7 +944,7 @@
for(int midplane_z=0; midplane_z<2; midplane_z++)
{
int target_child;
- if (midplane_z - rays.getSign(first,2))
+ if (midplane_z - rays.getSign(0,2))
{
target_child = target_xy | 1;
child_cell.data[2] = cell.data[2] | 1;
@@ -968,10 +960,10 @@
sse_t hitmask[RayPacket::SSE_MaxSize];
Vector cmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
Vector cmax(child_cell.data[0]+1, child_cell.data[1]+1,
child_cell.data[2]+1);
- char newlast = last_intersects_cap_octant(srp, first, last,
cmin, cmax, child_tenter, child_texit, hitmask);
- newlast = MIN(last, newlast+1);
+ char newfirst, newlast;
+ intersect_cap_octant(srp, first, last, newfirst, newlast,
cmin, cmax, child_tenter, child_texit, hitmask);
- if (first >= newlast)
+ if (newfirst > newlast)
continue;
#ifdef USE_OCTREE_DATA
@@ -1003,13 +995,12 @@
if (octdata->get_isovalue() >= min_rho &&
octdata->get_isovalue() <= max_rho)
{
//cerr << "in cap " << (unsigned long)(&cap) << ",
octant " << target_child << endl;
- IsosurfaceImplicit::sse_intersect(rays, srp, first,
newlast, cmin, cmax, rho,
- octdata->get_isovalue(), child_tenter, child_texit,
hitmask, validmask, this, PrimitiveCommon::getMaterial());
+ IsosurfaceImplicit::sse_intersect(rays, srp, newfirst,
newlast, cmin, cmax, rho,
+ octdata->get_isovalue(), child_tenter, child_texit,
hitmask, this, PrimitiveCommon::getMaterial());
}
}
}
}
- return false;
}
#endif //#ifdef MANTA_SSE
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Thu Jun 8 07:54:30 2006
@@ -56,22 +56,22 @@
#ifdef MANTA_SSE
void packet_intersect_implicit_bvh(RayPacket& rays)
const;
- bool bvh_octnode(RayPacket& rays, SSERayPacket& srp, char first,
char last, sse_t validmask[],
+ void bvh_octnode(RayPacket& rays, SSERayPacket& srp, char first,
char last,
const Vec3i& cell, int stop_depth, int depth,
unsigned int index,
unsigned int index_trace[]) const;
- bool bvh_octleaf(RayPacket& rays, SSERayPacket& srp, char first,
char last, sse_t validmask[],
+ void bvh_octleaf(RayPacket& rays, SSERayPacket& srp, char first,
char last,
const Vec3i& cell, int stop_depth,
int depth, ST value,
unsigned int index_trace[]) const;
- bool bvh_octcap(RayPacket& rays, SSERayPacket& srp, char first,
char last, sse_t validmask[],
+ void bvh_octcap(RayPacket& rays, SSERayPacket& srp, char first,
char last,
const Vec3i& cell, int stop_depth, int depth, unsigned int
index,
unsigned int index_trace[]) const;
inline char first_intersects_node_octant(SSERayPacket& srp, char
first, char last,
const Vector& min, const Vector& max) const
{
- for(char smd=first; smd<last; smd++)
+ for(char smd=first; smd<=last; smd++)
{
sse_t dgt0[3];
sse_t tnear[3];
@@ -90,21 +90,23 @@
sse_t tenter = max4(max4(tnear[0], tnear[1]), tnear[2]);
sse_t texit = min4(min4(tfar[0], tfar[1]), tfar[2]);
- if (_mm_movemask_ps(cmp4_le(tenter, texit)) != 0) //if
any hit
+ if (_mm_movemask_ps(cmp4_le(tenter, texit))) //if
any hit
return smd;
}
- return last;
+ return last+1;
}
- inline char last_intersects_cap_octant(SSERayPacket& srp, char
first, char last,
- const Vector& min, const Vector& max,
+ inline void intersect_cap_octant(SSERayPacket& srp, char first,
char last,
+ char& newfirst, char& newlast, const Vector& min, const
Vector& max,
sse_t tenter[], sse_t texit[], sse_t hitmask[]) const
{
#pragma unroll(RayPacket::SSE_MaxSize)
- for(char smd=first; smd<last; smd++)
+ for(char smd=first; smd<=last; smd++)
hitmask[smd] = zero4();
- for(char smd=last-1; smd>=first; smd--)
+ newlast = first;
+ newfirst = last+1;
+ for(char smd=first; smd<=last; smd++)
{
sse_t dgt0[3];
sse_t tnear[3];
@@ -124,10 +126,12 @@
texit[smd] = min4(min4(tfar[0], tfar[1]), tfar[2]);
hitmask[smd] = cmp4_le(tenter[smd], texit[smd]);
- if (_mm_movemask_ps(hitmask[smd]) != 0) //if any hit
- return smd;
+ if (_mm_movemask_ps(hitmask[smd])) //if any hit
+ {
+ newfirst = MIN(newfirst, smd);
+ newlast = smd;
+ }
}
- return first;
}
#endif
};
- [MANTA] r1103 - in trunk/Model: Intersections Primitives, knolla, 06/08/2006
Archive powered by MHonArc 2.6.16.