Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1116 - in trunk: Core/Math Interface Model/Intersections Model/Primitives
- Date: Sun, 11 Jun 2006 02:48:59 -0600 (MDT)
Author: knolla
Date: Sun Jun 11 02:48:54 2006
New Revision: 1116
Added:
trunk/Interface/SSERayPacket.h
Modified:
trunk/Core/Math/SSEDefs.h
trunk/Interface/RayPacket.h
trunk/Model/Intersections/IsosurfaceImplicit.cc
trunk/Model/Intersections/IsosurfaceImplicit.h
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
trunk/Model/Primitives/OctreeVolume.h
Log:
Early ray termination working in IsosurfaceOctreeVolume; slight speedup but
still only marginally faster than single-ray. Far too much traversal time
spent.
Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h (original)
+++ trunk/Core/Math/SSEDefs.h Sun Jun 11 02:48:54 2006
@@ -253,7 +253,7 @@
int nonzeros = 0;
#pragma unroll(4)
for(int i=0; i<4; i++)
- nonzeros += (mask & (1 << (i<<2))) ? 1 : 0;
+ nonzeros += (mask & (1 << i)) ? 1 : 0;
return nonzeros;
}
@@ -263,7 +263,7 @@
int zeros = 0;
#pragma unroll(4)
for(int i=0; i<4; i++)
- zeros += (mask & (1 << (i<<2))) ? 0 : 1;
+ zeros += (mask & (1 << i)) ? 0 : 1;
return zeros;
}
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Sun Jun 11 02:48:54 2006
@@ -78,22 +78,6 @@
// Char-based arrays
char scratchpad_data[MaxSize][MaxScratchpadSize];
};
-
-#ifdef MANTA_SSE
- //A wrapper structure for a typical SSE/packet traversal, such as a BVH
- struct MANTA_ALIGN(16) SSERayPacket
- {
- sse_t activeMask[RayPacketData::SSE_MaxSize];
- int activeRays;
- sse_t* orig[3];
- sse_t* dir[3];
- sse_t* inv_dir[3];
- sse_t* signs[3];
- sse_t* normal[3];
- sse_t* minT;
- RayPacket* rp;
- };
-#endif
class RayPacket {
public:
Added: trunk/Interface/SSERayPacket.h
==============================================================================
--- (empty file)
+++ trunk/Interface/SSERayPacket.h Sun Jun 11 02:48:54 2006
@@ -0,0 +1,29 @@
+
+//A wrapper structure for a typical SSE/packet traversal, such as a BVH
+
+#ifndef Manta_SSERayPacket_h
+#define Manta_SSERayPacket_h
+
+#ifdef MANTA_SSE
+
+#include <Interface/RayPacket.h>
+
+namespace Manta {
+
+ struct MANTA_ALIGN(16) SSERayPacket
+ {
+ sse_t activeMask[RayPacketData::SSE_MaxSize];
+ int activeRays;
+ sse_t* orig[3];
+ sse_t* dir[3];
+ sse_t* inv_dir[3];
+ sse_t* signs[3];
+ sse_t* normal[3];
+ sse_t* minT;
+ RayPacket* rp;
+ };
+}
+
+#endif // MANTA_SSE
+
+#endif
Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc Sun Jun 11 02:48:54 2006
@@ -220,7 +220,8 @@
//int nonzeros = count_nonzeros(sse_thisvoxelmask);
//cerr << "nonzeros=" << nonzeros << endl;
- //srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
+ //this line of code should implement early termination, but this is broken.
+ srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
//active rays in this smd are ones that were active before, and did NOT intersect.
srp.activeMask[smd] = andnot4(sse_thisvoxelmask, srp.activeMask[smd]);
Modified: trunk/Model/Intersections/IsosurfaceImplicit.h
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.h (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.h Sun Jun 11 02:48:54 2006
@@ -10,6 +10,7 @@
#ifdef MANTA_SSE
#include <Core/Math/SSEDefs.h>
+#include <Interface/SSERayPacket.h>
#endif
namespace Manta
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Sun Jun 11 02:48:54 2006
@@ -11,7 +11,7 @@
#include <Core/Math/SSEDefs.h>
#endif
-#define USE_OCTREE_DATA
+//#define USE_OCTREE_DATA
#define MIN4(a,b,c,d) min(min(a,b), min(c,d));
#define MAX4(a,b,c,d) max(max(a,b), max(c,d));
@@ -114,6 +114,7 @@
void IsosurfaceOctreeVolume::intersect(RenderContext const &context,
RayPacket &packet) const
{
#ifdef MANTA_SSE
+//#if 1
packet_intersect_implicit_bvh(packet);
#else
for ( int i = packet.rayBegin; i < packet.rayEnd; i++ )
@@ -912,7 +913,7 @@
bvh_octnode(srp, newfirst, last, child_cell,
stop_depth, depth+1, child_idx, index_trace);
}
}
- if (srp.activeRays==0)
+ if (srp.activeRays<=0)
return;
}
@@ -1116,7 +1117,7 @@
bvh_octleaf(srp, newfirst, last, child_cell,
stop_depth, depth+1,
leaf_depth, leaf_value, leaf_base_cell,
index_trace);
}
- if (srp.activeRays==0)
+ if (srp.activeRays<=0)
return;
}
}
@@ -1186,7 +1187,7 @@
Vector cmax(child_cell.data[0]+1, child_cell.data[1]+1,
child_cell.data[2]+1);
char newfirst, newlast;
intersect_cap_octant(srp, first, last, newfirst, newlast,
cmin, cmax, child_tenter, child_texit, hitmask);
-#if 1
+#ifndef IOV_BOX_CELLS
if (newfirst > newlast)
continue;
@@ -1221,7 +1222,7 @@
//cerr << "in cap " << (unsigned long)(&cap) << ",
octant " << target_child << endl;
IsosurfaceImplicit::sse_intersect(srp, newfirst,
newlast, cmin, cmax, rho,
octdata->get_isovalue(), child_tenter, child_texit,
hitmask, this, PrimitiveCommon::getMaterial());
- if (srp.activeRays==0)
+ if (srp.activeRays<=0)
return;
}
#endif
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Sun Jun 11 02:48:54 2006
@@ -14,8 +14,12 @@
#ifdef MANTA_SSE
#include <Core/Math/SSEDefs.h>
+#include <Interface/SSERayPacket.h>
#endif
+//define this to show box cells, not the cap-value cells (and the implicit)
+//#define IOV_BOX_CELLS
+
namespace Manta
{
@@ -173,6 +177,9 @@
{
sse_t boxmin[3];
sse_t boxmax[3];
+ sse_t tnear[3];
+ sse_t tfar[3];
+ sse_t dgt0[3];
#pragma unroll(3)
for(int axis=0; axis<3; axis++)
{
@@ -187,31 +194,31 @@
tenter[smd] = zero4();
texit[smd] = srp.minT[smd];
- sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
- const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
- const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
- signs = cmp4_ge(srp.dir[1][smd],zero4());
- const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
- const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
- signs = cmp4_ge(srp.dir[2][smd],zero4());
- const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
- const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
-
- const sse_t tBoxNearX = mul4(sub4(b0_x, srp.orig[0][smd]), srp.inv_dir[0][smd]);
- const sse_t tBoxNearY = mul4(sub4(b0_y, srp.orig[1][smd]), srp.inv_dir[1][smd]);
- const sse_t tBoxNearZ = mul4(sub4(b0_z, srp.orig[2][smd]), srp.inv_dir[2][smd]);
-
- tenter[smd] = max4(tenter[smd],tBoxNearX);
- tenter[smd] = max4(tenter[smd],tBoxNearY);
- tenter[smd] = max4(tenter[smd],tBoxNearZ);
-
- const sse_t tBoxFarX = mul4(sub4(b1_x, srp.orig[0][smd]), srp.inv_dir[0][smd]);
- const sse_t tBoxFarY = mul4(sub4(b1_y, srp.orig[1][smd]), srp.inv_dir[1][smd]);
- const sse_t tBoxFarZ = mul4(sub4(b1_z, srp.orig[2][smd]), srp.inv_dir[2][smd]);
-
- texit[smd] = min4(texit[smd],tBoxFarX);
- texit[smd] = min4(texit[smd],tBoxFarY);
- texit[smd] = min4(texit[smd],tBoxFarZ);
+ dgt0[0] = cmp4_ge(srp.dir[0][smd],zero4());
+ const sse_t b0_x = mask4(dgt0[0], boxmin[0], boxmax[0]);
+ const sse_t b1_x = mask4(dgt0[0], boxmax[0], boxmin[0]);
+ dgt0[1] = cmp4_ge(srp.dir[1][smd],zero4());
+ const sse_t b0_y = mask4(dgt0[1], boxmin[1], boxmax[1]);
+ const sse_t b1_y = mask4(dgt0[1], boxmax[1], boxmin[1]);
+ dgt0[2] = cmp4_ge(srp.dir[2][smd],zero4());
+ const sse_t b0_z = mask4(dgt0[2], boxmin[2], boxmax[2]);
+ const sse_t b1_z = mask4(dgt0[2], boxmax[2], boxmin[2]);
+
+ tnear[0] = mul4(sub4(b0_x, srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ tnear[1] = mul4(sub4(b0_y, srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ tnear[2] = mul4(sub4(b0_z, srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ tenter[smd] = max4(tenter[smd],tnear[0]);
+ tenter[smd] = max4(tenter[smd],tnear[1]);
+ tenter[smd] = max4(tenter[smd],tnear[2]);
+
+ tfar[0] = mul4(sub4(b1_x, srp.orig[0][smd]), srp.inv_dir[0][smd]);
+ tfar[1] = mul4(sub4(b1_y, srp.orig[1][smd]), srp.inv_dir[1][smd]);
+ tfar[2] = mul4(sub4(b1_z, srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+ texit[smd] = min4(texit[smd],tfar[0]);
+ texit[smd] = min4(texit[smd],tfar[1]);
+ texit[smd] = min4(texit[smd],tfar[2]);
hitmask[smd] = cmp4_lt(tenter[smd], texit[smd]);
if (_mm_movemask_ps(hitmask[smd]))
@@ -220,7 +227,7 @@
newlast = smd;
}
-#if 0
+#ifdef IOV_BOX_CELLS
sse_t hitmask2 = and4(hitmask[smd], cmp4_lt(tenter[smd],
srp.minT[smd]));
srp.minT[smd] = mask4(hitmask2, tenter[smd],
srp.minT[smd]);
if (_mm_movemask_ps(hitmask2))
Modified: trunk/Model/Primitives/OctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/OctreeVolume.h (original)
+++ trunk/Model/Primitives/OctreeVolume.h Sun Jun 11 02:48:54 2006
@@ -33,7 +33,7 @@
#endif
//enable this only when testing octree data and original data side by side
-//#define TEST_INDATA
+#define TEST_INDATA
//enable dynamic multires via the "stop_depth"
//#define OCTVOL_DYNAMIC_MULTIRES
- [MANTA] r1116 - in trunk: Core/Math Interface Model/Intersections Model/Primitives, knolla, 06/11/2006
Archive powered by MHonArc 2.6.16.