Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1105 - in trunk: Core/Math Interface Model/Intersections Model/Primitives


Chronological Thread 
  • From: knolla@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1105 - in trunk: Core/Math Interface Model/Intersections Model/Primitives
  • Date: Thu, 8 Jun 2006 14:12:24 -0600 (MDT)

Author: knolla
Date: Thu Jun  8 14:12:19 2006
New Revision: 1105

Modified:
   trunk/Core/Math/SSEDefs.h
   trunk/Interface/RayPacket.h
   trunk/Model/Intersections/IsosurfaceImplicit.cc
   trunk/Model/Intersections/IsosurfaceImplicit.h
   trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
   trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
partially working early termination. Faster, but not enough.

Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h   (original)
+++ trunk/Core/Math/SSEDefs.h   Thu Jun  8 14:12:19 2006
@@ -246,6 +246,26 @@
         _mm_store_ps(f,t);
         return Vector(f[2], f[1], f[0]);
     }
+    
+    inline int count_nonzeros(sse_t t)  // number of the 4 lanes of t whose sign bit is set
+    {
+        const int mask = _mm_movemask_ps(t);  // bit i (0..3) = sign bit of lane i
+        int nonzeros = 0;
+        #pragma unroll(4)
+        for(int i=0; i<4; i++)
+            nonzeros += (mask >> i) & 1;  // lanes live in bits 0..3, so shift by i (not i<<2)
+        return nonzeros;
+    }
+    
+    inline int count_zeros(sse_t t)  // number of the 4 lanes of t whose sign bit is clear
+    {
+        const int mask = _mm_movemask_ps(t);  // bit i (0..3) = sign bit of lane i
+        int zeros = 0;
+        #pragma unroll(4)
+        for(int i=0; i<4; i++)
+            zeros += ((mask >> i) & 1) ^ 1;  // lanes live in bits 0..3, so shift by i (not i<<2)
+        return zeros;
+    }
 
 };
 

Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Thu Jun  8 14:12:19 2006
@@ -80,13 +80,17 @@
   };
   
 #ifdef MANTA_SSE
+  //A wrapper structure for a typical SSE/packet traversal, such as a BVH
   struct MANTA_ALIGN(16) SSERayPacket
   {
+    sse_t activeMask[RayPacketData::SSE_MaxSize];  // per-SIMD-group lane mask of rays still active; lanes are cleared once they record a hit
+    int activeRays;  // running count of active rays; traversal returns early when it reaches 0
     sse_t* orig[3];
     sse_t* dir[3];
     sse_t* inv_dir[3];
     sse_t* normal[3];
     sse_t* minT;
+    RayPacket* rp;  // the RayPacket being wrapped; used for getSign() and to write hitMatl/hitPrim
   };
 #endif  
 

Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc     (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc     Thu Jun  8 14:12:19 
2006
@@ -119,7 +119,7 @@
 //SSE packet implementation
 //Based on Marmitt et al. 04, Wald 05 SSE intersections (OpenRT)
 //  as well as Knoll DynRT-vol implementation
-void IsosurfaceImplicit::sse_intersect(RayPacket& rays, SSERayPacket& srp, 
+void IsosurfaceImplicit::sse_intersect(SSERayPacket& srp, 
             char first, char last, const Vector& pmin, const Vector& pmax, 
float rho[2][2][2], 
             float isovalue, sse_t tenter[], sse_t texit[], sse_t hitmask[],
             const Manta::Primitive* prim, const Manta::Material* matl)
@@ -191,14 +191,17 @@
         const sse_t denom = accurateReciprocal(sub4(D0,D1));
         sse_t t = add4(t0, mul4(mul4(D0,denom), sub4(t1,t0)));
         sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd], 
tenter[smd])));
-            
+        
+        //the mask should only include rays that are active
+        sse_thisvoxelmask = and4(sse_thisvoxelmask, srp.activeMask[smd]);
+        
         sse_thisvoxelmask = and4(sse_thisvoxelmask, 
cmp4_lt(hit_t,srp.minT[smd]));
         srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
         int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
         if (int_thisvoxelmask)
         {
             sse_t normal[3];
-            sse_normal(rays, srp, smd, normal, pmin, pmax, rho);
+            sse_normal(srp, smd, normal, pmin, pmax, rho);
             #pragma unroll(3)
             for(int axis=0; axis<3; axis++)
                 srp.normal[axis][smd] = mask4(sse_thisvoxelmask, 
normal[axis], srp.normal[axis][smd]);
@@ -209,15 +212,23 @@
                 if (int_thisvoxelmask & (1<<ray))
                 {
                     int realray=(smd<<2)+ray;
-                    rays.data->hitMatl[realray] = matl;
-                    rays.data->hitPrim[realray] = prim;
+                    srp.rp->data->hitMatl[realray] = matl;
+                    srp.rp->data->hitPrim[realray] = prim;
                 }
             }
+            
+            //int nonzeros = count_nonzeros(sse_thisvoxelmask);
+            //cerr << "nonzeros=" << nonzeros << endl;
+            
+            srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
+            
+            //active rays in this smd are ones that were active before, and 
did NOT intersect.
+            srp.activeMask[smd] = andnot4(sse_thisvoxelmask, 
srp.activeMask[smd]);
         }
     }
 }
             
-void IsosurfaceImplicit::sse_normal(RayPacket &ray, SSERayPacket& srp, int 
smd, 
+void IsosurfaceImplicit::sse_normal(SSERayPacket& srp, int smd, 
             sse_t normal[], const Vector& pmin, const Vector& pmax,
             const float rho[2][2][2])
 {

Modified: trunk/Model/Intersections/IsosurfaceImplicit.h
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.h      (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.h      Thu Jun  8 14:12:19 
2006
@@ -26,12 +26,12 @@
         //TODO - non-SSE packet intersection              
                       
 #ifdef MANTA_SSE
-        static void sse_intersect(RayPacket& rays, SSERayPacket& srp, 
+        static void sse_intersect(SSERayPacket& srp, 
                     char first, char last, const Vector& pmin, const Vector& 
pmax, float rho[2][2][2], 
                     float isovalue, sse_t tenter[], sse_t texit[], sse_t 
hitmask[],
                     const Manta::Primitive* prim, const Manta::Material* 
matl);
                     
-        static void sse_normal(RayPacket &ray, SSERayPacket& srp, int smd, 
+        static void sse_normal(SSERayPacket& srp, int smd, 
                     sse_t normal[], const Vector& pmin, const Vector& pmax,
                     const float rho[2][2][2]);
                     

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    Thu Jun  8 14:12:19 
2006
@@ -759,7 +759,6 @@
 
     //intersect the global bounding box: find first, last
     //  this will require a special-case AABB intersection
-
     #pragma unroll(3)
     for(int axis=0; axis<3; axis++)
     {
@@ -768,12 +767,15 @@
         srp.inv_dir[axis] = (sse_t*)(data->inverseDirection[axis]);
         srp.normal[axis] = (sse_t*)(data->normal[axis]);
     }
+    srp.rp = &rays;
     srp.minT = (sse_t*)(data->minT);
-    
-    int first = RayPacket::SSE_MaxSize;
-    int last = -1;
+    const int sse_begin = rays.begin() >> 2; // equivalent to 
Floor(rays.begin()/4)
+    const int sse_end = ((rays.end()-1+3) >> 2); // Ceil((rays.end()-1)/4)
+    srp.activeRays = (sse_end - sse_begin) << 2;
+    char first = RayPacket::SSE_MaxSize;
+    char last = -1;
     #pragma unroll(RayPacket::SSE_MaxSize)    
-    for(int smd=0; smd<RayPacket::SSE_MaxSize; smd++)
+    for(int smd=sse_begin; smd<sse_end; smd++)
     {
         sse_t dgt0[3];
         sse_t tnear[3];
@@ -798,11 +800,13 @@
         sse_t tenter_unpadded = max4(max4(tnear_unpadded[0], 
tnear_unpadded[1]), tnear_unpadded[2]);
         sse_t texit_unpadded = min4(min4(tfar_unpadded[0], 
tfar_unpadded[1]), tfar_unpadded[2]);
         
-        if (_mm_movemask_ps(cmp4_lt(tenter_unpadded, texit_unpadded))==0)   
//if none hit
-            continue;
-            
-        first = MIN(first, smd);
-        last = smd;
+        srp.activeMask[smd] = cmp4_lt(tenter_unpadded, texit_unpadded);
+        if (_mm_movemask_ps(srp.activeMask[smd]))   //if any active
+        {
+            first = MIN(first, smd);
+            last = smd;
+        }
+        srp.activeRays -= count_zeros(srp.activeMask[smd]);
     }
     
     if (first > last)
@@ -812,10 +816,10 @@
        
     unsigned int index_trace[octdata->get_max_depth() + 1];
     Vec3i cell(0,0,0);
-    bvh_octnode(rays, srp, first, last, cell, octdata->get_cap_depth(), 0, 
0, index_trace);
+    bvh_octnode(srp, first, last, cell, octdata->get_cap_depth(), 0, 0, 
index_trace);
 }
 
-void IsosurfaceOctreeVolume::bvh_octnode(RayPacket& rays, SSERayPacket& srp, 
char first, char last, 
+void IsosurfaceOctreeVolume::bvh_octnode(SSERayPacket& srp, char first, char 
last, 
             const Vec3i& cell, int stop_depth, int depth, unsigned int 
index, unsigned int index_trace[]) const
 {
     //cerr << "octnode " << (int)depth << ", " << index << "; first=" << 
(int)first << ",last=" << (int)last << endl;
@@ -829,7 +833,7 @@
     for(int midplane_x=0; midplane_x!=2; midplane_x++)
     {
         int target_x;
-        if (midplane_x - rays.getSign(0,0))
+        if (midplane_x - srp.rp->getSign(0,0))
         {
             target_x = 4;
             child_cell.data[0] = cell.data[0] | child_bit;
@@ -843,7 +847,7 @@
         for(int midplane_y=0; midplane_y!=2; midplane_y++)
         {
             int target_xy;
-            if (midplane_y - rays.getSign(0,1))
+            if (midplane_y - srp.rp->getSign(0,1))
             {
                 target_xy = target_x | 2;
                 child_cell.data[1] = cell.data[1] | child_bit;
@@ -857,7 +861,7 @@
             for(int midplane_z=0; midplane_z!=2; midplane_z++)
             {
                 int target_child;
-                if (midplane_z - rays.getSign(0,2))
+                if (midplane_z - srp.rp->getSign(0,2))
                 {
                     target_child = target_xy | 1;
                     child_cell.data[2] = cell.data[2] | child_bit;
@@ -878,32 +882,37 @@
                 {
                     if (node.offsets[target_child]==-1)
                     {
-                        bvh_octleaf(rays, srp, newfirst, last, child_cell, 
stop_depth, depth, node.values[target_child], index_trace);
+                        bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth, node.values[target_child], index_trace);
                     }
                     else
                     {
                         unsigned int child_idx = node.children_start + 
node.offsets[target_child];
                         if (depth == octdata->get_pre_cap_depth())     //cap
                         {
-                            bvh_octcap(rays, srp, newfirst, last, 
child_cell, stop_depth, depth+1, child_idx, index_trace);
+                            bvh_octcap(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
                         }
                         else
                         {
-                            bvh_octnode(rays, srp, newfirst, last, 
child_cell, stop_depth, depth+1, child_idx, index_trace);
+                            bvh_octnode(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
                         }
                     }
                 }
+                if (srp.activeRays==0)
+                {
+                    //cerr << "early termination" << endl;
+                    return;
+                }
             }
         }
     }
 }
 
-void IsosurfaceOctreeVolume::bvh_octleaf(RayPacket& rays, SSERayPacket& srp, 
char first, char last, 
+void IsosurfaceOctreeVolume::bvh_octleaf(SSERayPacket& srp, char first, char 
last, 
             const Vec3i& cell, int stop_depth, int depth, ST value, unsigned 
int index_trace[]) const
 {
 }
 
-void IsosurfaceOctreeVolume::bvh_octcap(RayPacket& rays, SSERayPacket& srp, 
char first, char last,
+void IsosurfaceOctreeVolume::bvh_octcap(SSERayPacket& srp, char first, char 
last,
             const Vec3i& cell, int stop_depth, int depth, unsigned int 
index, unsigned int index_trace[]) const
 {
     //cerr << "octcap " << index << ", first=" << (int)first << ",last=" << 
(int)last << endl;
@@ -916,7 +925,7 @@
     for(int midplane_x=0; midplane_x<2; midplane_x++)
     {
         int target_x;
-        if (midplane_x - rays.getSign(0,0))
+        if (midplane_x - srp.rp->getSign(0,0))
         {
             target_x = 4;
             child_cell.data[0] = cell.data[0] | 1;
@@ -930,7 +939,7 @@
         for(int midplane_y=0; midplane_y<2; midplane_y++)
         {
             int target_xy;
-            if (midplane_y - rays.getSign(0,1))
+            if (midplane_y - srp.rp->getSign(0,1))
             {
                 target_xy = target_x | 2;
                 child_cell.data[1] = cell.data[1] | 1;
@@ -944,7 +953,7 @@
             for(int midplane_z=0; midplane_z<2; midplane_z++)
             {
                 int target_child;
-                if (midplane_z - rays.getSign(0,2))
+                if (midplane_z - srp.rp->getSign(0,2))
                 {
                     target_child = target_xy | 1;
                     child_cell.data[2] = cell.data[2] | 1;
@@ -962,7 +971,7 @@
                 Vector cmax(child_cell.data[0]+1, child_cell.data[1]+1, 
child_cell.data[2]+1);
                 char newfirst, newlast;
                 intersect_cap_octant(srp, first, last, newfirst, newlast, 
cmin, cmax, child_tenter, child_texit, hitmask);
-                
+#if 1
                 if (newfirst > newlast)
                     continue;
                     
@@ -995,9 +1004,12 @@
                 if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
                 {
                     //cerr << "in cap " << (unsigned long)(&cap) << ", 
octant " << target_child << endl;
-                    IsosurfaceImplicit::sse_intersect(rays, srp, newfirst, 
newlast, cmin, cmax, rho, 
+                    IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho, 
                         octdata->get_isovalue(), child_tenter, child_texit, 
hitmask, this, PrimitiveCommon::getMaterial());
+                    if (srp.activeRays==0)
+                        return;
                 }
+#endif                
             }
         }
     }

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h     (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h     Thu Jun  8 14:12:19 
2006
@@ -56,15 +56,15 @@
 #ifdef MANTA_SSE
                        void packet_intersect_implicit_bvh(RayPacket& rays) 
const;
             
-            void bvh_octnode(RayPacket& rays, SSERayPacket& srp, char first, 
char last,
+            void bvh_octnode(SSERayPacket& srp, char first, char last,
                             const Vec3i& cell, int stop_depth, int depth, 
unsigned int index, 
                             unsigned int index_trace[]) const;
                             
-            void bvh_octleaf(RayPacket& rays, SSERayPacket& srp, char first, 
char last,
+            void bvh_octleaf(SSERayPacket& srp, char first, char last,
                                         const Vec3i& cell, int stop_depth, 
int depth, ST value, 
                                         unsigned int index_trace[]) const;
             
-            void bvh_octcap(RayPacket& rays, SSERayPacket& srp, char first, 
char last,
+            void bvh_octcap(SSERayPacket& srp, char first, char last,
                 const Vec3i& cell, int stop_depth, int depth, unsigned int 
index, 
                 unsigned int index_trace[]) const;
             
@@ -130,7 +130,39 @@
                     {
                         newfirst = MIN(newfirst, smd);
                         newlast = smd;
-                    }    
+                    } 
+                    
+#if 0
+                    sse_t hitmask2 = and4(hitmask[smd], cmp4_lt(tenter[smd], 
srp.minT[smd]));
+                    srp.minT[smd] = mask4(hitmask2, tenter[smd], 
srp.minT[smd]);
+                    if (_mm_movemask_ps(hitmask2))
+                    {
+                        sse_t normal[3];
+                        for(int axis=0; axis<3; axis++)
+                        {
+                            normal[axis] = mask4(cmp4_eq(tenter[smd], 
tnear[axis]), mask4(dgt0[axis], set4(-1.0f), _mm_one), zero4());
+                            srp.normal[axis][smd] = mask4(hitmask2, 
normal[axis], srp.normal[axis][smd]);
+                        }
+                        
+                        #pragma unroll(3)
+                        for(int axis=0; axis<3; axis++)
+                            srp.normal[axis][smd] = mask4(hitmask2, 
normal[axis], srp.normal[axis][smd]);
+                            
+                        int int_hitmask2 = _mm_movemask_ps(hitmask2);
+                            
+                        #pragma unroll(4);
+                        for(int ray=0; ray<4; ray++)
+                        {
+                            if (int_hitmask2 & (1<<ray))
+                            {
+                                int realray=(smd<<2)+ray;
+                                srp.rp->data->hitMatl[realray] = 
PrimitiveCommon::getMaterial();
+                                srp.rp->data->hitPrim[realray] = this;
+                            }
+                        }
+                    }
+
+#endif
                 }
             }                 
 #endif                                           




  • [MANTA] r1105 - in trunk: Core/Math Interface Model/Intersections Model/Primitives, knolla, 06/08/2006

Archive powered by MHonArc 2.6.16.

Top of page