Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1109 - in trunk/Model: Intersections Primitives


Chronological Thread 
  • From: knolla@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1109 - in trunk/Model: Intersections Primitives
  • Date: Fri, 9 Jun 2006 07:55:40 -0600 (MDT)

Author: knolla
Date: Fri Jun  9 07:55:37 2006
New Revision: 1109

Modified:
   trunk/Model/Intersections/IsosurfaceImplicit.cc
   trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
   trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
Implemented leaf node traversal for SSE octree isosurface volumes.

Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc     (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc     Fri Jun  9 07:55:37 2006
@@ -164,9 +164,7 @@
         int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
         
         if (int_thisvoxelmask == 0)    //if none of them hit, don't bother iterating any more
-        {
             continue;
-        }
                     
         #define NEUBAUER_ITERATIONS 3                    
         #pragma unroll(NEUBAUER_ITERATIONS)
@@ -193,8 +191,8 @@
         sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd], 
tenter[smd])));
         
         //the mask should only include rays that are active
+        sse_thisvoxelmask = and4(sse_thisvoxelmask, cmp4_ge(hit_t, zero4()));
         sse_thisvoxelmask = and4(sse_thisvoxelmask, srp.activeMask[smd]);
-        
         sse_thisvoxelmask = and4(sse_thisvoxelmask, 
cmp4_lt(hit_t,srp.minT[smd]));
         srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
         int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
@@ -207,11 +205,12 @@
                 srp.normal[axis][smd] = mask4(sse_thisvoxelmask, 
normal[axis], srp.normal[axis][smd]);
                 
             #pragma unroll(4);
+            int sse_ray = smd << 2;
             for(int ray=0; ray<4; ray++)
             {
                 if (int_thisvoxelmask & (1<<ray))
                 {
-                    int realray=(smd<<2)+ray;
+                    int realray=sse_ray+ray;
                     srp.rp->data->hitMatl[realray] = matl;
                     srp.rp->data->hitPrim[realray] = prim;
                 }
@@ -220,7 +219,7 @@
             //int nonzeros = count_nonzeros(sse_thisvoxelmask);
             //cerr << "nonzeros=" << nonzeros << endl;
             
-            srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
+            //srp.activeRays -= count_nonzeros(sse_thisvoxelmask);
             
             //active rays in this smd are ones that were active before, and did NOT intersect.
             srp.activeMask[smd] = andnot4(sse_thisvoxelmask, 
srp.activeMask[smd]);

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    Fri Jun  9 07:55:37 2006
@@ -770,44 +770,55 @@
     }
     srp.rp = &rays;
     srp.minT = (sse_t*)(data->minT);
+    srp.activeRays = 0;
     const int sse_begin = rays.begin() >> 2; // equivalent to Floor(rays.begin()/4)
     const int sse_end = ((rays.end()-1+3) >> 2); // Ceil(rays.end()-1/4)
-    srp.activeRays = (sse_end - sse_begin) << 2;
+    
     char first = RayPacket::SSE_MaxSize;
     char last = -1;
-    #pragma unroll(RayPacket::SSE_MaxSize)    
+    sse_t octdims[3];
+    for(int axis=0; axis<3; axis++)
+        octdims[axis] = set4(octdata->dims[axis]);
+        
     for(int smd=sse_begin; smd<sse_end; smd++)
-    {
-        sse_t dgt0[3];
-        sse_t tnear[3];
-        sse_t tfar[3];
-        sse_t tnear_unpadded[3];
-        sse_t tfar_unpadded[3];
-            
-        #pragma unroll(3)
-        for(int axis=0; axis<3; axis++)
-        {
-            dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());
-            sse_t t0 = mul4(sub4(zero4(), srp.orig[axis][smd]), 
srp.inv_dir[axis][smd]);
-            sse_t t1 = mul4(sub4(set4(octdata->dims[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-            sse_t t1p = mul4(sub4(set4(octdata->padded_dims[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-
-            tnear_unpadded[axis] = mask4(dgt0[axis], t0, t1);
-            tfar_unpadded[axis] = mask4(dgt0[axis], t1, t0);
-            tnear[axis] = mask4(dgt0[axis], t0, t1p);
-            tfar[axis] = mask4(dgt0[axis], t1p, t0);
-        }
+    {    
+        sse_t t0 = zero4();
+        sse_t t1 = srp.minT[smd];
         
-        sse_t tenter_unpadded = max4(max4(tnear_unpadded[0], 
tnear_unpadded[1]), tnear_unpadded[2]);
-        sse_t texit_unpadded = min4(min4(tfar_unpadded[0], 
tfar_unpadded[1]), tfar_unpadded[2]);
+        sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+        const sse_t b0_x = mask4(signs, zero4(), octdims[0]);
+        const sse_t b1_x = mask4(signs, octdims[0], zero4());
+        signs = cmp4_ge(srp.dir[1][smd],zero4());
+        const sse_t b0_y = mask4(signs, zero4(), octdims[1]);
+        const sse_t b1_y = mask4(signs, octdims[1], zero4());
+        signs = cmp4_ge(srp.dir[2][smd],zero4());
+        const sse_t b0_z = mask4(signs, zero4(), octdims[2]);
+        const sse_t b1_z = mask4(signs, octdims[2], zero4());
         
-        srp.activeMask[smd] = cmp4_lt(tenter_unpadded, texit_unpadded);
-        if (_mm_movemask_ps(srp.activeMask[smd]))   //if any active
+        const sse_t tBoxNearX = mul4(sub4(b0_x, srp.orig[0][smd]), 
srp.inv_dir[0][smd]);
+        const sse_t tBoxNearY = mul4(sub4(b0_y, srp.orig[1][smd]), 
srp.inv_dir[1][smd]);
+        const sse_t tBoxNearZ = mul4(sub4(b0_z, srp.orig[2][smd]), 
srp.inv_dir[2][smd]);
+
+        t0 = max4(t0,tBoxNearX);
+        t0 = max4(t0,tBoxNearY);
+        t0 = max4(t0,tBoxNearZ);
+
+        const sse_t tBoxFarX = mul4(sub4(b1_x, srp.orig[0][smd]), 
srp.inv_dir[0][smd]);
+        const sse_t tBoxFarY = mul4(sub4(b1_y, srp.orig[1][smd]), 
srp.inv_dir[1][smd]);
+        const sse_t tBoxFarZ = mul4(sub4(b1_z, srp.orig[2][smd]), 
srp.inv_dir[2][smd]);
+
+        t1 = min4(t1,tBoxFarX);
+        t1 = min4(t1,tBoxFarY);
+        t1 = min4(t1,tBoxFarZ);
+        
+        srp.activeMask[smd] = cmp4_le(t0,t1);
+        if (_mm_movemask_ps(srp.activeMask[smd]))    //if any hit
         {
             first = MIN(first, smd);
             last = smd;
         }
-        srp.activeRays -= count_zeros(srp.activeMask[smd]);
+        
+        srp.activeRays += count_nonzeros(srp.activeMask[smd]);    
     }
     
     if (first > last)
@@ -825,7 +836,7 @@
 {
     //cerr << "octnode " << (int)depth << ", " << index << "; first=" << (int)first << ",last=" << (int)last << endl;
     OctNode& node = octdata->get_node(depth, index);
-    Vec3i child_cell = cell;
+    Vec3i child_cell;
     int child_bit = octdata->get_child_bit_depth(depth);
     index_trace[depth] = index;
     int smd_first = first << 2;
@@ -876,9 +887,9 @@
                 
                 if (octdata->get_isovalue() >= node.mins[target_child] && 
octdata->get_isovalue() <= node.maxs[target_child])
                 {
-                    Vector pmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
-                    Vector pmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
-                    char newfirst = first_intersects_node_octant(srp, first, 
last, pmin, pmax);
+                    Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
+                    Vector cmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+                    char newfirst = first_intersects(srp, first, last, cmin, 
cmax);
                     
                     //cerr << "newfirst=" << (int)newfirst << ", last=" << (int)last << endl;
                     
@@ -886,7 +897,8 @@
                     {
                         if (node.offsets[target_child]==-1)
                         {
-                            bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth, node.values[target_child], index_trace);
+                            bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth+1, 
+                                depth, node.values[target_child], 
child_cell, index_trace);
                         }
                         else
                         {
@@ -900,12 +912,10 @@
                                 bvh_octnode(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
                             }
                         }
+                        if (srp.activeRays==0)
+                            return;
                     }
-                    if (srp.activeRays==0)
-                    {
-                        //cerr << "early termination" << endl;
-                        return;
-                    }
+
                 }
             }
         }
@@ -913,8 +923,207 @@
 }
 
 void IsosurfaceOctreeVolume::bvh_octleaf(SSERayPacket& srp, char first, char 
last, 
-            const Vec3i& cell, int stop_depth, int depth, ST value, unsigned 
int index_trace[]) const
+            const Vec3i& cell, int stop_depth, int depth, 
+            int leaf_depth, ST leaf_value, const Vec3i& leaf_base_cell,
+            unsigned int index_trace[]) const
 {
+    int child_bit = octdata->get_child_bit_depth(depth);
+    int unsafe_zone = octdata->get_child_bit_depth(depth-1) - 
octdata->get_child_bit_depth(octdata->get_cap_depth());
+    int smd_first = first << 2;
+    Vec3i child_cell;
+
+    //intersect all children in order
+    #pragma unroll(2)
+    for(int midplane_x=0; midplane_x!=2; midplane_x++)
+    {
+        int target_x;
+        if (midplane_x - srp.rp->getSign(smd_first,0))
+        {
+            target_x = 4;
+            child_cell.data[0] = cell.data[0] | child_bit;
+        }
+        else
+        {
+            target_x = 0;
+            child_cell.data[0] = cell.data[0];
+        }
+        #pragma unroll(2)
+        for(int midplane_y=0; midplane_y!=2; midplane_y++)
+        {
+            int target_xy;
+            if (midplane_y - srp.rp->getSign(smd_first,1))
+            {
+                target_xy = target_x | 2;
+                child_cell.data[1] = cell.data[1] | child_bit;
+            }
+            else
+            {
+                target_xy = target_x;
+                child_cell.data[1] = cell.data[1];
+            }
+            #pragma unroll(2)
+            for(int midplane_z=0; midplane_z!=2; midplane_z++)
+            {
+                int target_child;
+                if (midplane_z - srp.rp->getSign(smd_first,2))
+                {
+                    target_child = target_xy | 1;
+                    child_cell.data[2] = cell.data[2] | child_bit;
+                }
+                else
+                {
+                    target_child = target_xy;
+                    child_cell.data[2] = cell.data[2];
+                }
+                
+                Vec3i local_child_cell = child_cell - leaf_base_cell;
+                if (local_child_cell.data[0] & unsafe_zone || 
local_child_cell.data[1] & unsafe_zone || local_child_cell.data[2] & 
unsafe_zone)
+                {
+                    if (depth == stop_depth)
+                    {
+                        sse_t child_tenter[RayPacket::SSE_MaxSize];
+                        sse_t child_texit[RayPacket::SSE_MaxSize];
+                        sse_t hitmask[RayPacket::SSE_MaxSize];
+                        Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
+                        Vector cmax(child_cell.data[0] + child_bit, 
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
+                        char newfirst, newlast;
+                        intersect_cap_octant(srp, first, last, newfirst, 
newlast, cmin, cmax, child_tenter, child_texit, hitmask);
+
+                        if (newfirst > newlast)
+                            continue;
+#ifdef USE_OCTREE_DATA
+                        //use octree data
+                        float rho[2][2][2];
+                        ST min_rho, max_rho, this_rho;
+                        min_rho = max_rho = this_rho = leaf_value;
+                        rho[0][0][0] = static_cast<float>(this_rho);
+                        Vec3i offset(0,0,child_bit);
+                                                        
+                        //0,0,1
+                        if (target_child & 1)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<0,0,1>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[0][0][1] = static_cast<float>(this_rho);
+                
+                        //0,1,1
+                        offset.data[1] = child_bit;
+                        if (target_child & 3)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<0,1,1>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[0][1][1] = static_cast<float>(this_rho);
+                        
+                        //1,1,1
+                        offset.data[0] = child_bit;
+                        if (target_child & 7)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<1,1,1>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[1][1][1] = static_cast<float>(this_rho);   
+                        
+                        //1,1,0
+                        offset.data[2] = 0;
+                        if (target_child & 6)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<1,1,0>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[1][1][0] = static_cast<float>(this_rho);
+                        
+                        //1,0,0
+                        offset.data[1] = 0;
+                        if (target_child & 4)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<1,0,0>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;     
+                        rho[1][0][0] = static_cast<float>(this_rho);
+                        
+                        //1,0,1
+                        offset.data[2] = child_bit;
+                        if (target_child & 5)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<1,0,1>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[1][0][1] = static_cast<float>(this_rho);   
+                    
+                        //0,1,0
+                        offset.data[0] = 0;
+                        offset.data[1] = child_bit;
+                        offset.data[2] = 0;
+                        if (target_child & 2)
+                        {
+                            this_rho = 
octdata->lookup_neighbor<0,1,0>(child_cell, offset, stop_depth, leaf_depth, 
index_trace);
+                            min_rho = MIN(min_rho, this_rho);
+                            max_rho = MAX(max_rho, this_rho);
+                        }
+                        else
+                            this_rho = leaf_value;
+                        rho[0][1][0] = static_cast<float>(this_rho);         
          
+#else  
+                        //use original grid data
+                        float rho[2][2][2];
+                        ST min_rho, max_rho;
+#define MYDATA octdata->indata //toggle this to octdata if you want to test pure point location (no neighbor finding)
+                        min_rho = max_rho = lookup_safe(MYDATA, 
child_cell.data[0], child_cell.data[1], child_cell.data[2]);
+                        rho[0][0][0] = static_cast<float>(min_rho);
+                        for(int c=1; c<8; c++)
+                        {
+                            Vec3i offset((c&4)!=0, (c&2)!=0, c&1);
+                            Vec3i neighboridx = child_cell + offset;
+                            ST this_rho = lookup_safe(MYDATA, 
neighboridx.data[0], neighboridx.data[1], neighboridx.data[2]);
+                            
rho[offset.data[0]][offset.data[1]][offset.data[2]] = 
static_cast<float>(this_rho);
+                            min_rho = MIN(this_rho, min_rho);
+                            max_rho = MAX(this_rho, max_rho);
+                        }
+#endif
+
+                        if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
+                        {                        
+                            IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho, 
+                                octdata->get_isovalue(), child_tenter, 
child_texit, hitmask, this, PrimitiveCommon::getMaterial());
+                        }
+                    }
+                    else    //not at stop depth
+                    {
+                        Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
+                        Vector cmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+                        char newfirst = first_intersects(srp, first, last, 
cmin, cmax);
+                    
+                        bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth+1, 
+                            leaf_depth, leaf_value, leaf_base_cell, 
index_trace);
+                    }
+                    if (srp.activeRays==0)
+                        return;
+                }
+            }
+        }
+    }
+
+
 }
 
 void IsosurfaceOctreeVolume::bvh_octcap(SSERayPacket& srp, char first, char 
last,
@@ -922,7 +1131,7 @@
 {
     //cerr << "octcap " << index << ", first=" << (int)first << ",last=" << (int)last << endl;
     OctCap& cap = octdata->get_cap(index);
-    Vec3i child_cell = cell;
+    Vec3i child_cell;
     index_trace[depth] = index;
     int smd_first = first << 2;
     

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h     (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h     Fri Jun  9 07:55:37 2006
@@ -60,15 +60,16 @@
                             const Vec3i& cell, int stop_depth, int depth, 
unsigned int index, 
                             unsigned int index_trace[]) const;
                             
-            void bvh_octleaf(SSERayPacket& srp, char first, char last,
-                                        const Vec3i& cell, int stop_depth, 
int depth, ST value, 
-                                        unsigned int index_trace[]) const;
+            void bvh_octleaf(SSERayPacket& srp, char first, char last, 
+                            const Vec3i& cell, int stop_depth, int depth, 
+                            int leaf_depth, ST leaf_value, const Vec3i& 
leaf_base_cell,
+                            unsigned int index_trace[]) const;
             
             void bvh_octcap(SSERayPacket& srp, char first, char last,
                 const Vec3i& cell, int stop_depth, int depth, unsigned int 
index, 
                 unsigned int index_trace[]) const;
             
-            inline char first_intersects_node_octant(SSERayPacket& srp, char 
first, char last, 
+            inline char first_intersects(SSERayPacket& srp, char first, char 
last, 
                 const Vector& min, const Vector& max) const
             {
                 sse_t boxmin[3];
@@ -116,6 +117,55 @@
                 }
                 return last+1;
             }
+            
+            inline char last_intersects(SSERayPacket& srp, char first, char 
last, 
+                const Vector& min, const Vector& max) const
+            {
+                sse_t boxmin[3];
+                sse_t boxmax[3];
+                #pragma unroll(3)
+                for(int axis=0; axis<3; axis++)
+                {
+                    boxmin[axis] = set4(min[axis]);
+                    boxmax[axis] = set4(max[axis]);
+                }    
+            
+                for(char smd=last; smd>=first; smd--)
+                {
+                    sse_t t0 = zero4();
+                    sse_t t1 = srp.minT[smd];
+                    
+                    sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+                    const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+                    const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+                    signs = cmp4_ge(srp.dir[1][smd],zero4());
+                    const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+                    const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+                    signs = cmp4_ge(srp.dir[2][smd],zero4());
+                    const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+                    const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
+                    
+                    const sse_t tBoxNearX = mul4(sub4(b0_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxNearY = mul4(sub4(b0_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxNearZ = mul4(sub4(b0_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    t0 = max4(t0,tBoxNearX);
+                    t0 = max4(t0,tBoxNearY);
+                    t0 = max4(t0,tBoxNearZ);
+
+                    const sse_t tBoxFarX = mul4(sub4(b1_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxFarY = mul4(sub4(b1_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxFarZ = mul4(sub4(b1_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    t1 = min4(t1,tBoxFarX);
+                    t1 = min4(t1,tBoxFarY);
+                    t1 = min4(t1,tBoxFarZ);
+                    
+                    if (_mm_movemask_ps(cmp4_le(t0,t1)))    //if any hit
+                        return smd;
+                }
+                return -1;
+            }            
             
             inline void intersect_cap_octant(SSERayPacket& srp, char first, 
char last, 
                     char& newfirst, char& newlast, const Vector& min, const 
Vector& max, 




  • [MANTA] r1109 - in trunk/Model: Intersections Primitives, knolla, 06/09/2006

Archive powered by MHonArc 2.6.16.

Top of page