Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1605 - trunk/Model/Primitives


Chronological Thread 
  • From: boulos@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1605 - trunk/Model/Primitives
  • Date: Fri, 3 Aug 2007 13:01:07 -0600 (MDT)

Author: boulos
Date: Fri Aug  3 13:01:07 2007
New Revision: 1605

Modified:
   trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
   trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
Switching the #pragma unroll directives to the new MANTA_UNROLL


Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    Fri Aug  3 13:01:07 
2007
@@ -21,70 +21,70 @@
 // offset = Vec3i(0,0,1)
 // every other variable must be either locally or globally declared. See 
caller examples below.
 #define octvol_fill_cell(NODE, cw) \
-       if (target_child & 1) \
-               this_rho = octdata->lookup_neighbor<0,0,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 1]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[0][0][1] = static_cast<float>(this_rho); \
-       offset.data[1] = cw; \
-       if (target_child & 3) \
-               this_rho = octdata->lookup_neighbor<0,1,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 3]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[0][1][1] = static_cast<float>(this_rho); \
-       offset.data[0] = cw; \
-       if (target_child & 7) \
-               this_rho = octdata->lookup_neighbor<1,1,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 7]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[1][1][1] = static_cast<float>(this_rho); \
-       offset.data[2] = 0; \
-       if (target_child & 6) \
-               this_rho = octdata->lookup_neighbor<1,1,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 6]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[1][1][0] = static_cast<float>(this_rho); \
-       offset.data[1] = 0; \
-       if (target_child & 4) \
-               this_rho = octdata->lookup_neighbor<1,0,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 4]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[1][0][0] = static_cast<float>(this_rho); \
-       offset.data[2] = cw; \
-       if (target_child & 5) \
-               this_rho = octdata->lookup_neighbor<1,0,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 5]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[1][0][1] = static_cast<float>(this_rho); \
-       offset.data[0] = 0; \
-       offset.data[1] = cw; \
-       offset.data[2] = 0; \
-       if (target_child & 2) \
-               this_rho = octdata->lookup_neighbor<0,1,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
-       else \
-               this_rho = NODE.values[target_child | 2]; \
-       min_rho = MIN(min_rho, this_rho); \
-       max_rho = MAX(max_rho, this_rho); \
-       rho[0][1][0] = static_cast<float>(this_rho); \
-    
+        if (target_child & 1) \
+                this_rho = octdata->lookup_neighbor<0,0,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 1]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[0][0][1] = static_cast<float>(this_rho); \
+        offset.data[1] = cw; \
+        if (target_child & 3) \
+                this_rho = octdata->lookup_neighbor<0,1,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 3]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[0][1][1] = static_cast<float>(this_rho); \
+        offset.data[0] = cw; \
+        if (target_child & 7) \
+                this_rho = octdata->lookup_neighbor<1,1,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 7]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[1][1][1] = static_cast<float>(this_rho); \
+        offset.data[2] = 0; \
+        if (target_child & 6) \
+                this_rho = octdata->lookup_neighbor<1,1,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 6]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[1][1][0] = static_cast<float>(this_rho); \
+        offset.data[1] = 0; \
+        if (target_child & 4) \
+                this_rho = octdata->lookup_neighbor<1,0,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 4]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[1][0][0] = static_cast<float>(this_rho); \
+        offset.data[2] = cw; \
+        if (target_child & 5) \
+                this_rho = octdata->lookup_neighbor<1,0,1>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 5]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[1][0][1] = static_cast<float>(this_rho); \
+        offset.data[0] = 0; \
+        offset.data[1] = cw; \
+        offset.data[2] = 0; \
+        if (target_child & 2) \
+                this_rho = octdata->lookup_neighbor<0,1,0>(child_cell, 
offset, stop_depth, prev_depth, index_trace); \
+        else \
+                this_rho = NODE.values[target_child | 2]; \
+        min_rho = MIN(min_rho, this_rho); \
+        max_rho = MAX(max_rho, this_rho); \
+        rho[0][1][0] = static_cast<float>(this_rho); \
+
 
 static const int axis_table[] = {4, 2, 1};
-    
+
 using namespace Manta;
 
-IsosurfaceOctreeVolume::IsosurfaceOctreeVolume(OctreeVolume* _octdata, 
Material* _matl) 
+IsosurfaceOctreeVolume::IsosurfaceOctreeVolume(OctreeVolume* _octdata, 
Material* _matl)
 : PrimitiveCommon(_matl), octdata(_octdata)
 {
 }
@@ -106,7 +106,7 @@
 void IsosurfaceOctreeVolume::computeNormal(const RenderContext& context, 
RayPacket& rays) const
 {
 }
-            
+
 BBox IsosurfaceOctreeVolume::getBounds() const
 {
     return octdata->get_bounds();
@@ -116,11 +116,11 @@
 {
 //#ifdef MANTA_SSE
 #if 0
-       packet_intersect_sse(packet);
+        packet_intersect_sse(packet);
 #else
     for ( int i = packet.rayBegin; i < packet.rayEnd; i++ )
         single_intersect(packet, i);
-#endif         
+#endif
 }
 
 void IsosurfaceOctreeVolume::single_intersect(RayPacket& rays, int 
which_one) const
@@ -128,13 +128,13 @@
     Vector t0;
     Vector t1;
     Vector t1p;
-    
+
     Vector orig = rays.getOrigin(which_one);
     Vector dir = rays.getDirection(which_one);
     //Vector inv_dir = rays.getInverseDirection(which_one);
     Vector inv_dir = dir.inverse();
-        
-#pragma unroll(3)
+
+    MANTA_UNROLL(3);
     for(int axis=0; axis<3; axis++)
     {
         t0.data[axis] = -orig.data[axis] * inv_dir.data[axis];
@@ -152,9 +152,10 @@
     {
         tenter = t1.data[0];
         tenter_padded = t1p.data[0];
-        texit = texit_padded = t0.data[0];        
+        texit = texit_padded = t0.data[0];
     }
-#pragma unroll(2)
+
+    MANTA_UNROLL(2);
     for(int axis=1; axis<3; axis++)
     {
         float ft0, ft1, ft0p, ft1p;
@@ -170,55 +171,55 @@
             ft0 = t1.data[axis];
             ft1 = ft1p = t0.data[axis];
         }
-        
+
         tenter = MAX(tenter, ft0);
         texit = MIN(texit, ft1);
         tenter_padded = MAX(tenter_padded, ft0p);
         texit_padded = MIN(texit_padded, ft1p);
-        
+
         if (tenter > texit)
             return;
     }
-        
+
     if (texit < 0.f)
         return;
-        
+
     //tenter_padded = MAX(0.f, tenter_padded);
 
     unsigned int index_trace[octdata->get_max_depth()+1];
-       
-       int stop_depth = octdata->get_cap_depth() - 0;
 
-    Vec3i cell(0,0,0);    
-    single_traverse_node(rays, which_one, orig, dir, inv_dir, stop_depth, 
+        int stop_depth = octdata->get_cap_depth() - 0;
+
+    Vec3i cell(0,0,0);
+    single_traverse_node(rays, which_one, orig, dir, inv_dir, stop_depth,
                         0, 0, index_trace, cell, tenter_padded, 
texit_padded);
-}      
-       
+}
+
 bool IsosurfaceOctreeVolume::single_traverse_node(RayPacket& rays, int 
which_one,
-                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, 
-                        int stop_depth, int depth, unsigned int node_index, 
-                        unsigned int index_trace[], Vec3i& cell, const float 
tenter, 
+                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir,
+                        int stop_depth, int depth, unsigned int node_index,
+                        unsigned int index_trace[], Vec3i& cell, const float 
tenter,
                         const float texit) const
 {
-    //cerr << "single_traverse_node, depth=" << depth << ", node_index=" << 
node_index << ", cell=" << cell.data[0] << "," << cell.data[1] << "," << 
cell.data[2] << endl; 
+    //cerr << "single_traverse_node, depth=" << depth << ", node_index=" << 
node_index << ", cell=" << cell.data[0] << "," << cell.data[1] << "," << 
cell.data[2] << endl;
     OctNode& node = octdata->get_node(depth, node_index);
-               
+
     index_trace[depth] = node_index;
-    
+
     int child_bit = octdata->get_child_bit_depth(depth);
-    Vector center(static_cast<float>(cell.data[0] | child_bit), 
+    Vector center(static_cast<float>(cell.data[0] | child_bit),
                 static_cast<float>(cell.data[1] | child_bit), 
static_cast<float>(cell.data[2] | child_bit));
     Vector tcenter = inv_dir * (center - orig);
-    
+
     Vector penter = orig + (dir*tenter);
-        
+
     Vec3i child_cell = cell;
-    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );          
+    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );
     int target_child = (tc.data[0] << 2) | (tc.data[1] << 1) | tc.data[2];
     child_cell.data[0] |= tc.data[0] ? child_bit : 0;
     child_cell.data[1] |= tc.data[1] ? child_bit : 0;
     child_cell.data[2] |= tc.data[2] ? child_bit : 0;
-            
+
     Vec3i axis_isects;
     if (tcenter.data[0] < tcenter.data[1] && tcenter.data[0] < 
tcenter.data[2]){
         axis_isects.data[0] = 0;
@@ -275,55 +276,55 @@
                 axis = -1;
             }
         }
-            
+
         if (octdata->get_isovalue() >= node.mins[target_child] && 
octdata->get_isovalue() <= node.maxs[target_child])
         {
-                       
+
 #ifdef OCTVOL_DYNAMIC_MULTIRES
-                       if (depth == stop_depth)
-                       {
-                               Vector cmin(child_cell.data[0], 
child_cell.data[1], child_cell.data[2]);
-                               Vector cmax(child_cell.data[0] + child_bit, 
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
-
-                               float rho[2][2][2];
-                               ST min_rho, max_rho, this_rho;
-                               min_rho = max_rho = this_rho = 
node.values[target_child];
-                               rho[0][0][0] = static_cast<float>(this_rho);
-                               int prev_depth = depth-1;
-                               Vec3i offset(0,0,child_bit);
-                               octvol_fill_cell(node, child_bit);
-                               
-                               if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
-                               {
-                                       float hit_t;
-                                       if 
(IsosurfaceImplicit::single_intersect(orig, dir, cmin, cmax, rho, 
-                                               octdata->get_isovalue(), 
child_tenter, child_texit, hit_t))
-                                       {
-                                               if (rays.hit(which_one, 
hit_t, PrimitiveCommon::getMaterial(), this, 0)) 
-                                               {
-                                                       Vector normal;
-                                                       Vector phit = orig + 
dir*hit_t;
-                                                       
IsosurfaceImplicit::single_normal(normal, cmin, cmax, phit, rho);
-                                                       normal.normalize();
-                                                       
rays.setNormal(which_one, normal);
-                                                       return true;
-                                               }
-                                       }
-                               }                                             
                                  
-                       }
-                       else
+                        if (depth == stop_depth)
+                        {
+                                Vector cmin(child_cell.data[0], 
child_cell.data[1], child_cell.data[2]);
+                                Vector cmax(child_cell.data[0] + child_bit, 
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
+
+                                float rho[2][2][2];
+                                ST min_rho, max_rho, this_rho;
+                                min_rho = max_rho = this_rho = 
node.values[target_child];
+                                rho[0][0][0] = static_cast<float>(this_rho);
+                                int prev_depth = depth-1;
+                                Vec3i offset(0,0,child_bit);
+                                octvol_fill_cell(node, child_bit);
+
+                                if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
+                                {
+                                        float hit_t;
+                                        if 
(IsosurfaceImplicit::single_intersect(orig, dir, cmin, cmax, rho,
+                                                octdata->get_isovalue(), 
child_tenter, child_texit, hit_t))
+                                        {
+                                                if (rays.hit(which_one, 
hit_t, PrimitiveCommon::getMaterial(), this, 0))
+                                                {
+                                                        Vector normal;
+                                                        Vector phit = orig + 
dir*hit_t;
+                                                        
IsosurfaceImplicit::single_normal(normal, cmin, cmax, phit, rho);
+                                                        normal.normalize();
+                                                        
rays.setNormal(which_one, normal);
+                                                        return true;
+                                                }
+                                        }
+                                }
+                        }
+                        else
 #endif
             if (node.offsets[target_child]==-1)
             {
-                if (single_traverse_leaf(rays, which_one, orig, dir, 
inv_dir, stop_depth, 
-                    next_depth, depth, node.values[target_child], 
+                if (single_traverse_leaf(rays, which_one, orig, dir, 
inv_dir, stop_depth,
+                    next_depth, depth, node.values[target_child],
                     child_cell, index_trace, child_cell, child_tenter, 
child_texit))
                     return true;
             }
             else
             {
                 unsigned int child_idx = node.children_start + 
node.offsets[target_child];
-                if (depth == octdata->get_pre_cap_depth())     //cap
+                if (depth == octdata->get_pre_cap_depth())      //cap
                 {
                     if (single_traverse_cap(rays, which_one, orig, dir, 
inv_dir, stop_depth, next_depth, child_idx,
                         index_trace, child_cell, child_tenter, child_texit))
@@ -337,48 +338,48 @@
                 }
             }
         }
-    
+
         if (axis==-1)
             return false;
-            
+
         //move to the next target_child, update tenter and texit
         child_tenter = child_texit;
         int trueaxisbit = axis_table[axis];
-        if (target_child & trueaxisbit)                //going from true to 
false
+        if (target_child & trueaxisbit)         //going from true to false
         {
             target_child &= ~trueaxisbit;
             child_cell.data[axis] &= ~child_bit;
         }
-        else                                                           
//going from false to true
+        else                                                            
//going from false to true
         {
             target_child |= trueaxisbit;
             child_cell.data[axis] |= child_bit;
-        }                      
+        }
     }
     return false;
 }
 
 bool IsosurfaceOctreeVolume::single_traverse_leaf(RayPacket& rays, int 
which_one,
-                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int stop_depth, 
+                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int stop_depth,
                         int depth, int leaf_depth, ST scalar, Vec3i& 
leaf_base_cell,
                         unsigned int index_trace[], Vec3i& cell, const float 
tenter, const float texit) const
 {
     //using pretty much the same algorithm, find which "implicit" cell 
(-->voxel) we're in at octdata->get_max_depth()
-            
+
     int child_bit = octdata->get_child_bit_depth(depth);
     int unsafe_zone = octdata->get_child_bit_depth(depth-1) - 
octdata->get_child_bit_depth(octdata->get_cap_depth());
-    
+
     Vector center(static_cast<float>(cell.data[0] | child_bit), 
static_cast<float>(cell.data[1] | child_bit), static_cast<float>(cell.data[2] 
| child_bit));
     Vector tcenter = inv_dir * (center - orig);
     Vector penter = orig + (dir*tenter);
-        
+
     Vec3i child_cell = cell;
-    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );          
+    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );
     int target_child = (tc.data[0] << 2) | (tc.data[1] << 1) | tc.data[2];
     child_cell.data[0] |= tc.data[0] ? child_bit : 0;
     child_cell.data[1] |= tc.data[1] ? child_bit : 0;
     child_cell.data[2] |= tc.data[2] ? child_bit : 0;
-            
+
     Vec3i axis_isects;
     if (tcenter.data[0] < tcenter.data[1] && tcenter.data[0] < 
tcenter.data[2]){
         axis_isects.data[0] = 0;
@@ -413,7 +414,7 @@
             axis_isects.data[2] = 0;
         }
     }
-    
+
     float child_tenter = tenter;
     float child_texit;
 
@@ -435,7 +436,7 @@
                 axis = -1;
             }
         }
-        
+
         Vec3i local_child_cell = child_cell - leaf_base_cell;
         if (local_child_cell.data[0] & unsafe_zone || 
local_child_cell.data[1] & unsafe_zone || local_child_cell.data[2] & 
unsafe_zone)
         {
@@ -443,7 +444,7 @@
             {
                 //try isosurface intersection
                 Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
-                               Vector cmax(child_cell.data[0] + child_bit, 
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
+                                Vector cmax(child_cell.data[0] + child_bit, 
child_cell.data[1] + child_bit, child_cell.data[2] + child_bit);
 
 #ifdef USE_OCTREE_DATA
                 //use octree data
@@ -452,7 +453,7 @@
                 min_rho = max_rho = this_rho = scalar;
                 rho[0][0][0] = static_cast<float>(this_rho);
                 Vec3i offset(0,0,child_bit);
-                                                                             
  
+
                 //0,0,1
                 if (target_child & 1)
                 {
@@ -463,7 +464,7 @@
                 else
                     this_rho = scalar;
                 rho[0][0][1] = static_cast<float>(this_rho);
-        
+
                 //0,1,1
                 offset.data[1] = child_bit;
                 if (target_child & 3)
@@ -475,7 +476,7 @@
                 else
                     this_rho = scalar;
                 rho[0][1][1] = static_cast<float>(this_rho);
-                
+
                 //1,1,1
                 offset.data[0] = child_bit;
                 if (target_child & 7)
@@ -486,8 +487,8 @@
                 }
                 else
                     this_rho = scalar;
-                rho[1][1][1] = static_cast<float>(this_rho);   
-                
+                rho[1][1][1] = static_cast<float>(this_rho);
+
                 //1,1,0
                 offset.data[2] = 0;
                 if (target_child & 6)
@@ -499,7 +500,7 @@
                 else
                     this_rho = scalar;
                 rho[1][1][0] = static_cast<float>(this_rho);
-                
+
                 //1,0,0
                 offset.data[1] = 0;
                 if (target_child & 4)
@@ -509,9 +510,9 @@
                     max_rho = MAX(max_rho, this_rho);
                 }
                 else
-                    this_rho = scalar; 
+                    this_rho = scalar;
                 rho[1][0][0] = static_cast<float>(this_rho);
-                
+
                 //1,0,1
                 offset.data[2] = child_bit;
                 if (target_child & 5)
@@ -522,8 +523,8 @@
                 }
                 else
                     this_rho = scalar;
-                rho[1][0][1] = static_cast<float>(this_rho);   
-            
+                rho[1][0][1] = static_cast<float>(this_rho);
+
                 //0,1,0
                 offset.data[0] = 0;
                 offset.data[1] = child_bit;
@@ -536,12 +537,12 @@
                 }
                 else
                     this_rho = scalar;
-                rho[0][1][0] = static_cast<float>(this_rho);                 
  
-#else  
+                rho[0][1][0] = static_cast<float>(this_rho);
+#else
                 //use original grid data
                 float rho[2][2][2];
                 ST min_rho, max_rho;
-#define MYDATA octdata->indata //toggle this to octdata if you want to test 
pure point location (no neighbor finding)
+#define MYDATA octdata->indata  //toggle this to octdata if you want to test 
pure point location (no neighbor finding)
                 min_rho = max_rho = lookup_safe(MYDATA, child_cell.data[0], 
child_cell.data[1], child_cell.data[2]);
                 rho[0][0][0] = static_cast<float>(min_rho);
                 for(int c=1; c<8; c++)
@@ -557,10 +558,10 @@
                 if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
                 {
                     float hit_t;
-                    if (IsosurfaceImplicit::single_intersect_neubauer(orig, 
dir, cmin, cmax, rho, 
+                    if (IsosurfaceImplicit::single_intersect_neubauer(orig, 
dir, cmin, cmax, rho,
                         octdata->get_isovalue(), child_tenter, child_texit, 
hit_t))
                     {
-                        if (rays.hit(which_one, hit_t, 
PrimitiveCommon::getMaterial(), this, 0)) 
+                        if (rays.hit(which_one, hit_t, 
PrimitiveCommon::getMaterial(), this, 0))
                         {
                             Vector normal;
                             Vector phit = orig + dir*hit_t;
@@ -574,54 +575,54 @@
             }
             else //not at cap-level depth
             {
-                if (single_traverse_leaf(rays, which_one, orig, dir, 
inv_dir, stop_depth, 
-                    next_depth, leaf_depth, scalar, leaf_base_cell, 
+                if (single_traverse_leaf(rays, which_one, orig, dir, 
inv_dir, stop_depth,
+                    next_depth, leaf_depth, scalar, leaf_base_cell,
                     index_trace, child_cell, child_tenter, child_texit))
                     return true;
             }
         }
-    
+
         if (axis==-1)
             return false;
-            
+
         //move to the next target_child, update tenter and texit
         child_tenter = child_texit;
         int trueaxisbit = axis_table[axis];
-        if (target_child & trueaxisbit)                //going from true to 
false
+        if (target_child & trueaxisbit)         //going from true to false
         {
             target_child &= ~trueaxisbit;
             child_cell.data[axis] &= ~child_bit;
         }
-        else                                                           
//going from false to true
+        else                                                            
//going from false to true
         {
             target_child |= trueaxisbit;
             child_cell.data[axis] |= child_bit;
-        }                      
+        }
     }
     return false;
 }
 
 bool IsosurfaceOctreeVolume::single_traverse_cap(RayPacket& rays, int 
which_one,
-                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int stop_depth, 
-                        int depth, unsigned int cap_index, unsigned int 
index_trace[], Vec3i& cell, 
+                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int stop_depth,
+                        int depth, unsigned int cap_index, unsigned int 
index_trace[], Vec3i& cell,
                         const float tenter, const float texit) const
-{      
-    //cerr << "single_traverse_cap, depth=" << depth << ", cap_index=" << 
cap_index << ", cell=" << cell[0] << "," << cell[1] << "," << cell[2] << 
endl; 
+{
+    //cerr << "single_traverse_cap, depth=" << depth << ", cap_index=" << 
cap_index << ", cell=" << cell[0] << "," << cell[1] << "," << cell[2] << endl;
 
-    OctCap& cap = octdata->get_cap(cap_index);        
+    OctCap& cap = octdata->get_cap(cap_index);
     index_trace[depth] = cap_index;
-    
+
     Vector penter = orig + (dir*tenter);
     Vector center(static_cast<float>(cell.data[0] | 1), 
static_cast<float>(cell.data[1] | 1), static_cast<float>(cell.data[2] | 1));
     Vector tcenter = inv_dir * (center - orig);
-        
+
     Vec3i child_cell = cell;
-    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );          
+    Vec3i tc( penter.x() >= center.x(), penter.y() >= center.y(), penter.z() 
>= center.z() );
     int target_child = (tc.data[0] << 2) | (tc.data[1] << 1) | tc.data[2];
     child_cell.data[0] |= tc.data[0];
     child_cell.data[1] |= tc.data[1];
     child_cell.data[2] |= tc.data[2];
-        
+
     Vec3i axis_isects;
     if (tcenter.data[0] < tcenter.data[1] && tcenter.data[0] < 
tcenter.data[2]){
         axis_isects.data[0] = 0;
@@ -656,7 +657,7 @@
             axis_isects.data[2] = 0;
         }
     }
-    
+
     float child_tenter = tenter;
     float child_texit;
 
@@ -679,8 +680,8 @@
 
         //try isosurface intersection in this node
         Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
-               Vector cmax(child_cell.data[0] + 1, child_cell.data[1] + 1, 
child_cell.data[2] + 1);
-        
+                Vector cmax(child_cell.data[0] + 1, child_cell.data[1] + 1, 
child_cell.data[2] + 1);
+
 #ifdef USE_OCTREE_DATA
         float rho[2][2][2];
         ST min_rho, max_rho, this_rho;
@@ -688,8 +689,8 @@
         rho[0][0][0] = static_cast<float>(this_rho);
         int prev_depth = depth-1;
         Vec3i offset(0,0,1);
-               octvol_fill_cell(cap, 1);
-#else  
+                octvol_fill_cell(cap, 1);
+#else
         //use original grid data
         float rho[2][2][2];
         ST min_rho, max_rho;
@@ -709,10 +710,10 @@
         if (octdata->get_isovalue() >= min_rho && octdata->get_isovalue() <= 
max_rho)
         {
             float hit_t;
-            if (IsosurfaceImplicit::single_intersect_neubauer(orig, dir, 
cmin, cmax, rho, 
+            if (IsosurfaceImplicit::single_intersect_neubauer(orig, dir, 
cmin, cmax, rho,
                 octdata->get_isovalue(), child_tenter, child_texit, hit_t))
             {
-                if (rays.hit(which_one, hit_t, 
PrimitiveCommon::getMaterial(), this, 0)) 
+                if (rays.hit(which_one, hit_t, 
PrimitiveCommon::getMaterial(), this, 0))
                 {
                     Vector normal;
                     Vector phit = orig + dir*hit_t;
@@ -726,26 +727,26 @@
 
         if (axis==-1)
             return false;
-        
+
         //move to the next target_child, update tenter and texit
         child_tenter = child_texit;
         int trueaxisbit = axis_table[axis];
-        if (target_child & trueaxisbit)                //going from true to 
false
+        if (target_child & trueaxisbit)         //going from true to false
         {
             target_child &= ~trueaxisbit;
             child_cell.data[axis] &= ~1;
         }
-        else                                                           
//going from false to true
+        else                                                            
//going from false to true
         {
             target_child |= trueaxisbit;
             child_cell.data[axis] |= 1;
-        }      
+        }
     }
     return false;
 }
 
 /*
-       Begin packet intersection code, for SSE packets only.
+        Begin packet intersection code, for SSE packets only.
 */
 
 //AARONBAD - This SSE octree traverser turned out to be very slow. Not 
recommended.
@@ -758,13 +759,13 @@
     rays.computeInverseDirections();
     rays.computeSigns();
     rays.resetHits();
-    
+
     RayPacketData* data = rays.data;
     SSERayPacket srp;
 
     //intersect the global bounding box: find first, last
     //  this will require a special-case AABB intersection
-    #pragma unroll(3)
+    MANTA_UNROLL(3);
     for(int axis=0; axis<3; axis++)
     {
         srp.orig[axis] = (sse_t*)(data->origin[axis]);
@@ -778,18 +779,18 @@
     srp.activeRays = 0;
     const int sse_begin = rays.begin() >> 2; // equivalent to 
Floor(rays.begin()/4)
     const int sse_end = ((rays.end()-1+3) >> 2); // Ceil(rays.end()-1/4)
-    
+
     char first = RayPacket::SSE_MaxSize;
     char last = -1;
     sse_t octdims[3];
     for(int axis=0; axis<3; axis++)
         octdims[axis] = set4(octdata->dims[axis]);
-        
+
     for(int smd=sse_begin; smd<sse_end; smd++)
-    {    
+    {
         sse_t t0 = zero4();
         sse_t t1 = srp.minT[smd];
-        
+
         sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
         const sse_t b0_x = mask4(signs, zero4(), octdims[0]);
         const sse_t b1_x = mask4(signs, octdims[0], zero4());
@@ -799,7 +800,7 @@
         signs = cmp4_ge(srp.dir[2][smd],zero4());
         const sse_t b0_z = mask4(signs, zero4(), octdims[2]);
         const sse_t b1_z = mask4(signs, octdims[2], zero4());
-        
+
         const sse_t tBoxNearX = mul4(sub4(b0_x, srp.orig[0][smd]), 
srp.inv_dir[0][smd]);
         const sse_t tBoxNearY = mul4(sub4(b0_y, srp.orig[1][smd]), 
srp.inv_dir[1][smd]);
         const sse_t tBoxNearZ = mul4(sub4(b0_z, srp.orig[2][smd]), 
srp.inv_dir[2][smd]);
@@ -815,19 +816,19 @@
         t1 = min4(t1,tBoxFarX);
         t1 = min4(t1,tBoxFarY);
         t1 = min4(t1,tBoxFarZ);
-        
+
         srp.activeMask[smd] = cmp4_le(t0,t1);
         if (_mm_movemask_ps(srp.activeMask[smd]))    //if any hit
         {
             first = MIN(first, smd);
             last = smd;
-            srp.activeRays += count_nonzeros(srp.activeMask[smd]);    
+            srp.activeRays += count_nonzeros(srp.activeMask[smd]);
         }
     }
-    
+
     if (first > last)
         return;
-        
+
     Vector direction = rays.getDirection(first<<2);
     Vector dir2 = direction * direction;
     if (dir2[0] > dir2[1] && dir2[0] > dir2[2])
@@ -863,50 +864,50 @@
     sse_t smin_dir[3];
     sse_t smax_orig[3];
     sse_t smax_dir[3];
-    
-    #pragma unroll(3)
+
+    MANTA_UNROLL(3);
     for(int axis=0; axis<3; axis++)
     {
         smin_orig[axis] = _mm_infty;
         smin_dir[axis] = _mm_infty;
         smax_orig[axis] = _mm_minus_infty;
-        smax_dir[axis] = _mm_minus_infty;        
+        smax_dir[axis] = _mm_minus_infty;
     }
     for (int smd=first; smd<=last; smd++)
     {
-        #pragma unroll(3)
-        for(int axis=0; axis<3; axis++)
+      MANTA_UNROLL(3);
+      for(int axis=0; axis<3; axis++)
         {
-            smin_orig[axis] = min4(smin_orig[axis], srp.orig[axis][smd]);
-            smax_orig[axis] = max4(smax_orig[axis], srp.orig[axis][smd]);
-            smin_dir[axis] = min4(smin_dir[axis], srp.dir[axis][smd]);
-            smax_dir[axis] = max4(smax_dir[axis], srp.dir[axis][smd]);
+          smin_orig[axis] = min4(smin_orig[axis], srp.orig[axis][smd]);
+          smax_orig[axis] = max4(smax_orig[axis], srp.orig[axis][smd]);
+          smin_dir[axis] = min4(smin_dir[axis], srp.dir[axis][smd]);
+          smax_dir[axis] = max4(smax_dir[axis], srp.dir[axis][smd]);
         }
     }
-    
+
     FrustumInterval fi;
     fi.uvminmax_dir = set44(min4f(smin_dir[U]), min4f(smin_dir[V]), 
max4f(smax_dir[U]), max4f(smax_dir[V]));
     fi.uvminmax_invdir = oneOver(fi.uvminmax_dir);
     fi.uvminmax_orig = set44(min4f(smin_orig[U]), min4f(smin_orig[V]), 
max4f(smax_orig[U]), max4f(smax_orig[V]));
-    
+
     #if DBGP
     cerr << "fi.uvminmax_orig "; simd_cerr(fi.uvminmax_orig);
     cerr << "fi.uvminmax_dir "; simd_cerr(fi.uvminmax_dir);
     #endif
-    
+
     float komin = min4f(smin_orig[K]);
     float komax = max4f(smax_orig[K]);
     float kdmin = min4f(smin_dir[K]);
     float kdmax = max4f(smax_dir[K]);
     fi.kminmax_orig = set44(komin, komin, komax, komax);
     fi.kminmax_dir = set44(kdmin, kdmin, kdmax, kdmax);
-    fi.kminmax_invdir = oneOver(fi.kminmax_dir);    
-    
+    fi.kminmax_invdir = oneOver(fi.kminmax_dir);
+
     #if DBGP
     cerr << "fi.kminmax_orig "; simd_cerr(fi.kminmax_orig);
     cerr << "fi.kminmax_dir "; simd_cerr(fi.kminmax_dir);
-    #endif    
-        
+    #endif
+
     unsigned int index_trace[octdata->get_max_depth() + 1];
     Vec3i cell(0,0,0);
     sse_traverse_node<K,U,V,DK>(srp, first, last, DU, DV, fi, cell, 
octdata->get_cap_depth(), 0, 0, index_trace);
@@ -915,184 +916,184 @@
 
 
 template<char K, char U, char V, char DK>
-void IsosurfaceOctreeVolume::sse_traverse_node(SSERayPacket& srp, char 
first, char last, 
+void IsosurfaceOctreeVolume::sse_traverse_node(SSERayPacket& srp, char 
first, char last,
         char DU, char DV, const FrustumInterval& fi,
         Vec3i& cell, char stop_depth, char depth, unsigned int index, 
unsigned int index_trace[]) const
 {
 #if DBGP
     cerr << "octnode, depth " << (int)depth << ", index " << (int)index << 
", first=" << (int)first << ",last=" << (int)last << "cell " << cell[0] << ", 
" << cell[1] << ", " << cell[2] << endl;
-    cerr << "(with K=" << (int)(K) << ", U=" << (int)(U) << ", V=" << 
(int)(V) << ", DK=" << (int)(DK) << endl; 
+    cerr << "(with K=" << (int)(K) << ", U=" << (int)(U) << ", V=" << 
(int)(V) << ", DK=" << (int)(DK) << endl;
 #endif
 
     OctNode& node = octdata->get_node(depth, index);
     int child_bit = octdata->get_child_bit_depth(depth);
     Vec3i child_cell;
     index_trace[depth] = index;
-    Vector pcenter( static_cast<float>(cell[0] | child_bit), 
-                    static_cast<float>(cell[1] | child_bit), 
+    Vector pcenter( static_cast<float>(cell[0] | child_bit),
+                    static_cast<float>(cell[1] | child_bit),
                     static_cast<float>(cell[2] | child_bit));
-                    
-    #pragma unroll(2)
+
+    MANTA_UNROLL(2);
     for(int k=0; k<2; k++)
-       {       
+        {
         sse_t child_tkenter;
         sse_t child_tkexit;
         int tc_K;
-                
-               if (k)  //AFTER THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+
+                if (k)  //AFTER THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] + depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];       
-                #if DBGP     
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
+                #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] =" << cell[K] << endl;
                 #endif
-                       }
-                       
-               }
-               else    //BEFORE THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+                        }
+
+                }
+                else    //BEFORE THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
                 #if DBGP
                 cerr << "kenter = cell[K] =" << cell[K] << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;   
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = cell[K]+depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-               }
-        
+                        }
+                }
+
         #if DBGP
         cerr << "child_tkenter "; simd_cerr(child_tkenter);
         cerr << "child_tkexit "; simd_cerr(child_tkexit);
         cerr << "child_pkenter[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkenter)) );
         cerr << "child_pkexit[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkexit)) );
         #endif
-     
+
         //we have child_tkenter, child_tkexit.
         if (_mm_movemask_ps(cmp4_ge(child_tkexit, zero4()))==0)
         {
             #if DBGP
             cerr << "texit was negative; continuing." << endl;
             #endif
-                       continue;
+                        continue;
         }
-         
+
         //find pkenter_uvminmax, pkexit[u], pkexit[v]
         //child_pkenter = dir*(orig/dir) + dir*t = dir(orig+t)
         const sse_t child_pkenter_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkenter));
         const sse_t child_pkexit_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkexit));
-        
+
         #if DBGP
         cerr << "child_pkenter_uvminmax "; simd_cerr(child_pkenter_uvminmax);
-        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);  
      
+        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);
         #endif
 
         sse_union tmp_min, tmp_max;
         tmp_min.sse = min4(child_pkenter_uvminmax, child_pkexit_uvminmax);
         tmp_max.sse = max4(child_pkenter_uvminmax, child_pkexit_uvminmax);
-        
+
         #if DBGP
         cerr << "tmp_min "; simd_cerr(tmp_min.sse);
         cerr << "tmp_max "; simd_cerr(tmp_max.sse);
         #endif
-        
+
         const float umin = MIN(tmp_min.f[3], tmp_min.f[1]);
         const float vmin = MIN(tmp_min.f[2], tmp_min.f[0]);
         const float umax = MAX(tmp_max.f[3], tmp_max.f[1]);
         const float vmax = MAX(tmp_max.f[2], tmp_max.f[0]);
         sse_t sse_fuvminmax = set44(umin, vmin, umax, vmax);
-        
+
         #if DBGP
         cerr << "sse_fuvminmax (before clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-    
-               sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
-               sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
-               sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
-               sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
-               
+
+                sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
+                sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
+                sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
+                sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
+
         #if DBGP
         cerr << "sse_fuvminmax (after clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-        
+
         sse_int_union iuvminmax;
-               
-               //convert to int
-               iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
-        
+
+                //convert to int
+                iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
+
         #if DBGP
         cerr << "iuvminmax = " << iuvminmax.i[0] << ", " << iuvminmax.i[1] 
<< ", " << iuvminmax.i[2] << ", " << iuvminmax.i[3];
         cerr << endl << endl;
         #endif
-               
-               for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
-               {
-                       int tc_U;
-                       if (u)
-                       {
-                               tc_U = axis_table[U];
-                               child_cell[U] = cell[U] | child_bit;
-                       }
-                       else
-                       {
-                               tc_U = 0;
-                               child_cell[U] = cell[U];        
-                       }
-
-                       for(int v= (DV==1 ? iuvminmax.i[2] : iuvminmax.i[0]); 
(DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
-                       {                               
-                               int tc_V;
-                               if (v)
-                               {
-                                       tc_V = axis_table[V];
-                                       child_cell[V] = cell[V] | child_bit;
-                               }
-                               else
-                               {
-                                       tc_V = 0;
-                                       child_cell[V] = cell[V];        
-                               }
-                               
-                               int target_child = tc_K | tc_U | tc_V;
-                
+
+                for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
+                {
+                        int tc_U;
+                        if (u)
+                        {
+                                tc_U = axis_table[U];
+                                child_cell[U] = cell[U] | child_bit;
+                        }
+                        else
+                        {
+                                tc_U = 0;
+                                child_cell[U] = cell[U];
+                        }
+
+                        for(int v= (DV==1 ? iuvminmax.i[2] : 
iuvminmax.i[0]); (DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
+                        {
+                                int tc_V;
+                                if (v)
+                                {
+                                        tc_V = axis_table[V];
+                                        child_cell[V] = cell[V] | child_bit;
+                                }
+                                else
+                                {
+                                        tc_V = 0;
+                                        child_cell[V] = cell[V];
+                                }
+
+                                int target_child = tc_K | tc_U | tc_V;
+
                 if (octdata->get_isovalue() >= node.mins[target_child] && 
octdata->get_isovalue() <= node.maxs[target_child])
                 {
                     Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
                     Vector cmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
                     char newfirst = first_intersects(srp, first, last, cmin, 
cmax);
-                    
+
                     //cerr << "newfirst=" << (int)newfirst << ", last=" << (int)last << endl;
-                    
+
                     if (first <= last)
                     {
                         if (node.offsets[target_child]==-1)
@@ -1103,14 +1104,14 @@
                         else
                         {
                             unsigned int child_idx = node.children_start + 
node.offsets[target_child];
-                            if (depth == octdata->get_pre_cap_depth()) //cap
+                            if (depth == octdata->get_pre_cap_depth())  //cap
                             {
-                                sse_traverse_cap<K,U,V,DK>(srp, newfirst, 
last, DU, DV, fi, child_cell, 
+                                sse_traverse_cap<K,U,V,DK>(srp, newfirst, 
last, DU, DV, fi, child_cell,
                                     stop_depth, depth+1, child_idx, 
index_trace);
                             }
                             else
                             {
-                                sse_traverse_node<K,U,V,DK>(srp, newfirst, 
last, DU, DV, fi, child_cell, 
+                                sse_traverse_node<K,U,V,DK>(srp, newfirst, 
last, DU, DV, fi, child_cell,
                                     stop_depth, depth+1, child_idx, 
index_trace);
                             }
                         }
@@ -1121,174 +1122,174 @@
             }
         }
     }
-}                
+}
 
 template<char K, char U, char V, char DK>
 void IsosurfaceOctreeVolume::sse_traverse_leaf(SSERayPacket& srp, char 
first, char last, char DU, char DV,
-            const FrustumInterval& fi, const Vec3i& cell, int stop_depth, 
int depth, 
+            const FrustumInterval& fi, const Vec3i& cell, int stop_depth, 
int depth,
             int leaf_depth, ST leaf_value, const Vec3i& leaf_base_cell,
             unsigned int index_trace[]) const
 {
     int child_bit = octdata->get_child_bit_depth(depth);
     int unsafe_zone = (child_bit<<1) - 
octdata->get_child_bit_depth(octdata->get_cap_depth());
     Vec3i child_cell;
-    Vector pcenter( static_cast<float>(cell[0] | child_bit), 
-                    static_cast<float>(cell[1] | child_bit), 
+    Vector pcenter( static_cast<float>(cell[0] | child_bit),
+                    static_cast<float>(cell[1] | child_bit),
                     static_cast<float>(cell[2] | child_bit));
-                    
-    #pragma unroll(2)
+
+    MANTA_UNROLL(2);
     for(int k=0; k<2; k++)
-       {       
+        {
         sse_t child_tkenter;
         sse_t child_tkexit;
         int tc_K;
-                
-               if (k)  //AFTER THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+
+                if (k)  //AFTER THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] + depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];       
-                #if DBGP     
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
+                #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] =" << cell[K] << endl;
                 #endif
-                       }
-                       
-               }
-               else    //BEFORE THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+                        }
+
+                }
+                else    //BEFORE THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
                 #if DBGP
                 cerr << "kenter = cell[K] =" << cell[K] << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;   
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = cell[K]+depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-               }
-        
+                        }
+                }
+
         #if DBGP
         cerr << "child_tkenter "; simd_cerr(child_tkenter);
         cerr << "child_tkexit "; simd_cerr(child_tkexit);
         cerr << "child_pkenter[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkenter)) );
         cerr << "child_pkexit[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkexit)) );
         #endif
-     
+
         //we have child_tkenter, child_tkexit.
         if (_mm_movemask_ps(cmp4_ge(child_tkexit, zero4()))==0)
         {
             #if DBGP
             cerr << "texit was negative; continuing." << endl;
             #endif
-                       continue;
+                        continue;
         }
-         
+
         //find pkenter_uvminmax, pkexit[u], pkexit[v]
         //child_pkenter = dir*(orig/dir) + dir*t = dir(orig+t)
         const sse_t child_pkenter_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkenter));
         const sse_t child_pkexit_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkexit));
-        
+
         #if DBGP
         cerr << "child_pkenter_uvminmax "; simd_cerr(child_pkenter_uvminmax);
-        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);  
      
+        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);
         #endif
 
         sse_union tmp_min, tmp_max;
         tmp_min.sse = min4(child_pkenter_uvminmax, child_pkexit_uvminmax);
         tmp_max.sse = max4(child_pkenter_uvminmax, child_pkexit_uvminmax);
-        
+
         #if DBGP
         cerr << "tmp_min "; simd_cerr(tmp_min.sse);
         cerr << "tmp_max "; simd_cerr(tmp_max.sse);
         #endif
-        
+
         const float umin = MIN(tmp_min.f[3], tmp_min.f[1]);
         const float vmin = MIN(tmp_min.f[2], tmp_min.f[0]);
         const float umax = MAX(tmp_max.f[3], tmp_max.f[1]);
         const float vmax = MAX(tmp_max.f[2], tmp_max.f[0]);
         sse_t sse_fuvminmax = set44(umin, vmin, umax, vmax);
-        
+
         #if DBGP
         cerr << "sse_fuvminmax (before clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-    
-               sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
-               sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
-               sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
-               sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
-               
+
+                sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
+                sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
+                sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
+                sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
+
         #if DBGP
         cerr << "sse_fuvminmax (after clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-        
+
         sse_int_union iuvminmax;
-               
-               //convert to int
-               iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
-        
+
+                //convert to int
+                iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
+
         #if DBGP
         cerr << "iuvminmax = " << iuvminmax.i[0] << ", " << iuvminmax.i[1] 
<< ", " << iuvminmax.i[2] << ", " << iuvminmax.i[3];
         cerr << endl << endl;
         #endif
 
-               for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
-               {
-                       int tc_U;
-                       if (u)
-                       {
-                               tc_U = axis_table[U];
-                               child_cell[U] = cell[U] | child_bit;
-                       }
-                       else
-                       {
-                               tc_U = 0;
-                               child_cell[U] = cell[U];        
-                       }
-
-                       for(int v= (DV==1 ? iuvminmax.i[2] : iuvminmax.i[0]); 
(DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
-                       {                               
-                               int tc_V;
-                               if (v)
-                               {
-                                       tc_V = axis_table[V];
-                                       child_cell[V] = cell[V] | child_bit;
-                               }
-                               else
-                               {
-                                       tc_V = 0;
-                                       child_cell[V] = cell[V];        
-                               }
-                               
-                               int target_child = tc_K | tc_U | tc_V;
-                                
+                for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
+                {
+                        int tc_U;
+                        if (u)
+                        {
+                                tc_U = axis_table[U];
+                                child_cell[U] = cell[U] | child_bit;
+                        }
+                        else
+                        {
+                                tc_U = 0;
+                                child_cell[U] = cell[U];
+                        }
+
+                        for(int v= (DV==1 ? iuvminmax.i[2] : 
iuvminmax.i[0]); (DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
+                        {
+                                int tc_V;
+                                if (v)
+                                {
+                                        tc_V = axis_table[V];
+                                        child_cell[V] = cell[V] | child_bit;
+                                }
+                                else
+                                {
+                                        tc_V = 0;
+                                        child_cell[V] = cell[V];
+                                }
+
+                                int target_child = tc_K | tc_U | tc_V;
+
                 Vec3i local_child_cell = child_cell - leaf_base_cell;
                 if (local_child_cell.data[0] & unsafe_zone || 
local_child_cell.data[1] & unsafe_zone || local_child_cell.data[2] & 
unsafe_zone)
                 {
@@ -1311,7 +1312,7 @@
                         min_rho = max_rho = this_rho = leaf_value;
                         rho[0][0][0] = static_cast<float>(this_rho);
                         Vec3i offset(0,0,child_bit);
-                                                        
+
                         //0,0,1
                         if (target_child & 1)
                         {
@@ -1322,7 +1323,7 @@
                         else
                             this_rho = leaf_value;
                         rho[0][0][1] = static_cast<float>(this_rho);
-                
+
                         //0,1,1
                         offset.data[1] = child_bit;
                         if (target_child & 3)
@@ -1334,7 +1335,7 @@
                         else
                             this_rho = leaf_value;
                         rho[0][1][1] = static_cast<float>(this_rho);
-                        
+
                         //1,1,1
                         offset.data[0] = child_bit;
                         if (target_child & 7)
@@ -1345,8 +1346,8 @@
                         }
                         else
                             this_rho = leaf_value;
-                        rho[1][1][1] = static_cast<float>(this_rho);   
-                        
+                        rho[1][1][1] = static_cast<float>(this_rho);
+
                         //1,1,0
                         offset.data[2] = 0;
                         if (target_child & 6)
@@ -1358,7 +1359,7 @@
                         else
                             this_rho = leaf_value;
                         rho[1][1][0] = static_cast<float>(this_rho);
-                        
+
                         //1,0,0
                         offset.data[1] = 0;
                         if (target_child & 4)
@@ -1368,9 +1369,9 @@
                             max_rho = MAX(max_rho, this_rho);
                         }
                         else
-                            this_rho = leaf_value;     
+                            this_rho = leaf_value;
                         rho[1][0][0] = static_cast<float>(this_rho);
-                        
+
                         //1,0,1
                         offset.data[2] = child_bit;
                         if (target_child & 5)
@@ -1381,8 +1382,8 @@
                         }
                         else
                             this_rho = leaf_value;
-                        rho[1][0][1] = static_cast<float>(this_rho);   
-                    
+                        rho[1][0][1] = static_cast<float>(this_rho);
+
                         //0,1,0
                         offset.data[0] = 0;
                         offset.data[1] = child_bit;
@@ -1395,12 +1396,12 @@
                         }
                         else
                             this_rho = leaf_value;
-                        rho[0][1][0] = static_cast<float>(this_rho);         
          
-#else  
+                        rho[0][1][0] = static_cast<float>(this_rho);
+#else
                         //use original grid data
                         float rho[2][2][2];
                         ST min_rho, max_rho;
-#define MYDATA octdata->indata //toggle this to octdata if you want to test pure point location (no neighbor finding)
+#define MYDATA octdata->indata  //toggle this to octdata if you want to test pure point location (no neighbor finding)
                         min_rho = max_rho = lookup_safe(MYDATA, 
child_cell.data[0], child_cell.data[1], child_cell.data[2]);
                         rho[0][0][0] = static_cast<float>(min_rho);
                         for(int c=1; c<8; c++)
@@ -1415,8 +1416,8 @@
 #endif
 
                         if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
-                        {                        
-                            IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho, 
+                        {
+                            IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho,
                                 octdata->get_isovalue(), child_tenter, 
child_texit, hitmask, this, PrimitiveCommon::getMaterial());
                         }
                     }
@@ -1425,10 +1426,10 @@
                         Vector cmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
                         Vector cmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
                         char newfirst = first_intersects(srp, first, last, 
cmin, cmax);
-                            
+
                         sse_traverse_leaf<K,U,V,DK>(srp, newfirst, last, DU, 
DV, fi,
                             child_cell, stop_depth, depth+1, leaf_depth, 
leaf_value, leaf_base_cell, index_trace);
-    
+
                     }
                     if (srp.activeRays<=0)
                         return;
@@ -1439,174 +1440,174 @@
 }
 
 template<char K, char U, char V, char DK>
-void IsosurfaceOctreeVolume::sse_traverse_cap(SSERayPacket& srp, char first, 
char last, 
+void IsosurfaceOctreeVolume::sse_traverse_cap(SSERayPacket& srp, char first, 
char last,
         char DU, char DV, const FrustumInterval& fi,
         Vec3i& cell, char stop_depth, char depth, unsigned int index, 
unsigned int index_trace[]) const
 {
 #if DBGP
     cerr << "octcap " << index << ", first=" << (int)first << ",last=" << 
(int)last << endl;
-#endif    
+#endif
     OctCap& cap = octdata->get_cap(index);
     int child_bit = octdata->get_child_bit_depth(depth);
     Vec3i child_cell;
     index_trace[depth] = index;
-    Vector pcenter( static_cast<float>(cell[0] | child_bit), 
-                    static_cast<float>(cell[1] | child_bit), 
+    Vector pcenter( static_cast<float>(cell[0] | child_bit),
+                    static_cast<float>(cell[1] | child_bit),
                     static_cast<float>(cell[2] | child_bit));
-                
-    #pragma unroll(2)
+
+    MANTA_UNROLL(2);
     for(int k=0; k<2; k++)
-       {       
+        {
         sse_t child_tkenter;
         sse_t child_tkexit;
         int tc_K;
-                
-               if (k)  //AFTER THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+
+                if (k)  //AFTER THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] + depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(pcenter[K]), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];       
-                #if DBGP     
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
+                #if DBGP
                 cerr << "kenter = pcenter[K] =" << pcenter[K] << endl;
                 cerr << "kexit = cell[K] =" << cell[K] << endl;
                 #endif
-                       }
-                       
-               }
-               else    //BEFORE THE K MIDPLANE
-               {
-                       if (DK)
-                       {
+                        }
+
+                }
+                else    //BEFORE THE K MIDPLANE
+                {
+                        if (DK)
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = 0;
-                               child_cell[K] = cell[K];
+                                tc_K = 0;
+                                child_cell[K] = cell[K];
                 #if DBGP
                 cerr << "kenter = cell[K] =" << cell[K] << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-                       else
-                       {
+                        }
+                        else
+                        {
                 child_tkenter = mul4(sub4(set4(cell[K]+(child_bit<<1)), 
fi.kminmax_orig), fi.kminmax_invdir);
                 child_tkexit = mul4(sub4(set4(pcenter[K]), fi.kminmax_orig), 
fi.kminmax_invdir);
-                               tc_K = axis_table[K];
-                               child_cell[K] = cell[K] | child_bit;   
+                                tc_K = axis_table[K];
+                                child_cell[K] = cell[K] | child_bit;
                 #if DBGP
                 cerr << "kenter = cell[K]+depth_bit =" << 
cell[K]+(child_bit<<1) << endl;
                 cerr << "kexit = pcenter[K] =" << pcenter[K] << endl;
                 #endif
-                       }
-               }
-        
+                        }
+                }
+
         #if DBGP
         cerr << "child_tkenter "; simd_cerr(child_tkenter);
         cerr << "child_tkexit "; simd_cerr(child_tkexit);
         cerr << "child_pkenter[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkenter)) );
         cerr << "child_pkexit[K] = "; simd_cerr( 
mul4(oneOver(fi.kminmax_invdir), add4(fi.kminmax_orig, child_tkexit)) );
         #endif
-     
+
         //we have child_tkenter, child_tkexit.
         if (_mm_movemask_ps(cmp4_ge(child_tkexit, zero4()))==0)
         {
             #if DBGP
             cerr << "texit was negative; continuing." << endl;
             #endif
-                       continue;
+                        continue;
         }
-         
+
         //find pkenter_uvminmax, pkexit[u], pkexit[v]
         //child_pkenter = dir*(orig/dir) + dir*t = dir(orig+t)
         const sse_t child_pkenter_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkenter));
         const sse_t child_pkexit_uvminmax = add4(fi.uvminmax_orig, 
mul4(fi.uvminmax_dir, child_tkexit));
-        
+
         #if DBGP
         cerr << "child_pkenter_uvminmax "; simd_cerr(child_pkenter_uvminmax);
-        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);  
      
+        cerr << "child_pkexit_uvminmax "; simd_cerr(child_pkexit_uvminmax);
         #endif
 
         sse_union tmp_min, tmp_max;
         tmp_min.sse = min4(child_pkenter_uvminmax, child_pkexit_uvminmax);
         tmp_max.sse = max4(child_pkenter_uvminmax, child_pkexit_uvminmax);
-        
+
         #if DBGP
         cerr << "tmp_min "; simd_cerr(tmp_min.sse);
         cerr << "tmp_max "; simd_cerr(tmp_max.sse);
         #endif
-        
+
         const float umin = MIN(tmp_min.f[3], tmp_min.f[1]);
         const float vmin = MIN(tmp_min.f[2], tmp_min.f[0]);
         const float umax = MAX(tmp_max.f[3], tmp_max.f[1]);
         const float vmax = MAX(tmp_max.f[2], tmp_max.f[0]);
         sse_t sse_fuvminmax = set44(umin, vmin, umax, vmax);
-        
+
         #if DBGP
         cerr << "sse_fuvminmax (before clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-    
-               sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
-               sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
-               sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
-               sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
-               
+
+                sse_fuvminmax = sub4(sse_fuvminmax, set44(cell[U], cell[V], 
cell[U], cell[V]));
+                sse_fuvminmax = mul4(sse_fuvminmax, 
set4(octdata->get_inv_child_bit_depth(depth)));
+                sse_fuvminmax = max4(sse_fuvminmax, set44(0.0f, 0.0f, 
-9.9e9999f, -9.9e9999f));
+                sse_fuvminmax = min4(sse_fuvminmax, set44(9.9e9999f, 
9.9e9999f, 1.0f, 1.0f));
+
         #if DBGP
         cerr << "sse_fuvminmax (after clamp) = "; simd_cerr(sse_fuvminmax);
         #endif
-        
+
         sse_int_union iuvminmax;
-               
-               //convert to int
-               iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
-        
+
+                //convert to int
+                iuvminmax.ssei = _mm_cvttps_epi32(sse_fuvminmax);
+
         #if DBGP
         cerr << "iuvminmax = " << iuvminmax.i[0] << ", " << iuvminmax.i[1] 
<< ", " << iuvminmax.i[2] << ", " << iuvminmax.i[3];
         cerr << endl << endl;
         #endif
-                       
-               for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
-               {
-                       int tc_U;
-                       if (u)
-                       {
-                               tc_U = axis_table[U];
-                               child_cell[U] = cell[U] | child_bit;
-                       }
-                       else
-                       {
-                               tc_U = 0;
-                               child_cell[U] = cell[U];        
-                       }
-
-                       for(int v= (DV==1 ? iuvminmax.i[2] : iuvminmax.i[0]); 
(DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
-                       {                               
-                               int tc_V;
-                               if (v)
-                               {
-                                       tc_V = axis_table[V];
-                                       child_cell[V] = cell[V] | child_bit;
-                               }
-                               else
-                               {
-                                       tc_V = 0;
-                                       child_cell[V] = cell[V];        
-                               }
-                               
-                               int target_child = tc_K | tc_U | tc_V;
-                 
+
+                for(int u= (DU==1 ? iuvminmax.i[3] : iuvminmax.i[1]); (DU==1 
? u <= iuvminmax.i[1] : u >= iuvminmax.i[3]); u += DU)
+                {
+                        int tc_U;
+                        if (u)
+                        {
+                                tc_U = axis_table[U];
+                                child_cell[U] = cell[U] | child_bit;
+                        }
+                        else
+                        {
+                                tc_U = 0;
+                                child_cell[U] = cell[U];
+                        }
+
+                        for(int v= (DV==1 ? iuvminmax.i[2] : 
iuvminmax.i[0]); (DV==1 ? v <= iuvminmax.i[0] : v >= iuvminmax.i[2]); v += DV)
+                        {
+                                int tc_V;
+                                if (v)
+                                {
+                                        tc_V = axis_table[V];
+                                        child_cell[V] = cell[V] | child_bit;
+                                }
+                                else
+                                {
+                                        tc_V = 0;
+                                        child_cell[V] = cell[V];
+                                }
+
+                                int target_child = tc_K | tc_U | tc_V;
+
                 sse_t child_tenter[RayPacket::SSE_MaxSize];
                 sse_t child_texit[RayPacket::SSE_MaxSize];
                 sse_t hitmask[RayPacket::SSE_MaxSize];
@@ -1617,7 +1618,7 @@
 #ifndef IOV_BOX_CELLS
                 if (newfirst > newlast)
                     continue;
-                    
+
 #ifdef USE_OCTREE_DATA
                 float rho[2][2][2];
                 ST min_rho, max_rho, this_rho;
@@ -1626,7 +1627,7 @@
                 int prev_depth = depth-1;
                 Vec3i offset(0,0,1);
                 octvol_fill_cell(cap, 1);
-#else  
+#else
                 //use original grid data
                 float rho[2][2][2];
                 ST min_rho, max_rho;
@@ -1642,17 +1643,17 @@
                     min_rho = MIN(this_rho, min_rho);
                     max_rho = MAX(this_rho, max_rho);
                 }
-#endif    
+#endif
 
                 if (octdata->get_isovalue() >= min_rho && 
octdata->get_isovalue() <= max_rho)
                 {
                     //cerr << "in cap " << (unsigned long)(&cap) << ", octant " << target_child << endl;
-                    IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho, 
+                    IsosurfaceImplicit::sse_intersect(srp, newfirst, 
newlast, cmin, cmax, rho,
                         octdata->get_isovalue(), child_tenter, child_texit, 
hitmask, this, PrimitiveCommon::getMaterial());
                     if (srp.activeRays<=0)
                         return;
                 }
-#endif                
+#endif
             }
         }
     }

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h     (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h     Fri Aug  3 13:01:07 
2007
@@ -6,6 +6,7 @@
 #include <Core/Geometry/Vector.h>
 #include <Core/Geometry/BBox.h>
 #include <Core/Color/Color.h>
+#include <Core/Util/Preprocessor.h>
 #include <Interface/Texture.h>
 #include <Model/Primitives/OctreeVolume.h>
 #include <Interface/RayPacket.h>
@@ -26,93 +27,93 @@
     class IsosurfaceOctreeVolume : public PrimitiveCommon
     {
         const OctreeVolume* octdata;
-        
+
         public:
             IsosurfaceOctreeVolume(OctreeVolume* _octdata, Material* _matl);
             ~IsosurfaceOctreeVolume();
-            
+
             void preprocess( PreprocessContext const &context );
             void computeBounds( PreprocessContext const &context, BBox &box 
) const;
             void intersect( RenderContext const &context, RayPacket &rays ) 
const;
             void computeNormal(const RenderContext& context, RayPacket& 
rays) const;
-            
+
             BBox getBounds() const;
-            
-        private:    
-            
+
+        private:
+
             void single_intersect(RayPacket& rays, int which_one) const;
-            
+
             bool single_traverse_node(RayPacket& rays, int which_one,
                           const Vector& orig, const Vector& dir, const 
Vector& inv_dir, int res,
-                          int depth, unsigned int node_index, unsigned int 
index_trace[], Vec3i& cell, 
+                          int depth, unsigned int node_index, unsigned int 
index_trace[], Vec3i& cell,
                           const float tenter, const float texit) const;
-                          
+
             bool single_traverse_leaf(RayPacket& rays, int which_one,
-                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int res, 
+                        const Vector& orig, const Vector& dir, const Vector& 
inv_dir, int res,
                         int depth, int leaf_depth, ST scalar, Vec3i& 
leaf_base_cell,
                         unsigned int index_trace[], Vec3i& cell, const float 
tenter, const float texit) const;
-                        
+
             bool single_traverse_cap(RayPacket& rays, int which_one,
-                          const Vector& orig, const Vector& dir, const 
Vector& inv_dir, int res, 
-                          int depth, unsigned int cap_index, unsigned int 
index_trace[], Vec3i& cell, const float tenter, 
+                          const Vector& orig, const Vector& dir, const 
Vector& inv_dir, int res,
+                          int depth, unsigned int cap_index, unsigned int 
index_trace[], Vec3i& cell, const float tenter,
                           const float texit) const;
-                    
+
 #ifdef MANTA_SSE
             struct FrustumInterval
             {
                 //NOTE: these are all in the order umin, vmin, umax, vmax.
                 //  But specifically, that order is defined by orig and dir.
                 sse_t uvminmax_orig;
-                sse_t uvminmax_dir;                
+                sse_t uvminmax_dir;
                 sse_t uvminmax_invdir;
-                             
+
                 sse_t kminmax_orig;  //kmin_xforig, kmin_xforig, kmax_xforig, kmax_xforig
                 sse_t kminmax_dir;
-                sse_t kminmax_invdir;  //kmin_invdir, kmin_invdir, kmax_invdir, kmax_invdir               
+                sse_t kminmax_invdir;  //kmin_invdir, kmin_invdir, kmax_invdir, kmax_invdir
             };
 
             void packet_intersect_sse(RayPacket& rays) const;
-            
+
             template<char K, char U, char V, char DK>
-            void sse_traverse(SSERayPacket& srp, char first, char last, 
+            void sse_traverse(SSERayPacket& srp, char first, char last,
                 char DU, char DV) const;
-                            
+
             template<char K, char U, char V, char DK>
             void sse_traverse_node(SSERayPacket& srp,
                 char first, char last, char DU, char DV,
                 const FrustumInterval& fi,
-                Vec3i& cell, char stop_depth, char depth, unsigned int 
index, 
+                Vec3i& cell, char stop_depth, char depth, unsigned int index,
                 unsigned int index_trace[]) const;
-                
+
             template<char K, char U, char V, char DK>
             void sse_traverse_leaf(SSERayPacket& srp, char first, char last, 
char DU, char DV,
-                const FrustumInterval& fi, const Vec3i& cell, int 
stop_depth, int depth, 
+                const FrustumInterval& fi, const Vec3i& cell, int 
stop_depth, int depth,
                 int leaf_depth, ST leaf_value, const Vec3i& leaf_base_cell,
                 unsigned int index_trace[]) const;
-                
+
             template<char K, char U, char V, char DK>
-            void sse_traverse_cap(SSERayPacket& srp, 
+            void sse_traverse_cap(SSERayPacket& srp,
                 char first, char last, char DU, char DV, const 
FrustumInterval& fi,
-                Vec3i& cell, char stop_depth, char depth, unsigned int 
index, 
-                unsigned int index_trace[]) const;    
-            
-            inline char first_intersects(SSERayPacket& srp, char first, char 
last, 
+                Vec3i& cell, char stop_depth, char depth, unsigned int index,
+                unsigned int index_trace[]) const;
+
+            inline char first_intersects(SSERayPacket& srp, char first, char 
last,
                 const Vector& min, const Vector& max) const
             {
                 sse_t boxmin[3];
                 sse_t boxmax[3];
-                #pragma unroll(3)
+                MANTA_UNROLL(3);
                 for(int axis=0; axis<3; axis++)
                 {
                     boxmin[axis] = set4(min[axis]);
                     boxmax[axis] = set4(max[axis]);
-                }    
-            
+                }
+
                 for(char smd=first; smd<=last; smd++)
                 {
                     sse_t t0 = zero4();
                     sse_t t1 = srp.minT[smd];
-                    
+
                     sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
                     const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
                     const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
@@ -122,7 +123,7 @@
                     signs = cmp4_ge(srp.dir[2][smd],zero4());
                     const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
                     const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
-                    
+
                     const sse_t tBoxNearX = mul4(sub4(b0_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
                     const sse_t tBoxNearY = mul4(sub4(b0_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
                     const sse_t tBoxNearZ = mul4(sub4(b0_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
@@ -138,30 +139,30 @@
                     t1 = min4(t1,tBoxFarX);
                     t1 = min4(t1,tBoxFarY);
                     t1 = min4(t1,tBoxFarZ);
-                    
+
                     if (_mm_movemask_ps(cmp4_le(t0,t1)))    //if any hit
                         return smd;
                 }
                 return last+1;
             }
-            
-            inline char last_intersects(SSERayPacket& srp, char first, char 
last, 
+
+            inline char last_intersects(SSERayPacket& srp, char first, char 
last,
                 const Vector& min, const Vector& max) const
             {
                 sse_t boxmin[3];
                 sse_t boxmax[3];
-                #pragma unroll(3)
+                MANTA_UNROLL(3);
                 for(int axis=0; axis<3; axis++)
                 {
                     boxmin[axis] = set4(min[axis]);
                     boxmax[axis] = set4(max[axis]);
-                }    
-            
+                }
+
                 for(char smd=last; smd>=first; smd--)
                 {
                     sse_t t0 = zero4();
                     sse_t t1 = srp.minT[smd];
-                    
+
                     sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
                     const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
                     const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
@@ -171,7 +172,7 @@
                     signs = cmp4_ge(srp.dir[2][smd],zero4());
                     const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
                     const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
-                    
+
                     const sse_t tBoxNearX = mul4(sub4(b0_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
                     const sse_t tBoxNearY = mul4(sub4(b0_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
                     const sse_t tBoxNearZ = mul4(sub4(b0_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
@@ -187,15 +188,15 @@
                     t1 = min4(t1,tBoxFarX);
                     t1 = min4(t1,tBoxFarY);
                     t1 = min4(t1,tBoxFarZ);
-                    
+
                     if (_mm_movemask_ps(cmp4_le(t0,t1)))    //if any hit
                         return smd;
                 }
                 return -1;
-            }            
-            
-            inline void intersect_cap_octant(SSERayPacket& srp, char first, 
char last, 
-                    char& newfirst, char& newlast, const Vector& min, const 
Vector& max, 
+            }
+
+            inline void intersect_cap_octant(SSERayPacket& srp, char first, 
char last,
+                    char& newfirst, char& newlast, const Vector& min, const 
Vector& max,
                     sse_t tenter[], sse_t texit[], sse_t hitmask[]) const
             {
                 sse_t boxmin[3];
@@ -203,20 +204,21 @@
                 sse_t tnear[3];
                 sse_t tfar[3];
                 sse_t dgt0[3];
-                #pragma unroll(3)
+
+                MANTA_UNROLL(3);
                 for(int axis=0; axis<3; axis++)
                 {
                     boxmin[axis] = set4(min[axis]);
                     boxmax[axis] = set4(max[axis]);
-                }   
-            
+                }
+
                 newlast = first;
-                newfirst = last+1;    
+                newfirst = last+1;
                 for(char smd=first; smd<=last; smd++)
                 {
                     tenter[smd] = zero4();
                     texit[smd] = srp.minT[smd];
-                    
+
                     dgt0[0] = cmp4_ge(srp.dir[0][smd],zero4());
                     const sse_t b0_x = mask4(dgt0[0], boxmin[0], boxmax[0]);
                     const sse_t b1_x = mask4(dgt0[0], boxmax[0], boxmin[0]);
@@ -226,7 +228,7 @@
                     dgt0[2] = cmp4_ge(srp.dir[2][smd],zero4());
                     const sse_t b0_z = mask4(dgt0[2], boxmin[2], boxmax[2]);
                     const sse_t b1_z = mask4(dgt0[2], boxmax[2], boxmin[2]);
-                    
+
                     tnear[0] = mul4(sub4(b0_x, srp.orig[0][smd]), 
srp.inv_dir[0][smd]);
                     tnear[1] = mul4(sub4(b0_y, srp.orig[1][smd]), 
srp.inv_dir[1][smd]);
                     tnear[2] = mul4(sub4(b0_z, srp.orig[2][smd]), 
srp.inv_dir[2][smd]);
@@ -242,14 +244,14 @@
                     texit[smd] = min4(texit[smd],tfar[0]);
                     texit[smd] = min4(texit[smd],tfar[1]);
                     texit[smd] = min4(texit[smd],tfar[2]);
-                    
+
                     hitmask[smd] = cmp4_lt(tenter[smd], texit[smd]);
                     if (_mm_movemask_ps(hitmask[smd]))
                     {
                         newfirst = MIN(newfirst, smd);
                         newlast = smd;
-                    } 
-                    
+                    }
+
 #ifdef IOV_BOX_CELLS
                     sse_t hitmask2 = and4(hitmask[smd], cmp4_lt(tenter[smd], 
srp.minT[smd]));
                     srp.minT[smd] = mask4(hitmask2, tenter[smd], 
srp.minT[smd]);
@@ -261,14 +263,14 @@
                             normal[axis] = mask4(cmp4_eq(tenter[smd], 
tnear[axis]), mask4(dgt0[axis], set4(-1.0f), _mm_one), zero4());
                             srp.normal[axis][smd] = mask4(hitmask2, 
normal[axis], srp.normal[axis][smd]);
                         }
-                        
-                        #pragma unroll(3)
+
+                        MANTA_UNROLL(3);
                         for(int axis=0; axis<3; axis++)
                             srp.normal[axis][smd] = mask4(hitmask2, 
normal[axis], srp.normal[axis][smd]);
-                            
+
                         int int_hitmask2 = _mm_movemask_ps(hitmask2);
-                            
-                        #pragma unroll(4);
+
+                        MANTA_UNROLL(4);
                         for(int ray=0; ray<4; ray++)
                         {
                             if (int_hitmask2 & (1<<ray))
@@ -283,7 +285,7 @@
 #endif
                 }
             }
-            
+
 #endif   //MANTA_SSE
     };
 };




  • [MANTA] r1605 - trunk/Model/Primitives, boulos, 08/03/2007

Archive powered by MHonArc 2.6.16.

Top of page