Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1108 - in trunk: Interface Model/Primitives


Chronological Thread 
  • From: knolla@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1108 - in trunk: Interface Model/Primitives
  • Date: Fri, 9 Jun 2006 06:15:12 -0600 (MDT)

Author: knolla
Date: Fri Jun  9 06:15:10 2006
New Revision: 1108

Modified:
   trunk/Interface/RayPacket.h
   trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
   trunk/Model/Primitives/IsosurfaceOctreeVolume.h
Log:
SSE packets implicit now about as fast as single ray... still needs work

Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Fri Jun  9 06:15:10 2006
@@ -73,7 +73,7 @@
 
     // Int-based arrays
     int whichEye[MaxSize];
-    int signs[3][MaxSize]; // 1=negative, 0=zero, positive
+    MANTA_ALIGN(16) int signs[3][MaxSize]; // 1=negative, 0=zero, positive
 
     // Char-based arrays
     char scratchpad_data[MaxSize][MaxScratchpadSize];
@@ -88,6 +88,7 @@
     sse_t* orig[3];
     sse_t* dir[3];
     sse_t* inv_dir[3];
+    sse_t* signs[3];
     sse_t* normal[3];
     sse_t* minT;
     RayPacket* rp;

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc    Fri Jun  9 06:15:10 2006
@@ -765,6 +765,7 @@
         srp.orig[axis] = (sse_t*)(data->origin[axis]);
         srp.dir[axis] = (sse_t*)(data->direction[axis]);
         srp.inv_dir[axis] = (sse_t*)(data->inverseDirection[axis]);
+        srp.signs[axis] = (sse_t*)(data->signs[axis]);
         srp.normal[axis] = (sse_t*)(data->normal[axis]);
     }
     srp.rp = &rays;
@@ -827,13 +828,14 @@
     Vec3i child_cell = cell;
     int child_bit = octdata->get_child_bit_depth(depth);
     index_trace[depth] = index;
+    int smd_first = first << 2;
 
     //intersect all children in order
     #pragma unroll(2)
     for(int midplane_x=0; midplane_x!=2; midplane_x++)
     {
         int target_x;
-        if (midplane_x - srp.rp->getSign(0,0))
+        if (midplane_x - srp.rp->getSign(smd_first,0))
         {
             target_x = 4;
             child_cell.data[0] = cell.data[0] | child_bit;
@@ -847,7 +849,7 @@
         for(int midplane_y=0; midplane_y!=2; midplane_y++)
         {
             int target_xy;
-            if (midplane_y - srp.rp->getSign(0,1))
+            if (midplane_y - srp.rp->getSign(smd_first,1))
             {
                 target_xy = target_x | 2;
                 child_cell.data[1] = cell.data[1] | child_bit;
@@ -861,7 +863,7 @@
             for(int midplane_z=0; midplane_z!=2; midplane_z++)
             {
                 int target_child;
-                if (midplane_z - srp.rp->getSign(0,2))
+                if (midplane_z - srp.rp->getSign(smd_first,2))
                 {
                     target_child = target_xy | 1;
                     child_cell.data[2] = cell.data[2] | child_bit;
@@ -871,36 +873,39 @@
                     target_child = target_xy;
                     child_cell.data[2] = cell.data[2];
                 }

-                Vector pmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
-                Vector pmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
-                char newfirst = first_intersects_node_octant(srp, first, 
last, pmin, pmax);
                 
-                //cerr << "newfirst=" << (int)newfirst << ", newlast=" << 
(int)newlast << endl;
-                
-                if (newfirst <= last && octdata->get_isovalue() >= 
node.mins[target_child] && octdata->get_isovalue() <= node.maxs[target_child])
+                if (octdata->get_isovalue() >= node.mins[target_child] && 
octdata->get_isovalue() <= node.maxs[target_child])
                 {
-                    if (node.offsets[target_child]==-1)
-                    {
-                        bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth, node.values[target_child], index_trace);
-                    }
-                    else
+                    Vector pmin(child_cell.data[0], child_cell.data[1], 
child_cell.data[2]);
+                    Vector pmax(child_cell.data[0]+child_bit, 
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+                    char newfirst = first_intersects_node_octant(srp, first, 
last, pmin, pmax);
+                    
+                    //cerr << "newfirst=" << (int)newfirst << ", last=" << 
(int)last << endl;
+                    
+                    if (newfirst <= last)
                     {
-                        unsigned int child_idx = node.children_start + 
node.offsets[target_child];
-                        if (depth == octdata->get_pre_cap_depth())     //cap
+                        if (node.offsets[target_child]==-1)
                         {
-                            bvh_octcap(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
+                            bvh_octleaf(srp, newfirst, last, child_cell, 
stop_depth, depth, node.values[target_child], index_trace);
                         }
                         else
                         {
-                            bvh_octnode(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
+                            unsigned int child_idx = node.children_start + 
node.offsets[target_child];
+                            if (depth == octdata->get_pre_cap_depth()) //cap
+                            {
+                                bvh_octcap(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
+                            }
+                            else
+                            {
+                                bvh_octnode(srp, newfirst, last, child_cell, 
stop_depth, depth+1, child_idx, index_trace);
+                            }
                         }
                     }
-                }
-                if (srp.activeRays==0)
-                {
-                    //cerr << "early termination" << endl;
-                    return;
+                    if (srp.activeRays==0)
+                    {
+                        //cerr << "early termination" << endl;
+                        return;
+                    }
                 }
             }
         }
@@ -919,13 +924,14 @@
     OctCap& cap = octdata->get_cap(index);
     Vec3i child_cell = cell;
     index_trace[depth] = index;
+    int smd_first = first << 2;
     
     //intersect all children in order
     #pragma unroll(2)
     for(int midplane_x=0; midplane_x<2; midplane_x++)
     {
         int target_x;
-        if (midplane_x - srp.rp->getSign(0,0))
+        if (midplane_x - srp.rp->getSign(smd_first,0))
         {
             target_x = 4;
             child_cell.data[0] = cell.data[0] | 1;
@@ -939,7 +945,7 @@
         for(int midplane_y=0; midplane_y<2; midplane_y++)
         {
             int target_xy;
-            if (midplane_y - srp.rp->getSign(0,1))
+            if (midplane_y - srp.rp->getSign(smd_first,1))
             {
                 target_xy = target_x | 2;
                 child_cell.data[1] = cell.data[1] | 1;
@@ -953,7 +959,7 @@
             for(int midplane_z=0; midplane_z<2; midplane_z++)
             {
                 int target_child;
-                if (midplane_z - srp.rp->getSign(0,2))
+                if (midplane_z - srp.rp->getSign(smd_first,2))
                 {
                     target_child = target_xy | 1;
                     child_cell.data[2] = cell.data[2] | 1;

Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h     (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h     Fri Jun  9 06:15:10 2006
@@ -71,62 +71,100 @@
             inline char first_intersects_node_octant(SSERayPacket& srp, char 
first, char last, 
                 const Vector& min, const Vector& max) const
             {
+                sse_t boxmin[3];
+                sse_t boxmax[3];
+                #pragma unroll(3)
+                for(int axis=0; axis<3; axis++)
+                {
+                    boxmin[axis] = set4(min[axis]);
+                    boxmax[axis] = set4(max[axis]);
+                }    
+            
                 for(char smd=first; smd<=last; smd++)
                 {
-                    sse_t dgt0[3];
-                    sse_t tnear[3];
-                    sse_t tfar[3];
-                        
-                    #pragma unroll(3)
-                    for(int axis=0; axis<3; axis++)
-                    {
-                        dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());   
    //use signs?
-                        sse_t t0 = mul4(sub4(set4(min[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-                        sse_t t1 = mul4(sub4(set4(max[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-                        tnear[axis] = mask4(dgt0[axis], t0, t1);
-                        tfar[axis] = mask4(dgt0[axis], t1, t0);
-                    }
+                    sse_t t0 = zero4();
+                    sse_t t1 = srp.minT[smd];
                     
-                    sse_t tenter = max4(max4(tnear[0], tnear[1]), tnear[2]);
-                    sse_t texit = min4(min4(tfar[0], tfar[1]), tfar[2]);
-                        
-                    if (_mm_movemask_ps(cmp4_le(tenter, texit)))       //if 
any hit
+                    sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+                    const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+                    const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+                    signs = cmp4_ge(srp.dir[1][smd],zero4());
+                    const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+                    const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+                    signs = cmp4_ge(srp.dir[2][smd],zero4());
+                    const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+                    const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
+                    
+                    const sse_t tBoxNearX = mul4(sub4(b0_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxNearY = mul4(sub4(b0_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxNearZ = mul4(sub4(b0_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    t0 = max4(t0,tBoxNearX);
+                    t0 = max4(t0,tBoxNearY);
+                    t0 = max4(t0,tBoxNearZ);
+
+                    const sse_t tBoxFarX = mul4(sub4(b1_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxFarY = mul4(sub4(b1_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxFarZ = mul4(sub4(b1_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    t1 = min4(t1,tBoxFarX);
+                    t1 = min4(t1,tBoxFarY);
+                    t1 = min4(t1,tBoxFarZ);
+                    
+                    if (_mm_movemask_ps(cmp4_le(t0,t1)))    //if any hit
                         return smd;
                 }
                 return last+1;
-            } 
+            }
             
             inline void intersect_cap_octant(SSERayPacket& srp, char first, 
char last, 
                     char& newfirst, char& newlast, const Vector& min, const 
Vector& max, 
                     sse_t tenter[], sse_t texit[], sse_t hitmask[]) const
             {
-                #pragma unroll(RayPacket::SSE_MaxSize)
-                for(char smd=first; smd<=last; smd++)
-                    hitmask[smd] = zero4();
-                    
+                sse_t boxmin[3];
+                sse_t boxmax[3];
+                #pragma unroll(3)
+                for(int axis=0; axis<3; axis++)
+                {
+                    boxmin[axis] = set4(min[axis]);
+                    boxmax[axis] = set4(max[axis]);
+                }   
+            
                 newlast = first;
                 newfirst = last+1;    
                 for(char smd=first; smd<=last; smd++)
                 {
-                    sse_t dgt0[3];
-                    sse_t tnear[3];
-                    sse_t tfar[3];
-                        
-                    #pragma unroll(3)
-                    for(int axis=0; axis<3; axis++)
-                    {
-                        dgt0[axis] = cmp4_ge(srp.dir[axis][smd], zero4());   
    //use signs?
-                        sse_t t0 = mul4(sub4(set4(min[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-                        sse_t t1 = mul4(sub4(set4(max[axis]), 
srp.orig[axis][smd]), srp.inv_dir[axis][smd]);
-                        tnear[axis] = mask4(dgt0[axis], t0, t1);
-                        tfar[axis] = mask4(dgt0[axis], t1, t0);
-                    }
+                    tenter[smd] = zero4();
+                    texit[smd] = srp.minT[smd];
                     
-                    tenter[smd] = max4(max4(tnear[0], tnear[1]), tnear[2]);
-                    texit[smd] = min4(min4(tfar[0], tfar[1]), tfar[2]);
+                    sse_t signs = cmp4_ge(srp.dir[0][smd],zero4());
+                    const sse_t b0_x = mask4(signs, boxmin[0], boxmax[0]);
+                    const sse_t b1_x = mask4(signs, boxmax[0], boxmin[0]);
+                    signs = cmp4_ge(srp.dir[1][smd],zero4());
+                    const sse_t b0_y = mask4(signs, boxmin[1], boxmax[1]);
+                    const sse_t b1_y = mask4(signs, boxmax[1], boxmin[1]);
+                    signs = cmp4_ge(srp.dir[2][smd],zero4());
+                    const sse_t b0_z = mask4(signs, boxmin[2], boxmax[2]);
+                    const sse_t b1_z = mask4(signs, boxmax[2], boxmin[2]);
                     
-                    hitmask[smd] = cmp4_le(tenter[smd], texit[smd]);
-                    if (_mm_movemask_ps(hitmask[smd])) //if any hit
+                    const sse_t tBoxNearX = mul4(sub4(b0_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxNearY = mul4(sub4(b0_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxNearZ = mul4(sub4(b0_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    tenter[smd] = max4(tenter[smd],tBoxNearX);
+                    tenter[smd] = max4(tenter[smd],tBoxNearY);
+                    tenter[smd] = max4(tenter[smd],tBoxNearZ);
+
+                    const sse_t tBoxFarX = mul4(sub4(b1_x, 
srp.orig[0][smd]), srp.inv_dir[0][smd]);
+                    const sse_t tBoxFarY = mul4(sub4(b1_y, 
srp.orig[1][smd]), srp.inv_dir[1][smd]);
+                    const sse_t tBoxFarZ = mul4(sub4(b1_z, 
srp.orig[2][smd]), srp.inv_dir[2][smd]);
+
+                    texit[smd] = min4(texit[smd],tBoxFarX);
+                    texit[smd] = min4(texit[smd],tBoxFarY);
+                    texit[smd] = min4(texit[smd],tBoxFarZ);
+                    
+                    hitmask[smd] = cmp4_lt(tenter[smd], texit[smd]);
+                    if (_mm_movemask_ps(hitmask[smd]))
                     {
                         newfirst = MIN(newfirst, smd);
                         newlast = smd;
@@ -164,8 +202,9 @@
 
 #endif
                 }
-            }                 
-#endif                                           
+            }
+            
+#endif   //MANTA_SSE
     };
 };
 




  • [MANTA] r1108 - in trunk: Interface Model/Primitives, knolla, 06/09/2006

Archive powered by MHonArc 2.6.16.

Top of page