Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1236 - trunk/Model/Groups


Chronological Thread 
  • From: boulos@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1236 - trunk/Model/Groups
  • Date: Wed, 15 Nov 2006 02:19:26 -0700 (MST)

Author: boulos
Date: Wed Nov 15 02:19:24 2006
New Revision: 1236

Modified:
   trunk/Model/Groups/DynBVH.cc
   trunk/Model/Groups/DynBVH.h
Log:
Ported code from DynBVH to make a fair comparison.
Works properly on my macbook and is still faster than
DynBVH (now for arachne test)


Modified: trunk/Model/Groups/DynBVH.cc
==============================================================================
--- trunk/Model/Groups/DynBVH.cc        (original)
+++ trunk/Model/Groups/DynBVH.cc        Wed Nov 15 02:19:24 2006
@@ -10,9 +10,14 @@
 const float BVH_C_isec = 10.f;
 const float BVH_C_trav = 10.f;
 
+#define USE_DYNBVH_PORTS 1
+
 void DynBVH::intersect(const RenderContext& context, RayPacket& rays) const
 {
     rays.computeInverseDirections();
+#if USE_DYNBVH_PORTS
+    templatedTraverse(context, rays);
+#else
     rays.computeSigns();
 
     // compute IntervalArithmetic Data
@@ -74,7 +79,9 @@
     }
 #else
     intersectNode(0,context,rays, ia_data);
-#endif
+#endif // asserted parent or not
+
+#endif // dynbvh port or not
 }
 
 #if USE_ASSERTED_PARENT
@@ -128,7 +135,7 @@
             // we need not create a subpacket to help it stop early
 
             // actually if we're just one ray it'll look more than it
-            // needs to since it will reintersect the firstActive ray... damn
+            // needs to since it will reintersect the firstActive ray...
             int lastActive = lastIntersects(node.bounds, rays);
 
             // build a subpacket from firstActive to lastActive (inclusive, 
hence +1)
@@ -647,4 +654,578 @@
         }
     }
     return best_eval.event != -1;
+}
+
+
+// begin port from actual DynBVH
+void DynBVH::templatedTraverse(const RenderContext& context, RayPacket& 
packet) const {
+  struct StackNode {
+    int nodeID;
+    int firstActive;
+  };
+
+  StackNode nodeStack[64];// ={}; // NOTE(boulos): no need to 0 out
+  int stackPtr = 0;
+
+  int ID = 0;
+  int firstActive = 0;
+
+  const int frontSon[3] = {
+    packet.getDirection(0, 0) > 0 ? 0 : 1,
+    packet.getDirection(0, 1) > 0 ? 0 : 1,
+    packet.getDirection(0, 2) > 0 ? 0 : 1
+  };
+
+  const BVHNode* const nodes = this->nodes;
+  __m128 min_rcp[3];
+  __m128 max_rcp[3];
+
+  RayPacketData* data = packet.data;
+#pragma unroll(3)
+  for (int d = 0; d < 3; d++) {
+    min_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
+    max_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
+#pragma unroll(8)
+    for (int pack = 1; pack < RayPacket::SSE_MaxSize; pack++) {
+      min_rcp[d] = _mm_min_ps(min_rcp[d],
+                              _mm_load_ps(&data->inverseDirection[d][pack * 
4]));
+      max_rcp[d] = _mm_max_ps(max_rcp[d],
+                              _mm_load_ps(&data->inverseDirection[d][pack * 
4]));
+    }
+  }
+
+  // do sign check
+
+  int signChecked =
+    ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[0], _mm_setzero_ps())) == 0xf) ||
+     (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[0], _mm_setzero_ps())) == 0xf)) &&
+    ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[1], _mm_setzero_ps())) == 0xf) ||
+     (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[1], _mm_setzero_ps())) == 0xf)) &&
+    ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[2], _mm_setzero_ps())) == 0xf) ||
+     (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[2], _mm_setzero_ps())) == 0xf));
+
+  if (!signChecked) {
+    // variant for non-equal signs
+    for (;;) {
+      const BVHNode& thisNode = nodes[ID];
+      firstActive = firstActivePort(packet, firstActive, thisNode.bounds);
+      if (firstActive < RayPacket::MaxSize) {
+        if (!thisNode.isLeaf()) {
+          // inner node
+          int front = frontSon[thisNode.axis];
+          nodeStack[stackPtr].nodeID = thisNode.child + 1 - front;
+          nodeStack[stackPtr].firstActive = firstActive;
+          stackPtr++;
+          ID = thisNode.child + front;
+          continue;
+        } else {
+          // leaf
+          int lastActive = lastActivePort(packet, firstActive, 
thisNode.bounds);
+          // this part has to differ from DynBVH due to requirements
+          // on primitive intersection.  Shouldn't hurt though.
+          RayPacket subpacket(packet, firstActive, lastActive+1);
+
+          for (int i = 0; i < thisNode.children; i++ ) {
+            const int object_id = object_ids[thisNode.child+i];
+            this->get(object_id)->intersect(context,subpacket);
+          }
+        }
+      }
+      if (stackPtr <= 0) {
+        return;
+      }
+      --stackPtr;
+      ID = nodeStack[stackPtr].nodeID;
+      firstActive = nodeStack[stackPtr].firstActive;
+    }
+  }
+
+  // sign matches case
+  __m128 min_org[3];
+  __m128 max_org[3];
+
+  if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+    for (int d = 0; d < 3; d++) {
+      min_org[d] = max_org[d] = _mm_load_ps(&data->origin[d][0]);
+      for (int i = 1; i < RayPacket::SSE_MaxSize; i++) {
+        min_org[d] = _mm_min_ps(min_org[d], _mm_load_ps(&data->origin[d][i * 
4]));
+        max_org[d] = _mm_max_ps(max_org[d], _mm_load_ps(&data->origin[d][i * 
4]));
+      }
+    }
+  }
+
+  __m128 max_t = _mm_load_ps(&data->minT[0]);
+  for (int pack=1; pack < RayPacket::SSE_MaxSize; pack++) {
+    max_t = _mm_max_ps(max_t, _mm_load_ps(&data->minT[pack * 4]));
+  }
+
+  const __m128 signCorrected_max_rcp[3] = {
+    frontSon[0]?max_rcp[0]:min_rcp[0],
+    frontSon[1]?max_rcp[1]:min_rcp[1],
+    frontSon[2]?max_rcp[2]:min_rcp[2]
+  };
+
+  const __m128 signCorrected_min_rcp[3] = {
+    !frontSon[0]?max_rcp[0]:min_rcp[0],
+    !frontSon[1]?max_rcp[1]:min_rcp[1],
+    !frontSon[2]?max_rcp[2]:min_rcp[2]
+  };
+
+  const __m128 signCorrected_max_org[3] = {
+    !frontSon[0]?max_org[0]:min_org[0],
+    !frontSon[1]?max_org[1]:min_org[1],
+    !frontSon[2]?max_org[2]:min_org[2]
+  };
+
+  const __m128 signCorrected_min_org[3] = {
+    frontSon[0]?max_org[0]:min_org[0],
+    frontSon[1]?max_org[1]:min_org[1],
+    frontSon[2]?max_org[2]:min_org[2]
+  };
+
+  for (;;) {
+    const BVHNode& thisNode = nodes[ID];
+    firstActive = firstActiveSameSignFrustumPort(packet, firstActive, 
thisNode.bounds, frontSon,
+                                                 signCorrected_min_org,
+                                                 signCorrected_max_org,
+                                                 signCorrected_min_rcp,
+                                                 signCorrected_max_rcp,
+                                                 max_t);
+    if (firstActive < RayPacket::MaxSize) {
+      if (!thisNode.isLeaf()) {
+        // inner node
+        int front = frontSon[thisNode.axis];
+        nodeStack[stackPtr].nodeID = thisNode.child + int(1-front); // 
NOTE(boulos): check this...
+        nodeStack[stackPtr].firstActive = firstActive;
+        stackPtr++;
+        ID = thisNode.child + front;
+        continue;
+      } else {
+        int lastActive = lastThatIntersectsSameSignPort(packet,
+                                                        firstActive,
+                                                        thisNode.bounds,
+                                                        frontSon);
+        // Same deal as above.
+        RayPacket subpacket(packet, firstActive, lastActive+1);
+
+        for (int i = 0; i < thisNode.children; i++ ) {
+          const int object_id = object_ids[thisNode.child+i];
+          this->get(object_id)->intersect(context,subpacket);
+        }
+      }
+    }
+    if (stackPtr <= 0) {
+      return;
+    }
+    --stackPtr;
+    ID = nodeStack[stackPtr].nodeID;
+    firstActive = nodeStack[stackPtr].firstActive;
+  }
+}
+
+int DynBVH::firstActivePort(RayPacket& packet, int firstActive, const BBox& 
box) const {
+  __m128 box_min_x = _mm_set1_ps(box[0][0]);
+  __m128 box_min_y = _mm_set1_ps(box[0][1]);
+  __m128 box_min_z = _mm_set1_ps(box[0][2]);
+
+  __m128 box_max_x = _mm_set1_ps(box[1][0]);
+  __m128 box_max_y = _mm_set1_ps(box[1][1]);
+  __m128 box_max_z = _mm_set1_ps(box[1][2]);
+
+  __m128 diff_x_min;
+  __m128 diff_y_min;
+  __m128 diff_z_min;
+
+  __m128 diff_x_max;
+  __m128 diff_y_max;
+  __m128 diff_z_max;
+  const RayPacketData* data = packet.data;
+  if (packet.getFlag(RayPacket::ConstantOrigin)) {
+    diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
+    diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
+    diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
+
+    diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
+    diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
+    diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
+  }
+
+  for (int i=firstActive;i < RayPacket::MaxSize; i+=4) {
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
+      diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
+      diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
+
+      diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][i]));
+      diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][i]));
+      diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][i]));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        diff_x_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        diff_x_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        diff_y_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        diff_y_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        diff_z_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        diff_z_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+      return i;
+  }
+
+  return RayPacket::MaxSize;
+}
+
+int DynBVH::lastActivePort(RayPacket& packet, int firstActive, const BBox& 
box) const {
+  __m128 box_min_x = _mm_set1_ps(box[0][0]);
+  __m128 box_min_y = _mm_set1_ps(box[0][1]);
+  __m128 box_min_z = _mm_set1_ps(box[0][2]);
+
+  __m128 box_max_x = _mm_set1_ps(box[1][0]);
+  __m128 box_max_y = _mm_set1_ps(box[1][1]);
+  __m128 box_max_z = _mm_set1_ps(box[1][2]);
+
+  __m128 diff_x_min;
+  __m128 diff_y_min;
+  __m128 diff_z_min;
+
+  __m128 diff_x_max;
+  __m128 diff_y_max;
+  __m128 diff_z_max;
+  const RayPacketData* data = packet.data;
+  if (packet.getFlag(RayPacket::ConstantOrigin)) {
+    diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
+    diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
+    diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
+
+    diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
+    diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
+    diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
+  }
+
+  const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
+  for (int i=last_ray; i > firstActive; i -= 4) {
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
+      diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
+      diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
+
+      diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][i]));
+      diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][i]));
+      diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][i]));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        diff_x_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        diff_x_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        diff_y_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        diff_y_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    {
+      const __m128 tBoxMin = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        diff_z_min);
+      const __m128 tBoxMax = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        diff_z_max);
+      t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+      t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+    }
+
+    // Unlike DynBVH we want to return last active "ray", so the last
+    // entry in this simd is the answer.
+    if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+      return i + 3;
+  }
+
+  return firstActive + 3;
+}
+
+int DynBVH::firstActiveSameSignFrustumPort(RayPacket& packet,
+                                           const int firstActive,
+                                           const BBox& box,
+                                           const int signs[3],
+                                           const __m128 sc_min_org[3],
+                                           const __m128 sc_max_org[3],
+                                           const __m128 sc_min_rcp[3],
+                                           const __m128 sc_max_rcp[3],
+                                           const __m128& max_t) const {
+
+  const __m128 box_near_x = _mm_set1_ps(box[signs[0]][0]);
+  const __m128 box_near_y = _mm_set1_ps(box[signs[1]][1]);
+  const __m128 box_near_z = _mm_set1_ps(box[signs[2]][2]);
+
+  const __m128 box_far_x = _mm_set1_ps(box[1-signs[0]][0]);
+  const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
+  const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
+
+  __m128 near_minus_org_x;
+  __m128 near_minus_org_y;
+  __m128 near_minus_org_z;
+
+  __m128 far_minus_org_x;
+  __m128 far_minus_org_y;
+  __m128 far_minus_org_z;
+
+  const RayPacketData* data = packet.data;
+  if (packet.getFlag(RayPacket::ConstantOrigin)) {
+    near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
+    near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
+    near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
+
+    far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
+    far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
+    far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
+  }
+
+  // test first (assumed) packet
+  {
+    const int i = firstActive;
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][i]));
+      near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][i]));
+      near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxNearX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]), near_minus_org_x);
+    const __m128 tBoxNearY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]), near_minus_org_y);
+    const __m128 tBoxNearZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]), near_minus_org_z);
+
+    t0 = _mm_max_ps(t0, tBoxNearX);
+    t0 = _mm_max_ps(t0, tBoxNearY);
+    t0 = _mm_max_ps(t0, tBoxNearZ);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][i]));
+      far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][i]));
+      far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxFarX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]), far_minus_org_x);
+    const __m128 tBoxFarY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]), far_minus_org_y);
+    const __m128 tBoxFarZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]), far_minus_org_z);
+
+    t1 = _mm_min_ps(t1, tBoxFarX);
+    t1 = _mm_min_ps(t1, tBoxFarY);
+    t1 = _mm_min_ps(t1, tBoxFarZ);
+
+    if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) != 0x0)
+      return i;
+  }
+
+  // first greedy "maybe hit" test failed.
+
+  if (packet.getFlag(RayPacket::ConstantOrigin)) {
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = max_t;
+
+    const __m128 tBoxNearX = _mm_mul_ps(sc_max_rcp[0], near_minus_org_x);
+    const __m128 tBoxNearY = _mm_mul_ps(sc_max_rcp[1], near_minus_org_y);
+    const __m128 tBoxNearZ = _mm_mul_ps(sc_max_rcp[2], near_minus_org_z);
+
+    t0 = _mm_max_ps(t0, tBoxNearX);
+    t0 = _mm_max_ps(t0, tBoxNearY);
+    t0 = _mm_max_ps(t0, tBoxNearZ);
+
+    const __m128 tBoxFarX = _mm_mul_ps(sc_min_rcp[0], far_minus_org_x);
+    const __m128 tBoxFarY = _mm_mul_ps(sc_min_rcp[1], far_minus_org_y);
+    const __m128 tBoxFarZ = _mm_mul_ps(sc_min_rcp[2], far_minus_org_z);
+
+    t1 = _mm_min_ps(t1, tBoxFarX);
+    t1 = _mm_min_ps(t1, tBoxFarY);
+    t1 = _mm_min_ps(t1, tBoxFarZ);
+
+    if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) == 0x0)
+      return RayPacket::MaxSize;
+  } else {
+    // IA for non-constant origin
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = max_t;
+
+    near_minus_org_x = _mm_sub_ps(box_near_x, sc_max_org[0]);
+    near_minus_org_y = _mm_sub_ps(box_near_y, sc_max_org[1]);
+    near_minus_org_z = _mm_sub_ps(box_near_z, sc_max_org[2]);
+
+    const __m128 tBoxNearX = _mm_mul_ps(sc_max_rcp[0], near_minus_org_x);
+    const __m128 tBoxNearY = _mm_mul_ps(sc_max_rcp[1], near_minus_org_y);
+    const __m128 tBoxNearZ = _mm_mul_ps(sc_max_rcp[2], near_minus_org_z);
+
+    t0 = _mm_max_ps(t0, tBoxNearX);
+    t0 = _mm_max_ps(t0, tBoxNearY);
+    t0 = _mm_max_ps(t0, tBoxNearZ);
+
+    far_minus_org_x = _mm_sub_ps(box_far_x, sc_min_org[0]);
+    far_minus_org_y = _mm_sub_ps(box_far_y, sc_min_org[1]);
+    far_minus_org_z = _mm_sub_ps(box_far_z, sc_min_org[2]);
+
+    const __m128 tBoxFarX = _mm_mul_ps(sc_min_rcp[0], far_minus_org_x);
+    const __m128 tBoxFarY = _mm_mul_ps(sc_min_rcp[1], far_minus_org_y);
+    const __m128 tBoxFarZ = _mm_mul_ps(sc_min_rcp[2], far_minus_org_z);
+
+    t1 = _mm_min_ps(t1, tBoxFarX);
+    t1 = _mm_min_ps(t1, tBoxFarY);
+    t1 = _mm_min_ps(t1, tBoxFarZ);
+
+    if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) == 0x0)
+      return RayPacket::MaxSize;
+  }
+
+  // frustum culling failed.  probably at least one ray hits...
+  for (int i = firstActive + 4; i < RayPacket::MaxSize; i+= 4) {
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][i]));
+      near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][i]));
+      near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxNearX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        near_minus_org_x);
+    const __m128 tBoxNearY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        near_minus_org_y);
+    const __m128 tBoxNearZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        near_minus_org_z);
+
+    t0 = _mm_max_ps(t0, tBoxNearX);
+    t0 = _mm_max_ps(t0, tBoxNearY);
+    t0 = _mm_max_ps(t0, tBoxNearZ);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][i]));
+      far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][i]));
+      far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxFarX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        far_minus_org_x);
+    const __m128 tBoxFarY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        far_minus_org_y);
+    const __m128 tBoxFarZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        far_minus_org_z);
+
+    t1 = _mm_min_ps(t1, tBoxFarX);
+    t1 = _mm_min_ps(t1, tBoxFarY);
+    t1 = _mm_min_ps(t1, tBoxFarZ);
+    if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+      return i;
+  }
+
+  return RayPacket::MaxSize;
+}
+
+int DynBVH::lastThatIntersectsSameSignPort(RayPacket& packet,
+                                           const int firstActive,
+                                           const BBox& box,
+                                           const int signs[3]) const {
+  const __m128 box_near_x = _mm_set1_ps(box[signs[0]][0]);
+  const __m128 box_near_y = _mm_set1_ps(box[signs[1]][1]);
+  const __m128 box_near_z = _mm_set1_ps(box[signs[2]][2]);
+
+  const __m128 box_far_x = _mm_set1_ps(box[1-signs[0]][0]);
+  const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
+  const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
+
+  __m128 near_minus_org_x;
+  __m128 near_minus_org_y;
+  __m128 near_minus_org_z;
+
+  __m128 far_minus_org_x;
+  __m128 far_minus_org_y;
+  __m128 far_minus_org_z;
+
+  const RayPacketData* data = packet.data;
+  if (packet.getFlag(RayPacket::ConstantOrigin)) {
+    near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
+    near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
+    near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
+
+    far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
+    far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
+    far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
+  }
+
+  // frustum culling failed.  probably at least one ray hits...
+  const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
+  for (int i = last_ray; i > firstActive; i-= 4) {
+    __m128 t0 = _mm_set1_ps(T_EPSILON);
+    __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][i]));
+      near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][i]));
+      near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxNearX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        near_minus_org_x);
+    const __m128 tBoxNearY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        near_minus_org_y);
+    const __m128 tBoxNearZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        near_minus_org_z);
+
+    t0 = _mm_max_ps(t0, tBoxNearX);
+    t0 = _mm_max_ps(t0, tBoxNearY);
+    t0 = _mm_max_ps(t0, tBoxNearZ);
+
+    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+      far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][i]));
+      far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][i]));
+      far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][i]));
+    }
+
+    const __m128 tBoxFarX = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+                                        far_minus_org_x);
+    const __m128 tBoxFarY = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+                                        far_minus_org_y);
+    const __m128 tBoxFarZ = 
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+                                        far_minus_org_z);
+
+    t1 = _mm_min_ps(t1, tBoxFarX);
+    t1 = _mm_min_ps(t1, tBoxFarY);
+    t1 = _mm_min_ps(t1, tBoxFarZ);
+    if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+      return i + 3;
+  }
+
+  return firstActive + 3;
 }

Modified: trunk/Model/Groups/DynBVH.h
==============================================================================
--- trunk/Model/Groups/DynBVH.h (original)
+++ trunk/Model/Groups/DynBVH.h Wed Nov 15 02:19:24 2006
@@ -128,6 +128,27 @@
                          int axis,
                          BVHCostEval& eval);
 
+      // DynBVH ports
+      void templatedTraverse(const RenderContext& context, RayPacket& 
packet) const;
+
+      int firstActivePort(RayPacket& packet, int firstActive, const BBox& 
box) const;
+      int lastActivePort(RayPacket& packet, int firstActive, const BBox& 
box) const;
+
+      int firstActiveSameSignFrustumPort(RayPacket& packet,
+                                         const int firstActive,
+                                         const BBox& bounds,
+                                         const int signs[3],
+                                         const __m128 sc_min_org[3],
+                                         const __m128 sc_max_org[3],
+                                         const __m128 sc_min_rcp[3],
+                                         const __m128 sc_max_rcp[3],
+                                         const __m128& max_t) const;
+
+      int lastThatIntersectsSameSignPort(RayPacket& packet,
+                                         const int firstActive,
+                                         const BBox& box,
+                                         const int signs[3]) const;
+
     };
 };
 




  • [MANTA] r1236 - trunk/Model/Groups, boulos, 11/15/2006

Archive powered by MHonArc 2.6.16.

Top of page