Text archives Help
- From: boulos@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1236 - trunk/Model/Groups
- Date: Wed, 15 Nov 2006 02:19:26 -0700 (MST)
Author: boulos
Date: Wed Nov 15 02:19:24 2006
New Revision: 1236
Modified:
trunk/Model/Groups/DynBVH.cc
trunk/Model/Groups/DynBVH.h
Log:
Ported code from DynBVH to make a fair comparison.
Works properly on my macbook and is still faster than
DynBVH (now for arachne test)
Modified: trunk/Model/Groups/DynBVH.cc
==============================================================================
--- trunk/Model/Groups/DynBVH.cc (original)
+++ trunk/Model/Groups/DynBVH.cc Wed Nov 15 02:19:24 2006
@@ -10,9 +10,14 @@
const float BVH_C_isec = 10.f;
const float BVH_C_trav = 10.f;
+#define USE_DYNBVH_PORTS 1
+
void DynBVH::intersect(const RenderContext& context, RayPacket& rays) const
{
rays.computeInverseDirections();
+#if USE_DYNBVH_PORTS
+ templatedTraverse(context, rays);
+#else
rays.computeSigns();
// compute IntervalArithmetic Data
@@ -74,7 +79,9 @@
}
#else
intersectNode(0,context,rays, ia_data);
-#endif
+#endif // asserted parent or not
+
+#endif // dynbvh port or not
}
#if USE_ASSERTED_PARENT
@@ -128,7 +135,7 @@
// we need not create a subpacket to help it stop early
// actually if we're just one ray it'll look more than it
- // needs to since it will reintersect the firstActive ray... damn
+ // needs to since it will reintersect the firstActive ray...
int lastActive = lastIntersects(node.bounds, rays);
// build a subpacket from firstActive to lastActive (inclusive,
hence +1)
@@ -647,4 +654,578 @@
}
}
return best_eval.event != -1;
+}
+
+
+// begin port from actual DynBVH
+void DynBVH::templatedTraverse(const RenderContext& context, RayPacket&
packet) const {
+ struct StackNode {
+ int nodeID;
+ int firstActive;
+ };
+
+ StackNode nodeStack[64];// ={}; // NOTE(boulos): no need to 0 out
+ int stackPtr = 0;
+
+ int ID = 0;
+ int firstActive = 0;
+
+ const int frontSon[3] = {
+ packet.getDirection(0, 0) > 0 ? 0 : 1,
+ packet.getDirection(0, 1) > 0 ? 0 : 1,
+ packet.getDirection(0, 2) > 0 ? 0 : 1
+ };
+
+ const BVHNode* const nodes = this->nodes;
+ __m128 min_rcp[3];
+ __m128 max_rcp[3];
+
+ RayPacketData* data = packet.data;
+#pragma unroll(3)
+ for (int d = 0; d < 3; d++) {
+ min_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
+ max_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
+#pragma unroll(8)
+ for (int pack = 1; pack < RayPacket::SSE_MaxSize; pack++) {
+ min_rcp[d] = _mm_min_ps(min_rcp[d],
+ _mm_load_ps(&data->inverseDirection[d][pack *
4]));
+ max_rcp[d] = _mm_max_ps(max_rcp[d],
+ _mm_load_ps(&data->inverseDirection[d][pack *
4]));
+ }
+ }
+
+ // do sign check
+
+ int signChecked =
+ ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[0], _mm_setzero_ps())) == 0xf) ||
+ (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[0], _mm_setzero_ps())) == 0xf)) &&
+ ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[1], _mm_setzero_ps())) == 0xf) ||
+ (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[1], _mm_setzero_ps())) == 0xf)) &&
+ ((_mm_movemask_ps(_mm_cmpgt_ps(min_rcp[2], _mm_setzero_ps())) == 0xf) ||
+ (_mm_movemask_ps(_mm_cmplt_ps(max_rcp[2], _mm_setzero_ps())) == 0xf));
+
+ if (!signChecked) {
+ // variant for non-equal signs
+ for (;;) {
+ const BVHNode& thisNode = nodes[ID];
+ firstActive = firstActivePort(packet, firstActive, thisNode.bounds);
+ if (firstActive < RayPacket::MaxSize) {
+ if (!thisNode.isLeaf()) {
+ // inner node
+ int front = frontSon[thisNode.axis];
+ nodeStack[stackPtr].nodeID = thisNode.child + 1 - front;
+ nodeStack[stackPtr].firstActive = firstActive;
+ stackPtr++;
+ ID = thisNode.child + front;
+ continue;
+ } else {
+ // leaf
+ int lastActive = lastActivePort(packet, firstActive,
thisNode.bounds);
+ // this part has to differ from DynBVH due to requirements
+ // on primitive intersection. Shouldn't hurt though.
+ RayPacket subpacket(packet, firstActive, lastActive+1);
+
+ for (int i = 0; i < thisNode.children; i++ ) {
+ const int object_id = object_ids[thisNode.child+i];
+ this->get(object_id)->intersect(context,subpacket);
+ }
+ }
+ }
+ if (stackPtr <= 0) {
+ return;
+ }
+ --stackPtr;
+ ID = nodeStack[stackPtr].nodeID;
+ firstActive = nodeStack[stackPtr].firstActive;
+ }
+ }
+
+ // sign matches case
+ __m128 min_org[3];
+ __m128 max_org[3];
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ for (int d = 0; d < 3; d++) {
+ min_org[d] = max_org[d] = _mm_load_ps(&data->origin[d][0]);
+ for (int i = 1; i < RayPacket::SSE_MaxSize; i++) {
+ min_org[d] = _mm_min_ps(min_org[d], _mm_load_ps(&data->origin[d][i *
4]));
+ max_org[d] = _mm_max_ps(max_org[d], _mm_load_ps(&data->origin[d][i *
4]));
+ }
+ }
+ }
+
+ __m128 max_t = _mm_load_ps(&data->minT[0]);
+ for (int pack=1; pack < RayPacket::SSE_MaxSize; pack++) {
+ max_t = _mm_max_ps(max_t, _mm_load_ps(&data->minT[pack * 4]));
+ }
+
+ const __m128 signCorrected_max_rcp[3] = {
+ frontSon[0]?max_rcp[0]:min_rcp[0],
+ frontSon[1]?max_rcp[1]:min_rcp[1],
+ frontSon[2]?max_rcp[2]:min_rcp[2]
+ };
+
+ const __m128 signCorrected_min_rcp[3] = {
+ !frontSon[0]?max_rcp[0]:min_rcp[0],
+ !frontSon[1]?max_rcp[1]:min_rcp[1],
+ !frontSon[2]?max_rcp[2]:min_rcp[2]
+ };
+
+ const __m128 signCorrected_max_org[3] = {
+ !frontSon[0]?max_org[0]:min_org[0],
+ !frontSon[1]?max_org[1]:min_org[1],
+ !frontSon[2]?max_org[2]:min_org[2]
+ };
+
+ const __m128 signCorrected_min_org[3] = {
+ frontSon[0]?max_org[0]:min_org[0],
+ frontSon[1]?max_org[1]:min_org[1],
+ frontSon[2]?max_org[2]:min_org[2]
+ };
+
+ for (;;) {
+ const BVHNode& thisNode = nodes[ID];
+ firstActive = firstActiveSameSignFrustumPort(packet, firstActive,
thisNode.bounds, frontSon,
+ signCorrected_min_org,
+ signCorrected_max_org,
+ signCorrected_min_rcp,
+ signCorrected_max_rcp,
+ max_t);
+ if (firstActive < RayPacket::MaxSize) {
+ if (!thisNode.isLeaf()) {
+ // inner node
+ int front = frontSon[thisNode.axis];
+ nodeStack[stackPtr].nodeID = thisNode.child + int(1-front); //
NOTE(boulos): check this...
+ nodeStack[stackPtr].firstActive = firstActive;
+ stackPtr++;
+ ID = thisNode.child + front;
+ continue;
+ } else {
+ int lastActive = lastThatIntersectsSameSignPort(packet,
+ firstActive,
+ thisNode.bounds,
+ frontSon);
+ // Same deal as above.
+ RayPacket subpacket(packet, firstActive, lastActive+1);
+
+ for (int i = 0; i < thisNode.children; i++ ) {
+ const int object_id = object_ids[thisNode.child+i];
+ this->get(object_id)->intersect(context,subpacket);
+ }
+ }
+ }
+ if (stackPtr <= 0) {
+ return;
+ }
+ --stackPtr;
+ ID = nodeStack[stackPtr].nodeID;
+ firstActive = nodeStack[stackPtr].firstActive;
+ }
+}
+
+int DynBVH::firstActivePort(RayPacket& packet, int firstActive, const BBox&
box) const {
+ __m128 box_min_x = _mm_set1_ps(box[0][0]);
+ __m128 box_min_y = _mm_set1_ps(box[0][1]);
+ __m128 box_min_z = _mm_set1_ps(box[0][2]);
+
+ __m128 box_max_x = _mm_set1_ps(box[1][0]);
+ __m128 box_max_y = _mm_set1_ps(box[1][1]);
+ __m128 box_max_z = _mm_set1_ps(box[1][2]);
+
+ __m128 diff_x_min;
+ __m128 diff_y_min;
+ __m128 diff_z_min;
+
+ __m128 diff_x_max;
+ __m128 diff_y_max;
+ __m128 diff_z_max;
+ const RayPacketData* data = packet.data;
+ if (packet.getFlag(RayPacket::ConstantOrigin)) {
+ diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
+ diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
+ diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
+
+ diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
+ diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
+ diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
+ }
+
+ for (int i=firstActive;i < RayPacket::MaxSize; i+=4) {
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
+ diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
+ diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
+
+ diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][i]));
+ diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][i]));
+ diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][i]));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ diff_x_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ diff_x_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ diff_y_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ diff_y_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ diff_z_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ diff_z_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+ return i;
+ }
+
+ return RayPacket::MaxSize;
+}
+
+int DynBVH::lastActivePort(RayPacket& packet, int firstActive, const BBox&
box) const {
+ __m128 box_min_x = _mm_set1_ps(box[0][0]);
+ __m128 box_min_y = _mm_set1_ps(box[0][1]);
+ __m128 box_min_z = _mm_set1_ps(box[0][2]);
+
+ __m128 box_max_x = _mm_set1_ps(box[1][0]);
+ __m128 box_max_y = _mm_set1_ps(box[1][1]);
+ __m128 box_max_z = _mm_set1_ps(box[1][2]);
+
+ __m128 diff_x_min;
+ __m128 diff_y_min;
+ __m128 diff_z_min;
+
+ __m128 diff_x_max;
+ __m128 diff_y_max;
+ __m128 diff_z_max;
+ const RayPacketData* data = packet.data;
+ if (packet.getFlag(RayPacket::ConstantOrigin)) {
+ diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
+ diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
+ diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
+
+ diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
+ diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
+ diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
+ }
+
+ const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
+ for (int i=last_ray; i > firstActive; i -= 4) {
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
+ diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
+ diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
+
+ diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][i]));
+ diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][i]));
+ diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][i]));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ diff_x_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ diff_x_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ diff_y_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ diff_y_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ {
+ const __m128 tBoxMin =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ diff_z_min);
+ const __m128 tBoxMax =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ diff_z_max);
+ t0 = _mm_max_ps(t0, _mm_min_ps(tBoxMin, tBoxMax));
+ t1 = _mm_min_ps(t1, _mm_max_ps(tBoxMin, tBoxMax));
+ }
+
+ // Unlike DynBVH we want to return last active "ray", so the last
+ // entry in this simd is the answer.
+ if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+ return i + 3;
+ }
+
+ return firstActive + 3;
+}
+
+int DynBVH::firstActiveSameSignFrustumPort(RayPacket& packet,
+ const int firstActive,
+ const BBox& box,
+ const int signs[3],
+ const __m128 sc_min_org[3],
+ const __m128 sc_max_org[3],
+ const __m128 sc_min_rcp[3],
+ const __m128 sc_max_rcp[3],
+ const __m128& max_t) const {
+
+ const __m128 box_near_x = _mm_set1_ps(box[signs[0]][0]);
+ const __m128 box_near_y = _mm_set1_ps(box[signs[1]][1]);
+ const __m128 box_near_z = _mm_set1_ps(box[signs[2]][2]);
+
+ const __m128 box_far_x = _mm_set1_ps(box[1-signs[0]][0]);
+ const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
+ const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
+
+ __m128 near_minus_org_x;
+ __m128 near_minus_org_y;
+ __m128 near_minus_org_z;
+
+ __m128 far_minus_org_x;
+ __m128 far_minus_org_y;
+ __m128 far_minus_org_z;
+
+ const RayPacketData* data = packet.data;
+ if (packet.getFlag(RayPacket::ConstantOrigin)) {
+ near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
+ near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
+ near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
+
+ far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
+ far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
+ far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
+ }
+
+ // test first (assumed) packet
+ {
+ const int i = firstActive;
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][i]));
+ near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][i]));
+ near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxNearX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]), near_minus_org_x);
+ const __m128 tBoxNearY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]), near_minus_org_y);
+ const __m128 tBoxNearZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]), near_minus_org_z);
+
+ t0 = _mm_max_ps(t0, tBoxNearX);
+ t0 = _mm_max_ps(t0, tBoxNearY);
+ t0 = _mm_max_ps(t0, tBoxNearZ);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][i]));
+ far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][i]));
+ far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxFarX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]), far_minus_org_x);
+ const __m128 tBoxFarY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]), far_minus_org_y);
+ const __m128 tBoxFarZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]), far_minus_org_z);
+
+ t1 = _mm_min_ps(t1, tBoxFarX);
+ t1 = _mm_min_ps(t1, tBoxFarY);
+ t1 = _mm_min_ps(t1, tBoxFarZ);
+
+ if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) != 0x0)
+ return i;
+ }
+
+ // first greedy "maybe hit" test failed.
+
+ if (packet.getFlag(RayPacket::ConstantOrigin)) {
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = max_t;
+
+ const __m128 tBoxNearX = _mm_mul_ps(sc_max_rcp[0], near_minus_org_x);
+ const __m128 tBoxNearY = _mm_mul_ps(sc_max_rcp[1], near_minus_org_y);
+ const __m128 tBoxNearZ = _mm_mul_ps(sc_max_rcp[2], near_minus_org_z);
+
+ t0 = _mm_max_ps(t0, tBoxNearX);
+ t0 = _mm_max_ps(t0, tBoxNearY);
+ t0 = _mm_max_ps(t0, tBoxNearZ);
+
+ const __m128 tBoxFarX = _mm_mul_ps(sc_min_rcp[0], far_minus_org_x);
+ const __m128 tBoxFarY = _mm_mul_ps(sc_min_rcp[1], far_minus_org_y);
+ const __m128 tBoxFarZ = _mm_mul_ps(sc_min_rcp[2], far_minus_org_z);
+
+ t1 = _mm_min_ps(t1, tBoxFarX);
+ t1 = _mm_min_ps(t1, tBoxFarY);
+ t1 = _mm_min_ps(t1, tBoxFarZ);
+
+ if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) == 0x0)
+ return RayPacket::MaxSize;
+ } else {
+ // IA for non-constant origin
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = max_t;
+
+ near_minus_org_x = _mm_sub_ps(box_near_x, sc_max_org[0]);
+ near_minus_org_y = _mm_sub_ps(box_near_y, sc_max_org[1]);
+ near_minus_org_z = _mm_sub_ps(box_near_z, sc_max_org[2]);
+
+ const __m128 tBoxNearX = _mm_mul_ps(sc_max_rcp[0], near_minus_org_x);
+ const __m128 tBoxNearY = _mm_mul_ps(sc_max_rcp[1], near_minus_org_y);
+ const __m128 tBoxNearZ = _mm_mul_ps(sc_max_rcp[2], near_minus_org_z);
+
+ t0 = _mm_max_ps(t0, tBoxNearX);
+ t0 = _mm_max_ps(t0, tBoxNearY);
+ t0 = _mm_max_ps(t0, tBoxNearZ);
+
+ far_minus_org_x = _mm_sub_ps(box_far_x, sc_min_org[0]);
+ far_minus_org_y = _mm_sub_ps(box_far_y, sc_min_org[1]);
+ far_minus_org_z = _mm_sub_ps(box_far_z, sc_min_org[2]);
+
+ const __m128 tBoxFarX = _mm_mul_ps(sc_min_rcp[0], far_minus_org_x);
+ const __m128 tBoxFarY = _mm_mul_ps(sc_min_rcp[1], far_minus_org_y);
+ const __m128 tBoxFarZ = _mm_mul_ps(sc_min_rcp[2], far_minus_org_z);
+
+ t1 = _mm_min_ps(t1, tBoxFarX);
+ t1 = _mm_min_ps(t1, tBoxFarY);
+ t1 = _mm_min_ps(t1, tBoxFarZ);
+
+ if (_mm_movemask_ps(_mm_cmple_ps(t0,t1)) == 0x0)
+ return RayPacket::MaxSize;
+ }
+
+ // frustum culling failed. probably at least one ray hits...
+ for (int i = firstActive + 4; i < RayPacket::MaxSize; i+= 4) {
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][i]));
+ near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][i]));
+ near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxNearX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ near_minus_org_x);
+ const __m128 tBoxNearY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ near_minus_org_y);
+ const __m128 tBoxNearZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ near_minus_org_z);
+
+ t0 = _mm_max_ps(t0, tBoxNearX);
+ t0 = _mm_max_ps(t0, tBoxNearY);
+ t0 = _mm_max_ps(t0, tBoxNearZ);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][i]));
+ far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][i]));
+ far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxFarX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ far_minus_org_x);
+ const __m128 tBoxFarY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ far_minus_org_y);
+ const __m128 tBoxFarZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ far_minus_org_z);
+
+ t1 = _mm_min_ps(t1, tBoxFarX);
+ t1 = _mm_min_ps(t1, tBoxFarY);
+ t1 = _mm_min_ps(t1, tBoxFarZ);
+ if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+ return i;
+ }
+
+ return RayPacket::MaxSize;
+}
+
+int DynBVH::lastThatIntersectsSameSignPort(RayPacket& packet,
+ const int firstActive,
+ const BBox& box,
+ const int signs[3]) const {
+ const __m128 box_near_x = _mm_set1_ps(box[signs[0]][0]);
+ const __m128 box_near_y = _mm_set1_ps(box[signs[1]][1]);
+ const __m128 box_near_z = _mm_set1_ps(box[signs[2]][2]);
+
+ const __m128 box_far_x = _mm_set1_ps(box[1-signs[0]][0]);
+ const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
+ const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
+
+ __m128 near_minus_org_x;
+ __m128 near_minus_org_y;
+ __m128 near_minus_org_z;
+
+ __m128 far_minus_org_x;
+ __m128 far_minus_org_y;
+ __m128 far_minus_org_z;
+
+ const RayPacketData* data = packet.data;
+ if (packet.getFlag(RayPacket::ConstantOrigin)) {
+ near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
+ near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
+ near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
+
+ far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
+ far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
+ far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
+ }
+
+ // frustum culling failed. probably at least one ray hits...
+ const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
+ for (int i = last_ray; i > firstActive; i-= 4) {
+ __m128 t0 = _mm_set1_ps(T_EPSILON);
+ __m128 t1 = _mm_load_ps(&data->minT[i]);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][i]));
+ near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][i]));
+ near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxNearX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ near_minus_org_x);
+ const __m128 tBoxNearY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ near_minus_org_y);
+ const __m128 tBoxNearZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ near_minus_org_z);
+
+ t0 = _mm_max_ps(t0, tBoxNearX);
+ t0 = _mm_max_ps(t0, tBoxNearY);
+ t0 = _mm_max_ps(t0, tBoxNearZ);
+
+ if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][i]));
+ far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][i]));
+ far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][i]));
+ }
+
+ const __m128 tBoxFarX =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[0][i]),
+ far_minus_org_x);
+ const __m128 tBoxFarY =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[1][i]),
+ far_minus_org_y);
+ const __m128 tBoxFarZ =
_mm_mul_ps(_mm_load_ps(&data->inverseDirection[2][i]),
+ far_minus_org_z);
+
+ t1 = _mm_min_ps(t1, tBoxFarX);
+ t1 = _mm_min_ps(t1, tBoxFarY);
+ t1 = _mm_min_ps(t1, tBoxFarZ);
+ if (_mm_movemask_ps(_mm_cmple_ps(t0, t1)) != 0x0)
+ return i + 3;
+ }
+
+ return firstActive + 3;
}
Modified: trunk/Model/Groups/DynBVH.h
==============================================================================
--- trunk/Model/Groups/DynBVH.h (original)
+++ trunk/Model/Groups/DynBVH.h Wed Nov 15 02:19:24 2006
@@ -128,6 +128,27 @@
int axis,
BVHCostEval& eval);
+ // DynBVH ports
+ void templatedTraverse(const RenderContext& context, RayPacket&
packet) const;
+
+ int firstActivePort(RayPacket& packet, int firstActive, const BBox&
box) const;
+ int lastActivePort(RayPacket& packet, int firstActive, const BBox&
box) const;
+
+ int firstActiveSameSignFrustumPort(RayPacket& packet,
+ const int firstActive,
+ const BBox& bounds,
+ const int signs[3],
+ const __m128 sc_min_org[3],
+ const __m128 sc_max_org[3],
+ const __m128 sc_min_rcp[3],
+ const __m128 sc_max_rcp[3],
+ const __m128& max_t) const;
+
+ int lastThatIntersectsSameSignPort(RayPacket& packet,
+ const int firstActive,
+ const BBox& box,
+ const int signs[3]) const;
+
};
};
- [MANTA] r1236 - trunk/Model/Groups, boulos, 11/15/2006
Archive powered by MHonArc 2.6.16.