Text archives Help
- From: "Solomon Boulos" <boulos@cs.utah.edu>
- To: manta@sci.utah.edu
- Subject: [Manta] r2140 - trunk/Model/Groups
- Date: Mon, 3 Mar 2008 19:43:14 -0700 (MST)
Author: boulos
Date: Mon Mar 3 19:43:13 2008
New Revision: 2140
Modified:
trunk/Model/Groups/DynBVH.cc
Log:
Model/Groups/DynBVH.cc
Do the IA (interval arithmetic) calculation in SSE too.
Modified: trunk/Model/Groups/DynBVH.cc
==============================================================================
--- trunk/Model/Groups/DynBVH.cc (original)
+++ trunk/Model/Groups/DynBVH.cc Mon Mar 3 19:43:13 2008
@@ -77,17 +77,127 @@
// compute IntervalArithmetic Data
IAData ia_data;
- for (int axis = 0; axis < 3; axis++ )
- {
+ for (int axis = 0; axis < 3; axis++ ) {
ia_data.min_rcp[axis] = std::numeric_limits<float>::max();
ia_data.max_rcp[axis] = -std::numeric_limits<float>::max();
ia_data.min_org[axis] = std::numeric_limits<float>::max();
ia_data.max_org[axis] = -std::numeric_limits<float>::max();
ia_data.min_org_rcp[axis] = std::numeric_limits<float>::max();
ia_data.max_org_rcp[axis] = -std::numeric_limits<float>::max();
+ }
+
+#ifdef MANTA_SSE
+ int b = (rays.begin() + 3) & ~3;
+ int e = (rays.end()) & ~3;
+ if (b >= e) {
+ for (int ray = rays.begin(); ray < rays.end(); ray++ ) {
+#if TEST_MASKS
+ if (rays.rayIsMasked(ray)) continue;
+#endif
+ for (int axis = 0; axis < 3; axis++) {
+ const Real new_rcp = rays.getInverseDirection(ray, axis);
+ const Real new_org = rays.getOrigin(ray,axis);
+ const Real new_org_rcp = new_org * new_rcp;
+
+ ia_data.min_rcp[axis] = (ia_data.min_rcp[axis] < new_rcp) ? ia_data.min_rcp[axis] : new_rcp;
+ ia_data.max_rcp[axis] = (ia_data.max_rcp[axis] < new_rcp) ? new_rcp : ia_data.max_rcp[axis];
+
+ ia_data.min_org[axis] = (ia_data.min_org[axis] < new_org) ? ia_data.min_org[axis] : new_org;
+ ia_data.max_org[axis] = (ia_data.max_org[axis] < new_org) ? new_org : ia_data.max_org[axis];
+
+ ia_data.min_org_rcp[axis] = (ia_data.min_org_rcp[axis] < new_org_rcp) ?
+ ia_data.min_org_rcp[axis] : new_org_rcp;
+ ia_data.max_org_rcp[axis] = (ia_data.max_org_rcp[axis] < new_org_rcp) ?
+ new_org_rcp : ia_data.max_org_rcp[axis];
+ }
}
+ } else {
+ for (int ray = rays.begin(); ray < b; ray++) {
+#if TEST_MASKS
+ if (rays.rayIsMasked(ray)) continue;
+#endif
+ for (int axis = 0; axis < 3; axis++) {
+ const Real new_rcp = rays.getInverseDirection(ray, axis);
+ const Real new_org = rays.getOrigin(ray,axis);
+ const Real new_org_rcp = new_org * new_rcp;
+
+ ia_data.min_rcp[axis] = (ia_data.min_rcp[axis] < new_rcp) ? ia_data.min_rcp[axis] : new_rcp;
+ ia_data.max_rcp[axis] = (ia_data.max_rcp[axis] < new_rcp) ? new_rcp : ia_data.max_rcp[axis];
- // TODO(boulos): provide an SSE version
+ ia_data.min_org[axis] = (ia_data.min_org[axis] < new_org) ? ia_data.min_org[axis] : new_org;
+ ia_data.max_org[axis] = (ia_data.max_org[axis] < new_org) ? new_org : ia_data.max_org[axis];
+
+ ia_data.min_org_rcp[axis] = (ia_data.min_org_rcp[axis] < new_org_rcp) ?
+ ia_data.min_org_rcp[axis] : new_org_rcp;
+ ia_data.max_org_rcp[axis] = (ia_data.max_org_rcp[axis] < new_org_rcp) ?
+ new_org_rcp : ia_data.max_org_rcp[axis];
+ }
+ }
+ __m128 min_rcp[3];
+ __m128 max_rcp[3];
+ __m128 min_org[3];
+ __m128 max_org[3];
+ __m128 min_org_rcp[3];
+ __m128 max_org_rcp[3];
+ // Copy current values
+ for (int axis = 0; axis < 3; axis++) {
+ min_rcp[axis] = _mm_set1_ps(ia_data.min_rcp[axis]);
+ max_rcp[axis] = _mm_set1_ps(ia_data.max_rcp[axis]);
+ min_org[axis] = _mm_set1_ps(ia_data.min_org[axis]);
+ max_org[axis] = _mm_set1_ps(ia_data.max_org[axis]);
+ min_org_rcp[axis] = _mm_set1_ps(ia_data.min_org_rcp[axis]);
+ max_org_rcp[axis] = _mm_set1_ps(ia_data.max_org_rcp[axis]);
+ }
+ // Loop over all rays over all 3 axes (note change in order for
+ // cache friendly walk down the directions and origins)
+ for (int axis = 0; axis < 3; axis++) {
+ for (int ray = b; ray < e; ray += 4) {
+ __m128 new_rcp = _mm_load_ps(&(rays.data->inverseDirection[axis][ray]));
+ __m128 new_org = _mm_load_ps(&(rays.data->origin[axis][ray]));
+ __m128 new_org_rcp = _mm_mul_ps(new_org, new_rcp);
+
+ min_rcp[axis] = _mm_min_ps(min_rcp[axis], new_rcp);
+ max_rcp[axis] = _mm_max_ps(max_rcp[axis], new_rcp);
+ min_org[axis] = _mm_min_ps(min_org[axis], new_org);
+ max_org[axis] = _mm_max_ps(max_org[axis], new_org);
+ min_org_rcp[axis] = _mm_min_ps(min_org_rcp[axis], new_org_rcp);
+ max_org_rcp[axis] = _mm_max_ps(max_org_rcp[axis], new_org_rcp);
+ }
+ }
+ // Copy the results back out
+ for (int axis = 0; axis < 3; axis++) {
+ ia_data.min_rcp[axis] = min4f(min_rcp[axis]);
+ ia_data.max_rcp[axis] = max4f(max_rcp[axis]);
+
+ ia_data.min_org[axis] = min4f(min_org[axis]);
+ ia_data.max_org[axis] = max4f(max_org[axis]);
+
+ ia_data.min_org_rcp[axis] = min4f(min_org_rcp[axis]);
+ ia_data.max_org_rcp[axis] = max4f(max_org_rcp[axis]);
+ }
+ for (int ray = e; ray < rays.end(); ray++) {
+#if TEST_MASKS
+ if (rays.rayIsMasked(ray)) continue;
+#endif
+ for (int axis = 0; axis < 3; axis++) {
+ const Real new_rcp = rays.getInverseDirection(ray, axis);
+ const Real new_org = rays.getOrigin(ray,axis);
+ const Real new_org_rcp = new_org * new_rcp;
+
+ ia_data.min_rcp[axis] = (ia_data.min_rcp[axis] < new_rcp) ? ia_data.min_rcp[axis] : new_rcp;
+ ia_data.max_rcp[axis] = (ia_data.max_rcp[axis] < new_rcp) ? new_rcp : ia_data.max_rcp[axis];
+
+ ia_data.min_org[axis] = (ia_data.min_org[axis] < new_org) ? ia_data.min_org[axis] : new_org;
+ ia_data.max_org[axis] = (ia_data.max_org[axis] < new_org) ? new_org : ia_data.max_org[axis];
+
+ ia_data.min_org_rcp[axis] = (ia_data.min_org_rcp[axis] < new_org_rcp) ?
+ ia_data.min_org_rcp[axis] : new_org_rcp;
+ ia_data.max_org_rcp[axis] = (ia_data.max_org_rcp[axis] < new_org_rcp) ?
+ new_org_rcp : ia_data.max_org_rcp[axis];
+ }
+ }
+ }
+#else
for (int ray = rays.begin(); ray < rays.end(); ray++ ) {
#if TEST_MASKS
if (rays.rayIsMasked(ray)) continue;
@@ -109,6 +219,7 @@
new_org_rcp : ia_data.max_org_rcp[axis];
}
}
+#endif
intersectNode(0,context,rays, ia_data);
- [Manta] r2140 - trunk/Model/Groups, Solomon Boulos, 03/03/2008
Archive powered by MHonArc 2.6.16.