Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1578 - in trunk: Core/Util Model/Groups


Chronological Thread 
  • From: boulos@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1578 - in trunk: Core/Util Model/Groups
  • Date: Mon, 30 Jul 2007 13:03:14 -0600 (MDT)

Author: boulos
Date: Mon Jul 30 13:03:14 2007
New Revision: 1578

Added:
   trunk/Core/Util/Preprocessor.h
Modified:
   trunk/Model/Groups/DynBVH.cc
Log:
Core/Util/Preprocessor.h

  Adds an ISO C99-compliant way of issuing general pragmas (via _Pragma) and
  makes loop-unrolling hints expand to nothing under GCC.

  Still need to verify that this works under ICC (sadly, I don't have it on my Mac).

Model/Groups/DynBVH.cc

  DynBVH now warning free.


Added: trunk/Core/Util/Preprocessor.h
==============================================================================
--- (empty file)
+++ trunk/Core/Util/Preprocessor.h      Mon Jul 30 13:03:14 2007
@@ -0,0 +1,22 @@
+#ifndef MANTA_CORE_UTIL_PREPROCESSOR_H_
+#define MANTA_CORE_UTIL_PREPROCESSOR_H_
+// Compiler-portability macros: pragmas, loop unrolling and forced inlining.
+// NOTE(boulos): ISO C99 defines _Pragma to let you do this.
+// _Pragma only accepts a string literal, hence the # stringification.
+#define MANTA_PRAGMA(str) _Pragma (#str)
+// NOTE(review): a ';' after MANTA_UNROLL(n) may detach the pragma from its loop.
+#if defined(__INTEL_COMPILER)
+#define MANTA_UNROLL(unroll_amount) MANTA_PRAGMA(unroll(unroll_amount))
+#else
+// NOTE(boulos): Assuming GCC, where the unroll hint expands to nothing.
+#define MANTA_UNROLL(unroll_amount)
+#endif
+
+#if defined(__INTEL_COMPILER)
+#define MANTA_FORCEINLINE __forceinline
+#else
+// NOTE(boulos): Assuming GCC
+#define MANTA_FORCEINLINE __attribute__ ((always_inline))
+#endif
+
+#endif // MANTA_CORE_UTIL_PREPROCESSOR_H_

Modified: trunk/Model/Groups/DynBVH.cc
==============================================================================
--- trunk/Model/Groups/DynBVH.cc        (original)
+++ trunk/Model/Groups/DynBVH.cc        Mon Jul 30 13:03:14 2007
@@ -1,4 +1,5 @@
 #include <Model/Groups/DynBVH.h>
+#include <Core/Util/Preprocessor.h>
 #include <DynBVH_Parameters.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -497,9 +498,9 @@
 
     int bestAxis = -1;
     int split = -1;
-    int num_objects = objectEnd - objectBegin;
 
 #if USE_APPROXIMATE_BUILD // 30% slower for certain datasets.
+    int num_objects = objectEnd - objectBegin;
     if (num_objects < 32) {
       split = partitionSAH(context, objectBegin,objectEnd,bestAxis);
     } else {
@@ -609,6 +610,8 @@
   BVHCostEval best_cost;
   best_cost.cost = BVH_C_isec * num_objects;
   best_cost.axis = -1;
+  best_cost.position = FLT_MAX;
+  best_cost.event = -1;
 
   // TODO(boulos): Avoid recomputing overall bounds for sample
   // positions by passing in from parent.
@@ -760,6 +763,8 @@
     BVHCostEval best_cost;
     best_cost.cost = BVH_C_isec * num_objects;
     best_cost.axis = -1;
+    best_cost.position = FLT_MAX;
+    best_cost.event = -1;
 
     for ( int axis = 0; axis < 3; axis++ )
     {
@@ -923,11 +928,12 @@
   __m128 max_rcp[3];
 
   RayPacketData* data = packet.data;
-#pragma unroll(3)
+  MANTA_UNROLL(3);
   for (int d = 0; d < 3; d++) {
     min_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
     max_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
-#pragma unroll(8)
+
+    MANTA_UNROLL(8);
     for (int pack = 1; pack < RayPacket::SSE_MaxSize; pack++) {
       min_rcp[d] = _mm_min_ps(min_rcp[d],
                               _mm_load_ps(&data->inverseDirection[d][pack * 
4]));
@@ -1074,29 +1080,25 @@
   __m128 box_max_y = _mm_set1_ps(box[1][1]);
   __m128 box_max_z = _mm_set1_ps(box[1][2]);
 
-  __m128 diff_x_min;
-  __m128 diff_y_min;
-  __m128 diff_z_min;
-
-  __m128 diff_x_max;
-  __m128 diff_y_max;
-  __m128 diff_z_max;
   const RayPacketData* data = packet.data;
-  if (packet.getFlag(RayPacket::ConstantOrigin)) {
-    diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
-    diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
-    diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
-
-    diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
-    diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
-    diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
-  }
+  const bool constant_origin = packet.getFlag(RayPacket::ConstantOrigin);
+
+  // NOTE(boulos): To get rid of a warning, we'll just do this one
+  // always. TODO(boulos): Make this code correctly handle variable
+  // sized ray packets.
+  __m128 diff_x_min = _mm_sub_ps(box_min_x, 
_mm_load_ps(&data->origin[0][0]));;
+  __m128 diff_y_min = _mm_sub_ps(box_min_y, 
_mm_load_ps(&data->origin[1][0]));;
+  __m128 diff_z_min = _mm_sub_ps(box_min_z, 
_mm_load_ps(&data->origin[2][0]));;
+
+  __m128 diff_x_max = _mm_sub_ps(box_max_x, 
_mm_load_ps(&data->origin[0][0]));;
+  __m128 diff_y_max = _mm_sub_ps(box_max_y, 
_mm_load_ps(&data->origin[1][0]));;
+  __m128 diff_z_max = _mm_sub_ps(box_max_z, 
_mm_load_ps(&data->origin[2][0]));;
 
   for (int i=firstActive;i < RayPacket::MaxSize; i+=4) {
     __m128 t0 = _mm_set1_ps(T_EPSILON);
     __m128 t1 = _mm_load_ps(&data->minT[i]);
 
-    if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+    if (!constant_origin) {
       diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
       diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
       diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
@@ -1148,23 +1150,15 @@
   __m128 box_max_y = _mm_set1_ps(box[1][1]);
   __m128 box_max_z = _mm_set1_ps(box[1][2]);
 
-  __m128 diff_x_min;
-  __m128 diff_y_min;
-  __m128 diff_z_min;
-
-  __m128 diff_x_max;
-  __m128 diff_y_max;
-  __m128 diff_z_max;
   const RayPacketData* data = packet.data;
-  if (packet.getFlag(RayPacket::ConstantOrigin)) {
-    diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
-    diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
-    diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
-
-    diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
-    diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
-    diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
-  }
+
+  __m128 diff_x_min = _mm_sub_ps(box_min_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 diff_y_min = _mm_sub_ps(box_min_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 diff_z_min = _mm_sub_ps(box_min_z, 
_mm_load_ps(&data->origin[2][0]));
+
+  __m128 diff_x_max = _mm_sub_ps(box_max_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 diff_y_max = _mm_sub_ps(box_max_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 diff_z_max = _mm_sub_ps(box_max_z, 
_mm_load_ps(&data->origin[2][0]));
 
   const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
   for (int i=last_ray; i > firstActive; i -= 4) {
@@ -1234,24 +1228,15 @@
   const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
   const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
 
-  __m128 near_minus_org_x;
-  __m128 near_minus_org_y;
-  __m128 near_minus_org_z;
-
-  __m128 far_minus_org_x;
-  __m128 far_minus_org_y;
-  __m128 far_minus_org_z;
-
   const RayPacketData* data = packet.data;
-  if (packet.getFlag(RayPacket::ConstantOrigin)) {
-    near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
-    near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
-    near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
-
-    far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
-    far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
-    far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
-  }
+
+  __m128 near_minus_org_x  = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 near_minus_org_y  = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 near_minus_org_z  = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
+
+  __m128 far_minus_org_x   = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 far_minus_org_y   = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 far_minus_org_z   = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
 
   // test first (assumed) packet
   {
@@ -1405,24 +1390,15 @@
   const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
   const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
 
-  __m128 near_minus_org_x;
-  __m128 near_minus_org_y;
-  __m128 near_minus_org_z;
-
-  __m128 far_minus_org_x;
-  __m128 far_minus_org_y;
-  __m128 far_minus_org_z;
-
   const RayPacketData* data = packet.data;
-  if (packet.getFlag(RayPacket::ConstantOrigin)) {
-    near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
-    near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
-    near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
-
-    far_minus_org_x = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
-    far_minus_org_y = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
-    far_minus_org_z = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
-  }
+
+  __m128 near_minus_org_x = _mm_sub_ps(box_near_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 near_minus_org_y = _mm_sub_ps(box_near_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 near_minus_org_z = _mm_sub_ps(box_near_z, 
_mm_load_ps(&data->origin[2][0]));
+
+  __m128 far_minus_org_x  = _mm_sub_ps(box_far_x, 
_mm_load_ps(&data->origin[0][0]));
+  __m128 far_minus_org_y  = _mm_sub_ps(box_far_y, 
_mm_load_ps(&data->origin[1][0]));
+  __m128 far_minus_org_z  = _mm_sub_ps(box_far_z, 
_mm_load_ps(&data->origin[2][0]));
 
   // frustum culling failed.  probably at least one ray hits...
   const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;





Archive powered by MHonArc 2.6.16.

Top of page