Text archives Help
- From: boulos@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1578 - in trunk: Core/Util Model/Groups
- Date: Mon, 30 Jul 2007 13:03:14 -0600 (MDT)
Author: boulos
Date: Mon Jul 30 13:03:14 2007
New Revision: 1578
Added:
trunk/Core/Util/Preprocessor.h
Modified:
trunk/Model/Groups/DynBVH.cc
Log:
Core/Util/Preprocessor.h
Adding an ISO C99-compliant way of doing general pragmas, and causing
unrolling to be ignored under GCC.
Still need to test that ICC still works (as I sadly don't have it on my Mac).
Model/Groups/DynBVH.cc
DynBVH now warning free.
Added: trunk/Core/Util/Preprocessor.h
==============================================================================
--- (empty file)
+++ trunk/Core/Util/Preprocessor.h Mon Jul 30 13:03:14 2007
@@ -0,0 +1,31 @@
+#ifndef MANTA_CORE_UTIL_PREPROCESSOR_H_
+#define MANTA_CORE_UTIL_PREPROCESSOR_H_
+
+// Portable wrappers for compiler-specific pragmas and attributes.
+
+// NOTE(boulos): ISO C99 defines _Pragma to let you do this.
+// _Pragma takes a string literal, so the argument is stringized with #;
+// e.g. MANTA_PRAGMA(unroll(4)) expands to _Pragma("unroll(4)").
+#define MANTA_PRAGMA(str) _Pragma (#str)
+
+// MANTA_UNROLL(n): unroll hint (factor n) for the loop that follows.
+// Use it directly before the loop; a trailing ';' would put an empty
+// statement between the pragma and the loop.
+#if defined(__INTEL_COMPILER)
+// Routed through MANTA_PRAGMA so that _Pragma receives a string literal.
+#define MANTA_UNROLL(unroll_amount) MANTA_PRAGMA(unroll(unroll_amount))
+#else
+// NOTE(boulos): Assuming GCC
+// Expands to nothing, so the unroll request is ignored.
+#define MANTA_UNROLL(unroll_amount)
+#endif
+
+// MANTA_FORCEINLINE: compiler-specific spelling of a forced-inline request.
+#if defined(__INTEL_COMPILER)
+#define MANTA_FORCEINLINE __forceinline
+#else
+// NOTE(boulos): Assuming GCC
+#define MANTA_FORCEINLINE __attribute__ ((always_inline))
+#endif
+
+#endif // MANTA_CORE_UTIL_PREPROCESSOR_H_
Modified: trunk/Model/Groups/DynBVH.cc
==============================================================================
--- trunk/Model/Groups/DynBVH.cc (original)
+++ trunk/Model/Groups/DynBVH.cc Mon Jul 30 13:03:14 2007
@@ -1,4 +1,5 @@
#include <Model/Groups/DynBVH.h>
+#include <Core/Util/Preprocessor.h>
#include <DynBVH_Parameters.h>
#include <stdio.h>
#include <stdlib.h>
@@ -497,9 +498,9 @@
int bestAxis = -1;
int split = -1;
- int num_objects = objectEnd - objectBegin;
#if USE_APPROXIMATE_BUILD // 30% slower for certain datasets.
+ int num_objects = objectEnd - objectBegin;
if (num_objects < 32) {
split = partitionSAH(context, objectBegin,objectEnd,bestAxis);
} else {
@@ -609,6 +610,8 @@
BVHCostEval best_cost;
best_cost.cost = BVH_C_isec * num_objects;
best_cost.axis = -1;
+ best_cost.position = FLT_MAX;
+ best_cost.event = -1;
// TODO(boulos): Avoid recomputing overall bounds for sample
// positions by passing in from parent.
@@ -760,6 +763,8 @@
BVHCostEval best_cost;
best_cost.cost = BVH_C_isec * num_objects;
best_cost.axis = -1;
+ best_cost.position = FLT_MAX;
+ best_cost.event = -1;
for ( int axis = 0; axis < 3; axis++ )
{
@@ -923,11 +928,12 @@
__m128 max_rcp[3];
RayPacketData* data = packet.data;
-#pragma unroll(3)
+ MANTA_UNROLL(3);
for (int d = 0; d < 3; d++) {
min_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
max_rcp[d] = _mm_load_ps(&data->inverseDirection[d][0]);
-#pragma unroll(8)
+
+ MANTA_UNROLL(8);
for (int pack = 1; pack < RayPacket::SSE_MaxSize; pack++) {
min_rcp[d] = _mm_min_ps(min_rcp[d],
_mm_load_ps(&data->inverseDirection[d][pack *
4]));
@@ -1074,29 +1080,25 @@
__m128 box_max_y = _mm_set1_ps(box[1][1]);
__m128 box_max_z = _mm_set1_ps(box[1][2]);
- __m128 diff_x_min;
- __m128 diff_y_min;
- __m128 diff_z_min;
-
- __m128 diff_x_max;
- __m128 diff_y_max;
- __m128 diff_z_max;
const RayPacketData* data = packet.data;
- if (packet.getFlag(RayPacket::ConstantOrigin)) {
- diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
- diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
- diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
-
- diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
- diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
- diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
- }
+ const bool constant_origin = packet.getFlag(RayPacket::ConstantOrigin);
+
+ // NOTE(boulos): To get rid of a warning, we'll just do this one
+ // always. TODO(boulos): Make this code correctly handle variable
+ // sized ray packets.
+ __m128 diff_x_min = _mm_sub_ps(box_min_x,
_mm_load_ps(&data->origin[0][0]));;
+ __m128 diff_y_min = _mm_sub_ps(box_min_y,
_mm_load_ps(&data->origin[1][0]));;
+ __m128 diff_z_min = _mm_sub_ps(box_min_z,
_mm_load_ps(&data->origin[2][0]));;
+
+ __m128 diff_x_max = _mm_sub_ps(box_max_x,
_mm_load_ps(&data->origin[0][0]));;
+ __m128 diff_y_max = _mm_sub_ps(box_max_y,
_mm_load_ps(&data->origin[1][0]));;
+ __m128 diff_z_max = _mm_sub_ps(box_max_z,
_mm_load_ps(&data->origin[2][0]));;
for (int i=firstActive;i < RayPacket::MaxSize; i+=4) {
__m128 t0 = _mm_set1_ps(T_EPSILON);
__m128 t1 = _mm_load_ps(&data->minT[i]);
- if (!packet.getFlag(RayPacket::ConstantOrigin)) {
+ if (!constant_origin) {
diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][i]));
diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][i]));
diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][i]));
@@ -1148,23 +1150,15 @@
__m128 box_max_y = _mm_set1_ps(box[1][1]);
__m128 box_max_z = _mm_set1_ps(box[1][2]);
- __m128 diff_x_min;
- __m128 diff_y_min;
- __m128 diff_z_min;
-
- __m128 diff_x_max;
- __m128 diff_y_max;
- __m128 diff_z_max;
const RayPacketData* data = packet.data;
- if (packet.getFlag(RayPacket::ConstantOrigin)) {
- diff_x_min = _mm_sub_ps(box_min_x, _mm_load_ps(&data->origin[0][0]));
- diff_y_min = _mm_sub_ps(box_min_y, _mm_load_ps(&data->origin[1][0]));
- diff_z_min = _mm_sub_ps(box_min_z, _mm_load_ps(&data->origin[2][0]));
-
- diff_x_max = _mm_sub_ps(box_max_x, _mm_load_ps(&data->origin[0][0]));
- diff_y_max = _mm_sub_ps(box_max_y, _mm_load_ps(&data->origin[1][0]));
- diff_z_max = _mm_sub_ps(box_max_z, _mm_load_ps(&data->origin[2][0]));
- }
+
+ __m128 diff_x_min = _mm_sub_ps(box_min_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 diff_y_min = _mm_sub_ps(box_min_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 diff_z_min = _mm_sub_ps(box_min_z,
_mm_load_ps(&data->origin[2][0]));
+
+ __m128 diff_x_max = _mm_sub_ps(box_max_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 diff_y_max = _mm_sub_ps(box_max_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 diff_z_max = _mm_sub_ps(box_max_z,
_mm_load_ps(&data->origin[2][0]));
const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
for (int i=last_ray; i > firstActive; i -= 4) {
@@ -1234,24 +1228,15 @@
const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
- __m128 near_minus_org_x;
- __m128 near_minus_org_y;
- __m128 near_minus_org_z;
-
- __m128 far_minus_org_x;
- __m128 far_minus_org_y;
- __m128 far_minus_org_z;
-
const RayPacketData* data = packet.data;
- if (packet.getFlag(RayPacket::ConstantOrigin)) {
- near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
- near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
- near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
-
- far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
- far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
- far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
- }
+
+ __m128 near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
+
+ __m128 far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
// test first (assumed) packet
{
@@ -1405,24 +1390,15 @@
const __m128 box_far_y = _mm_set1_ps(box[1-signs[1]][1]);
const __m128 box_far_z = _mm_set1_ps(box[1-signs[2]][2]);
- __m128 near_minus_org_x;
- __m128 near_minus_org_y;
- __m128 near_minus_org_z;
-
- __m128 far_minus_org_x;
- __m128 far_minus_org_y;
- __m128 far_minus_org_z;
-
const RayPacketData* data = packet.data;
- if (packet.getFlag(RayPacket::ConstantOrigin)) {
- near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
- near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
- near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
-
- far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
- far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
- far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
- }
+
+ __m128 near_minus_org_x = _mm_sub_ps(box_near_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 near_minus_org_y = _mm_sub_ps(box_near_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 near_minus_org_z = _mm_sub_ps(box_near_z,
_mm_load_ps(&data->origin[2][0]));
+
+ __m128 far_minus_org_x = _mm_sub_ps(box_far_x,
_mm_load_ps(&data->origin[0][0]));
+ __m128 far_minus_org_y = _mm_sub_ps(box_far_y,
_mm_load_ps(&data->origin[1][0]));
+ __m128 far_minus_org_z = _mm_sub_ps(box_far_z,
_mm_load_ps(&data->origin[2][0]));
// frustum culling failed. probably at least one ray hits...
const int last_ray = (RayPacket::SSE_MaxSize - 1) * 4;
- [MANTA] r1578 - in trunk: Core/Util Model/Groups, boulos, 07/30/2007
Archive powered by MHonArc 2.6.16.