Text archives Help
- From: knolla@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1099 - in trunk: Core Core/Math Interface Model/Groups Model/Intersections Model/Primitives
- Date: Wed, 7 Jun 2006 09:06:16 -0600 (MDT)
Author: knolla
Date: Wed Jun 7 09:06:10 2006
New Revision: 1099
Added:
trunk/Core/Math/SSEDefs.h
Modified:
trunk/Core/CMakeLists.txt
trunk/Interface/RayPacket.h
trunk/Model/Groups/SSEKDTree.cc
trunk/Model/Intersections/IsosurfaceImplicit.cc
trunk/Model/Intersections/IsosurfaceImplicit.h
trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
trunk/Model/Primitives/IsosurfaceOctreeVolume.h
trunk/Model/Primitives/OctreeVolume.h
Log:
Implemented a first pass at an SSE octree traversal using an implicit BVH. Slow and buggy.
Modified: trunk/Core/CMakeLists.txt
==============================================================================
--- trunk/Core/CMakeLists.txt (original)
+++ trunk/Core/CMakeLists.txt Wed Jun 7 09:06:10 2006
@@ -48,6 +48,7 @@
Math/Noise.cc
Math/ipow.h
Math/CatmullRomInterpolator.h
+ Math/SSEDefs.h
)
SET (CORE_SOURCES ${CORE_SOURCES}
Util/Args.h
Added: trunk/Core/Math/SSEDefs.h
==============================================================================
--- (empty file)
+++ trunk/Core/Math/SSEDefs.h Wed Jun 7 09:06:10 2006
@@ -0,0 +1,251 @@
+// SSEDefs.h
+// A comprehensive set of macros for SSE
+
+#ifndef _MANTA_SSEDEFS_H_
+#define _MANTA_SSEDEFS_H_
+
+#ifdef MANTA_SSE
+#include <xmmintrin.h>
+#include <Core/Util/Align.h>
+#include <Core/Geometry/vecdefs.h>
+#include <Core/Geometry/Vector.h>
+
+typedef __m128 sse_t;
+typedef __m128i sse_int_t;
+
+//add to these macros as necessary.
+// Short aliases for the SSE intrinsics used by the code below.  A trailing
+// "4" marks an operation on four packed floats; a trailing "i" selects the
+// __m128i (integer) form of the same operation.
+// NOTE(review): the *_si128 / *_epi32 intrinsics are SSE2 and are normally
+// declared by <emmintrin.h>; only <xmmintrin.h> is included here - confirm
+// that it pulls in the SSE2 declarations on every supported compiler.
+// Bitwise logic.  andnot4(a,b) computes (~a) & b.
+#define or4 _mm_or_ps
+#define or4i _mm_or_si128
+#define and4 _mm_and_ps
+#define and4i _mm_and_si128
+#define andnot4 _mm_andnot_ps
+#define andnot4i _mm_andnot_si128
+// Per-lane arithmetic.
+#define mul4 _mm_mul_ps
+#define add4 _mm_add_ps
+#define sub4 _mm_sub_ps
+#define min4 _mm_min_ps
+#define max4 _mm_max_ps
+// Construction: set4/set4i broadcast one value to all four lanes,
+// set44/set44i take four separate values.
+#define set4 _mm_set_ps1
+#define set44 _mm_set_ps
+#define set4i _mm_set1_epi32
+#define set44i _mm_set_epi32
+#define zero4 _mm_setzero_ps
+// getmask4 packs the sign bit of each lane into the low four bits of an int.
+#define getmask4 _mm_movemask_ps
+// Per-lane comparisons; each lane of the result is all ones or all zeros.
+#define cmp4_ge _mm_cmpge_ps
+#define cmp4_le _mm_cmple_ps
+#define cmp4_gt _mm_cmpgt_ps
+#define cmp4_lt _mm_cmplt_ps
+#define cmp4_eq _mm_cmpeq_ps
+
+//AARONBAD - this should really be somewhere in Core/Math
+// Scalar min/max, defined only if no MIN/MAX macro exists already.  Each
+// argument appears twice in the expansion, so arguments with side effects
+// (e.g. i++) are evaluated twice.
+#ifndef MIN
+#define MIN(a,b) (((a)<(b))?(a):(b))
+#endif
+#ifndef MAX
+#define MAX(a,b) (((a)>(b))?(a):(b))
+#endif
+
+namespace Manta
+{
+ // Broadcast constants: every lane holds the named value.  Being static in
+ // a header, each translation unit that includes this file gets its own copy.
+ // _mm_eps and _mm_epsilon hold the same value (1e-5).
+ static const MANTA_ALIGN(16) sse_t _mm_eps = _mm_set_ps1(1e-5);
+ static const MANTA_ALIGN(16) sse_t _mm_minus_eps = _mm_set_ps1(-1e-5);
+ static const MANTA_ALIGN(16) sse_t _mm_epsilon = _mm_set_ps1(1e-5);
+ static const MANTA_ALIGN(16) sse_t _mm_one = _mm_set_ps1(1.f);
+ static const MANTA_ALIGN(16) sse_t _mm_zero = _mm_set_ps1(0.f);
+ static const MANTA_ALIGN(16) sse_t _mm_one_half = _mm_set_ps1(0.5f);
+ static const MANTA_ALIGN(16) sse_t _mm_two = _mm_set_ps1(2.f);
+ static const MANTA_ALIGN(16) sse_t _mm_256 = _mm_set_ps1(256);
+ static const MANTA_ALIGN(16) sse_t _mm_255 = _mm_set_ps1(255);
+ // NOTE(review): 9.9e9999f is outside the range of float; this presumably
+ // relies on the compiler turning the literal into +/-infinity (most
+ // compilers warn about it) - confirm, or switch to an explicit infinity.
+ static const MANTA_ALIGN(16) sse_t _mm_infty = _mm_set_ps1(9.9e9999f);
+ static const MANTA_ALIGN(16) sse_t _mm_minus_infty =
_mm_set_ps1(-9.9e9999f);
+ // Integer bit patterns that are reinterpreted as floats below to build
+ // bitwise masks (all bits but the sign, the sign bit only, all bits).
+ static const int _mm_intabsmask = 0x7fffffff;
+ static const int _mm_intsignbit = 0x80000000;
+ static const int _mm_inttruemask = 0xffffffff;
+ // NOTE(review): the (float&) casts read a const int through a float lvalue;
+ // this type punning is not sanctioned by the language's aliasing rules -
+ // confirm it behaves as intended on all supported compilers.
+ static const MANTA_ALIGN(16) sse_t _mm_absmask =
_mm_set_ps1((float&)_mm_intabsmask);
+ static const MANTA_ALIGN(16) sse_t _mm_signbit =
_mm_set_ps1((float&)_mm_intsignbit);
+ static const MANTA_ALIGN(16) sse_t _mm_true =
_mm_set_ps1((float&)_mm_inttruemask);
+ // -1 reinterpreted as a float; presumably the same all-ones pattern as
+ // _mm_true (true on two's-complement targets).
+ static const int minusOneI = -1;
+ static const MANTA_ALIGN(16) sse_t _mm_minusOne = _mm_set_ps1((float
&)minusOneI);
+
+ /*! Per-lane linear interpolation: yields v0 + t*(v1 - v0). */
+ inline sse_t lerp4(const sse_t t, const sse_t v0, const sse_t v1)
+ {
+   const sse_t delta = sub4(v1, v0);
+   const sse_t scaled = mul4(t, delta);
+   return add4(v0, scaled);
+ }
+
+ /*! Per-lane 3D dot product of (ox,oy,oz) with (vx,vy,vz), both given as
+     separate x, y and z vectors.  The sum is formed as (x + y) + z. */
+ inline sse_t dot4(const sse_t &ox, const sse_t &oy, const sse_t &oz,
+                   const sse_t &vx, const sse_t &vy, const sse_t &vz)
+ {
+   const sse_t xx = mul4(vx, ox);
+   const sse_t yy = mul4(vy, oy);
+   const sse_t zz = mul4(vz, oz);
+   return add4(add4(xx, yy), zz);
+ }
+
+ // Bitwise select, equivalent to mask ? dest : src.
+ // Bits set in mask are taken from dest, all other bits from src.
+ inline sse_t mask4(const sse_t &mask, const sse_t &dest, const sse_t &src)
+ {
+   const sse_t from_dest = and4(mask, dest);
+   const sse_t from_src = andnot4(mask, src);
+   return or4(from_dest, from_src);
+ }
+
+ // Inverted bitwise select, equivalent to ~mask ? dest : src.
+ // Bits clear in mask are taken from dest, bits set in mask from src.
+ inline sse_t masknot4(const sse_t &mask, const sse_t &dest, const sse_t &src)
+ {
+   const sse_t from_dest = andnot4(mask, dest);
+   const sse_t from_src = and4(mask, src);
+   return or4(from_dest, from_src);
+ }
+
+ // Integer form of the bitwise select: mask ? dest : src.
+ inline sse_int_t mask4i(const sse_int_t &mask, const sse_int_t &dest,
+                         const sse_int_t &src)
+ {
+   const sse_int_t from_dest = and4i(mask, dest);
+   const sse_int_t from_src = andnot4i(mask, src);
+   return or4i(from_dest, from_src);
+ }
+
+ /*! Per-lane absolute value, obtained by clearing each lane's sign bit. */
+ inline sse_t abs4(const sse_t &v)
+ {
+   const sse_t &sign_bits = _mm_signbit;
+   const sse_t magnitude = andnot4(sign_bits, v);
+   return magnitude;
+ }
+
+ /*! Hardware estimate of 1/v refined by one Newton-Raphson step:
+     r' = 2*r - (r*r)*v. */
+ inline sse_t accurateReciprocal(const sse_t v)
+ {
+   const sse_t r = _mm_rcp_ps(v);
+   const sse_t twice_r = add4(r, r);
+   const sse_t r_sq_v = mul4(mul4(r, r), v);
+   return sub4(twice_r, r_sq_v);
+ }
+
+ /*! Hardware estimate of 1/v refined by three Newton-Raphson steps, each of
+     the form r' = 2*r - (r*r)*v. */
+ inline sse_t uberAccurateReciprocal(const sse_t v)
+ {
+   sse_t r = _mm_rcp_ps(v);
+   for (int step = 0; step < 3; ++step)
+     r = sub4(add4(r, r), mul4(mul4(r, r), v));
+   return r;
+ }
+
+ /*! 1/v: hardware estimate plus one Newton-Raphson step.  This is the same
+     computation as accurateReciprocal(), so it simply forwards to it. */
+ inline sse_t oneOver(const sse_t v)
+ {
+   return accurateReciprocal(v);
+ }
+
+ /*! Hardware estimate of 1/sqrt(v) refined by one Newton-Raphson step:
+     r' = r * (1.5 - 0.5*(r*r)*v). */
+ inline sse_t accurateReciprocalSqrt(const sse_t v)
+ {
+   const sse_t r = _mm_rsqrt_ps(v);
+   const sse_t three_halves = _mm_set_ps1(1.5f);
+   const sse_t r_sq_v = mul4(mul4(r, r), v);
+   const sse_t half_r_sq_v = mul4(_mm_one_half, r_sq_v);
+   return mul4(r, sub4(three_halves, half_r_sq_v));
+ }
+
+
+ /*! Raw hardware estimate of 1/v, with no refinement step. */
+ inline sse_t reciprocal(const sse_t v)
+ {
+   const sse_t estimate = _mm_rcp_ps(v);
+   return estimate;
+ }
+
+ /*! Returns base + u*du + v*dv, where the scalars u and v are broadcast to
+     all four lanes.  The sum is formed as (base + u*du) + v*dv. */
+ inline sse_t lin(const sse_t &base,
+                  float u, const sse_t du,
+                  float v, const sse_t dv)
+ {
+   const sse_t u_term = mul4(_mm_set_ps1(u), du);
+   const sse_t v_term = mul4(_mm_set_ps1(v), dv);
+   return add4(add4(base, u_term), v_term);
+ }
+
+ /*! Per-lane base + u*du + v*dv, formed as (base + u*du) + v*dv. */
+ inline sse_t lin(const sse_t &base,
+                  const sse_t &u, const sse_t du,
+                  const sse_t &v, const sse_t dv)
+ {
+   const sse_t u_term = mul4(u, du);
+   const sse_t v_term = mul4(v, dv);
+   return add4(add4(base, u_term), v_term);
+ }
+
+
+ /*! Horizontal four-component dot product: every lane of the result holds
+     a.x*b.x + a.y*b.y + a.z*b.z + a.w*b.w. */
+ inline sse_t dot4(const sse_t &a, const sse_t &b)
+ {
+   const sse_t prod = mul4(a, b);
+   // swap the low and high lane pairs and add: (x+z, y+w, z+x, w+y)
+   const sse_t swapped = _mm_shuffle_ps(prod, prod, _MM_SHUFFLE(1,0,3,2));
+   const sse_t pair_sums = add4(swapped, prod);
+   // reverse the lanes and add again so each lane holds the full sum
+   const sse_t reversed =
+     _mm_shuffle_ps(pair_sums, pair_sums, _MM_SHUFFLE(0,1,2,3));
+   return add4(pair_sums, reversed);
+ }
+
+ /*! Four-component dot product of a and b, returned as a scalar. */
+ inline float dot1(const sse_t &a, const sse_t &b)
+ {
+   const sse_t d = dot4(a,b);
+   // Extract lane 0 with an explicit store rather than a (float&) cast: the
+   // cast would strip const and read the vector through a float lvalue.
+   float lane0;
+   _mm_store_ss(&lane0, d);
+   return lane0;
+ }
+
+ /*! Scales v in place by the hardware estimate of 1/sqrt(dot4(v,v)).  All
+     four lanes of v contribute to the length. */
+ inline void normalize(sse_t &v)
+ {
+   const sse_t len_sq = dot4(v, v);
+   const sse_t inv_len = _mm_rsqrt_ps(len_sq);
+   v = mul4(v, inv_len);
+ }
+
+ /*! Squared length of a, taken over all four lanes. */
+ inline float sqrLength(const sse_t &a)
+ {
+   const float len_sq = dot1(a, a);
+   return len_sq;
+ }
+
+ /*! Length of a, taken over all four lanes, returned as a scalar. */
+ inline float length(sse_t a)
+ {
+   const sse_t d = dot4(a,a);
+   // Only lane 0 is returned, so a scalar square root is sufficient.
+   const sse_t v = _mm_sqrt_ss(d);
+   // Extract lane 0 with an explicit store rather than a (float&) cast: the
+   // cast would strip const and read the vector through a float lvalue.
+   float lane0;
+   _mm_store_ss(&lane0, v);
+   return lane0;
+ }
+
+ /*! Horizontal minimum of all four lanes of t. */
+ inline float min4f(sse_t t)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float lo01 = MIN(lanes[0], lanes[1]);
+   const float lo23 = MIN(lanes[2], lanes[3]);
+   return MIN(lo01, lo23);
+ }
+
+ /*! Horizontal maximum of all four lanes of t. */
+ inline float max4f(sse_t t)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float hi01 = MAX(lanes[0], lanes[1]);
+   const float hi23 = MAX(lanes[2], lanes[3]);
+   return MAX(hi01, hi23);
+ }
+
+ /*! Minimum of lanes 0, 1 and 2 of t; lane 3 is ignored. */
+ inline float min3f(sse_t t)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float lo01 = MIN(lanes[0], lanes[1]);
+   return MIN(lo01, lanes[2]);
+ }
+
+ /*! Maximum of lanes 0, 1 and 2 of t; lane 3 is ignored. */
+ inline float max3f(sse_t t)
+ {
+ MANTA_ALIGN(16)
+ float f[4];
+ _mm_store_ps(f,t);
+ return MAX(MAX(f[0],f[1]),f[2]);
+ }
+
+ /*! Returns lane `offset` of t by spilling it to an aligned array.  The
+     offset is not range-checked; the array has four entries (0..3). */
+ inline float simd_component(sse_t t, int offset)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float picked = lanes[offset];
+   return picked;
+ }
+
+ /*! Builds a Vec3f from lanes 2, 1 and 0 of t, in that order; lane 3 is
+     ignored.
+     NOTE(review): the lane order is reversed relative to memory order -
+     confirm that callers keep x in lane 2 and z in lane 0. */
+ inline Vec3f as_Vec3f(sse_t t)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float c0 = lanes[2];
+   const float c1 = lanes[1];
+   const float c2 = lanes[0];
+   return Vec3f(c0, c1, c2);
+ }
+
+ /*! Builds a Vector from lanes 2, 1 and 0 of t, in that order; lane 3 is
+     ignored.
+     NOTE(review): the lane order is reversed relative to memory order -
+     confirm that callers keep x in lane 2 and z in lane 0. */
+ inline Vector as_Vector(sse_t t)
+ {
+   MANTA_ALIGN(16) float lanes[4];
+   _mm_store_ps(lanes, t);
+   const float c0 = lanes[2];
+   const float c1 = lanes[1];
+   const float c2 = lanes[0];
+   return Vector(c0, c1, c2);
+ }
+
+};
+
+#endif //#ifdef MANTA_SSE
+#endif
\ No newline at end of file
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Wed Jun 7 09:06:10 2006
@@ -14,6 +14,10 @@
#include <RayPacketParameters.h>
#include <MantaSSE.h>
+#ifdef MANTA_SSE
+#include <Core/Math/SSEDefs.h>
+#endif
+
// #include <sgi_stl_warnings_off.h>
// #include <algorithm>
// #include <sgi_stl_warnings_on.h>
@@ -22,12 +26,14 @@
class Material;
class RenderContext;
-
class MANTA_ALIGN(16) RayPacketData {
public:
enum {
MaxScratchpadSize = SCRATCHPAD_MAXSIZE,
- MaxSize = RAYPACKET_MAXSIZE
+ MaxSize = RAYPACKET_MAXSIZE,
+#ifdef MANTA_SSE
+ SSE_MaxSize = RAYPACKET_MAXSIZE/4,
+#endif
};
RayPacketData()
{
@@ -55,10 +61,10 @@
MANTA_ALIGN(16) Real inverseDirection[3][MaxSize];
MANTA_ALIGN(16) Real minT[MaxSize];
- Real image[2][MaxSize];
- Real normal[3][MaxSize];
- Real hitPosition[3][MaxSize];
- Real texCoords[3][MaxSize];
+ MANTA_ALIGN(16) Real image[2][MaxSize];
+ MANTA_ALIGN(16) Real normal[3][MaxSize];
+ MANTA_ALIGN(16) Real hitPosition[3][MaxSize];
+ MANTA_ALIGN(16) Real texCoords[3][MaxSize];
// Color-based arrays
@@ -72,11 +78,26 @@
// Char-based arrays
char scratchpad_data[MaxSize][MaxScratchpadSize];
};
+
+#ifdef MANTA_SSE
+ // A set of sse_t pointers through which per-ray float arrays are accessed
+ // four rays at a time.  The struct owns no storage; the pointers have to be
+ // set by the user before use (presumably to the matching RayPacketData
+ // arrays - verify against the code that fills it in).
+ struct MANTA_ALIGN(16) SSERayPacket
+ {
+ sse_t* orig[3];     // ray origins, one pointer per axis
+ sse_t* dir[3];      // ray directions, one pointer per axis
+ sse_t* inv_dir[3];  // inverse ray directions, one pointer per axis
+ sse_t* normal[3];   // hit normals, one pointer per axis
+ sse_t* minT;        // hit distances
+ };
+#endif
class RayPacket {
public:
enum {
MaxSize = RayPacketData::MaxSize,
+
+#ifdef MANTA_SSE
+ SSE_MaxSize = RayPacketData::SSE_MaxSize,
+#endif
// Flags.
ConstantOrigin = 0x0001,
@@ -280,9 +301,22 @@
{
if(flags & HaveInverseDirections)
return;
+#ifdef MANTA_SSE
+ // Four rays are packed into each sse_t, so ray index i lives in SIMD group
+ // i/4.  [b,e) is the largest 4-aligned sub-range of [rayBegin,rayEnd); it
+ // is processed with SSE, and the unaligned head and tail rays with scalar
+ // code so that every active ray gets an inverse direction.
+ // NOTE(review): the sse_t casts assume Real is a 32-bit float whenever
+ // MANTA_SSE is defined - confirm.
+ const int b = (rayBegin + 3) & (~3);
+ const int e = rayEnd & (~3);
+ if (b < e)
+ {
+#pragma unroll(3)
+   for(int j=0; j<3; j++)
+   {
+     sse_t* dirs = (sse_t*)data->direction[j];
+     sse_t* inv_dirs = (sse_t*)data->inverseDirection[j];
+     // SIMD groups are indexed by ray index / 4
+     for(int smd=(b>>2); smd<(e>>2); smd++)
+       inv_dirs[smd] = oneOver(dirs[smd]);
+     // unaligned head
+     for(int i=rayBegin; i<b; i++)
+       data->inverseDirection[j][i] = 1./data->direction[j][i];
+     // unaligned tail
+     for(int i=e; i<rayEnd; i++)
+       data->inverseDirection[j][i] = 1./data->direction[j][i];
+   }
+ }
+ else
+ {
+   // The range contains no complete aligned group of four rays.
+   for(int i=rayBegin;i<rayEnd;i++)
+     for(int j=0;j<3;j++)
+       data->inverseDirection[j][i] = 1./data->direction[j][i];
+ }
+#else
for(int i=rayBegin;i<rayEnd;i++)
for(int j=0;j<3;j++)
data->inverseDirection[j][i] = 1./data->direction[j][i];
+#endif
flags |= HaveInverseDirections;
}
void computeSigns()
Modified: trunk/Model/Groups/SSEKDTree.cc
==============================================================================
--- trunk/Model/Groups/SSEKDTree.cc (original)
+++ trunk/Model/Groups/SSEKDTree.cc Wed Jun 7 09:06:10 2006
@@ -48,6 +48,8 @@
#include <stdio.h>
+#include <Core/Math/SSEDefs.h>
+
using namespace Manta;
using namespace Manta::Kdtree;
using namespace SCIRun;
@@ -551,11 +553,14 @@
}
// Newton-Raphson Iteration for 1/x
+/*
+AARONBAD - now implemented in Core/Math/SSEDefs.h
inline __m128 accurateReciprocal(const __m128& v) {
const __m128 rcp = _mm_rcp_ps(v);
return _mm_sub_ps(_mm_add_ps(rcp, rcp),_mm_mul_ps(_mm_mul_ps(rcp,rcp),v));
}
+*/
void intersectTriangleEdgeSSE( IntersectPacket* result,
const RayPacket& rays,
Modified: trunk/Model/Intersections/IsosurfaceImplicit.cc
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.cc (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.cc Wed Jun 7 09:06:10
2006
@@ -1,8 +1,10 @@
#include <Model/Intersections/IsosurfaceImplicit.h>
#include <Core/Math/CubicSolver.h>
+#include <iostream>
using namespace Manta;
+using namespace std;
//From Steven Parker's 1997 RTRT isosurface intersection
bool IsosurfaceImplicit::single_intersect(const Vector& orig, const Vector&
dir,
@@ -112,4 +114,153 @@
+ (x-x_0)*(y-y_0)*rho[1][1][1];
}
+
+#ifdef MANTA_SSE
+//SSE packet implementation
+//Based on Marmitt et al. 04, Wald 05 SSE intersections (OpenRT)
+// as well as Knoll DynRT implementation
+//
+// Intersects the SIMD ray groups [first,last) with the isosurface inside one
+// cell.  rho holds the eight corner values of the cell and pmin its minimum
+// corner; tenter/texit/hitmask hold, per SIMD group, the ray parameters at
+// which the rays enter and leave the cell and which rays overlap it.
+// Along each ray the trilinear interpolant is a cubic in the normalized
+// parameter t in [0,1] between entry and exit; the root is located by
+// repeated linear interpolation on the bracketing interval.
+// For every ray that hits closer than its current minT, this updates
+// srp.minT, srp.normal and the packet's hitMatl/hitPrim entries.
+// NOTE(review): p0/p1 are only offset by pmin, not scaled, so the cell is
+// presumably expected to have unit extent - confirm against callers.
+void IsosurfaceImplicit::sse_intersect(RayPacket& rays, SSERayPacket& srp,
+ char first, char last, const Vector& pmin, const Vector& pmax,
+ float rho[2][2][2],
+ float isovalue, sse_t tenter[], sse_t texit[], sse_t hitmask[],
+ const Manta::Primitive* prim, const Manta::Material* matl)
+{
+ for(int smd=first; smd<last; smd++)
+ {
+   // skip SIMD groups in which no ray overlaps the cell
+   if (_mm_movemask_ps(hitmask[smd])==0)
+     continue;
+
+   //compute p0, p1: the entry and exit points relative to pmin
+   sse_t p0[3];
+   sse_t p1[3];
+
+   #pragma unroll(3)
+   for(int axis=0; axis<3; axis++)
+   {
+     p0[axis] = sub4(add4(srp.orig[axis][smd],
+                          mul4(srp.dir[axis][smd], tenter[smd])),
+                     set4(pmin[axis]));
+     p1[axis] = sub4(add4(srp.orig[axis][smd],
+                          mul4(srp.dir[axis][smd], texit[smd])),
+                     set4(pmin[axis]));
+   }
+
+   CubicPoly4 poly;
+   poly.generate(p0, p1, rho, isovalue);
+
+   // bracket [t0,t1] = [0,1] with polynomial values D0 = f(0), D1 = f(1)
+   sse_t t0 = zero4();
+   sse_t t1 = _mm_one;
+   sse_t D0 = poly.d;
+   sse_t D1 = add4(add4(poly.a,poly.b), add4(poly.c,poly.d));
+
+   // Keep only the rays for which f changes sign between entry and exit;
+   // only those are guaranteed to have a root inside the cell.
+   sse_t differingSigns = cmp4_lt(mul4(D0,D1), zero4());
+   sse_t sse_thisvoxelmask = and4(hitmask[smd], differingSigns);
+   int int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
+
+   //if none of them hit, don't bother iterating any more
+   if (int_thisvoxelmask == 0)
+     continue;
+
+   #define NEUBAUER_ITERATIONS 3
+   #pragma unroll(NEUBAUER_ITERATIONS)
+   for (int i=0;i<NEUBAUER_ITERATIONS;i++)
+   {
+     //compute linear interpolation
+     const sse_t denom = accurateReciprocal(sub4(D0,D1));
+     sse_t t = add4(t0,mul4(mul4(D0,denom), sub4(t1,t0)));
+
+     //re-evaluate
+     sse_t D = poly.eval(t);
+
+     //conditionally store: keep the half of the bracket that still
+     //contains the sign change
+     const sse_t frontHalf = _mm_cmplt_ps(mul4(D0,D), zero4());
+     t1 = mask4(frontHalf, t, t1);
+     t0 = mask4(frontHalf, t0, t);
+     D1 = mask4(frontHalf, D, D1);
+     D0 = mask4(frontHalf, D0, D);
+   }
+
+   //compute hit distance: map the normalized t back to the ray parameter
+   const sse_t denom = accurateReciprocal(sub4(D0,D1));
+   sse_t t = add4(t0, mul4(mul4(D0,denom), sub4(t1,t0)));
+   sse_t hit_t = add4(tenter[smd], mul4(t, sub4(texit[smd], tenter[smd])));
+
+   // Only accept hits that are closer than what the ray has found so far;
+   // otherwise a farther cell would overwrite a nearer hit.
+   sse_thisvoxelmask = and4(sse_thisvoxelmask,
+                            cmp4_lt(hit_t, srp.minT[smd]));
+   srp.minT[smd] = mask4(sse_thisvoxelmask, hit_t, srp.minT[smd]);
+   int_thisvoxelmask = _mm_movemask_ps(sse_thisvoxelmask);
+   if (int_thisvoxelmask)
+   {
+     sse_t normal[3];
+     sse_normal(rays, srp, smd, normal, pmin, pmax, rho);
+     #pragma unroll(3)
+     for(int axis=0; axis<3; axis++)
+       srp.normal[axis][smd] = mask4(sse_thisvoxelmask, normal[axis],
+                                     srp.normal[axis][smd]);
+
+     // record material and primitive for each ray of the group that hit
+     #pragma unroll(4)
+     for(int ray=0; ray<4; ray++)
+     {
+       if (int_thisvoxelmask & (1<<ray))
+       {
+         int realray=(smd<<2)+ray;
+         rays.data->hitMatl[realray] = matl;
+         rays.data->hitPrim[realray] = prim;
+       }
+     }
+   }
+ }
+}
+
+// Computes, for the four rays of SIMD group smd, the (unnormalized) gradient
+// of the trilinear interpolant of rho at each ray's hit point and writes it
+// to normal[0..2].  The hit point is orig + dir * srp.minT[smd], so minT must
+// already hold the hit distance.  For each axis the result is the sum over
+// the four cell edges parallel to that axis of the value difference along
+// the edge, weighted by the hit point's distances to the cell faces in the
+// other two axes (U and V).  The ray parameter is not used.
+void IsosurfaceImplicit::sse_normal(RayPacket &ray, SSERayPacket& srp, int
smd,
+ sse_t normal[], const Vector& pmin, const Vector& pmax,
+ const float rho[2][2][2])
+{
+ // hit position of each ray
+ sse_t phit[3];
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ phit[axis] = add4(srp.orig[axis][smd], mul4(srp.dir[axis][smd],
srp.minT[smd]));
+
+ int axis, U, V;
+
+ //axis=0
+ axis=0;
+ U=1;
+ V=2;
+ // signed distances from the hit point to the max/min faces along U and V
+ sse_t max_m_hit_U = sub4(set4(pmax[U]), phit[U]);
+ sse_t max_m_hit_V = sub4(set4(pmax[V]), phit[V]);
+ sse_t min_m_hit_U = sub4(set4(pmin[U]), phit[U]);
+ sse_t min_m_hit_V = sub4(set4(pmin[V]), phit[V]);
+ // the min_m_hit_* factors are negative inside the cell, which is why the
+ // rho differences of the terms containing exactly one of them are reversed
+ normal[axis] = mul4(mul4(max_m_hit_U, max_m_hit_V), set4(rho[1][0][0] -
rho[0][0][0]));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, max_m_hit_V),
set4(rho[0][1][0] - rho[1][1][0])));
+ normal[axis] = add4(normal[axis], mul4(mul4(max_m_hit_U, min_m_hit_V),
set4(rho[0][0][1] - rho[1][0][1])));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, min_m_hit_V),
set4(rho[1][1][1] - rho[0][1][1])));
+
+ //axis=1
+ axis=1;
+ U=0;
+ V=2;
+ max_m_hit_U = sub4(set4(pmax[U]), phit[U]);
+ max_m_hit_V = sub4(set4(pmax[V]), phit[V]);
+ min_m_hit_U = sub4(set4(pmin[U]), phit[U]);
+ min_m_hit_V = sub4(set4(pmin[V]), phit[V]);
+ normal[axis] = mul4(mul4(max_m_hit_U, max_m_hit_V), set4(rho[0][1][0] -
rho[0][0][0]));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, max_m_hit_V),
set4(rho[1][0][0] - rho[1][1][0])));
+ normal[axis] = add4(normal[axis], mul4(mul4(max_m_hit_U, min_m_hit_V),
set4(rho[0][0][1] - rho[0][1][1])));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, min_m_hit_V),
set4(rho[1][1][1] - rho[1][0][1])));
+
+ //axis=2
+ axis=2;
+ U=0;
+ V=1;
+ max_m_hit_U = sub4(set4(pmax[U]), phit[U]);
+ max_m_hit_V = sub4(set4(pmax[V]), phit[V]);
+ min_m_hit_U = sub4(set4(pmin[U]), phit[U]);
+ min_m_hit_V = sub4(set4(pmin[V]), phit[V]);
+ normal[axis] = mul4(mul4(max_m_hit_U, max_m_hit_V), set4(rho[0][0][1] -
rho[0][0][0]));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, max_m_hit_V),
set4(rho[1][0][0] - rho[1][0][1])));
+ normal[axis] = add4(normal[axis], mul4(mul4(max_m_hit_U, min_m_hit_V),
set4(rho[0][1][0] - rho[0][1][1])));
+ normal[axis] = add4(normal[axis], mul4(mul4(min_m_hit_U, min_m_hit_V),
set4(rho[1][1][1] - rho[1][1][0])));
+
+}
+
+#endif
Modified: trunk/Model/Intersections/IsosurfaceImplicit.h
==============================================================================
--- trunk/Model/Intersections/IsosurfaceImplicit.h (original)
+++ trunk/Model/Intersections/IsosurfaceImplicit.h Wed Jun 7 09:06:10
2006
@@ -4,6 +4,13 @@
#include <Interface/RayPacket.h>
#include <Core/Geometry/Vector.h>
#include <Interface/Material.h>
+#include <Interface/Primitive.h>
+
+#include <MantaSSE.h>
+
+#ifdef MANTA_SSE
+#include <Core/Math/SSEDefs.h>
+#endif
namespace Manta
{
@@ -15,6 +22,119 @@
static void single_normal(Vector& outNormal, const Vector& pmin,
const Vector& pmax, const Vector& p, float
rho[2][2][2]);
+
+ //TODO - non-SSE packet intersection
+
+#ifdef MANTA_SSE
+ // Packet intersection of SIMD ray groups [first,last) with the isosurface
+ // of one cell with corner values rho; tenter/texit/hitmask are per-group
+ // arrays indexed by SIMD group.  Results are written to srp and to the
+ // packet's hit material/primitive entries.
+ static void sse_intersect(RayPacket& rays, SSERayPacket& srp,
+ char first, char last, const Vector& pmin, const Vector&
pmax, float rho[2][2][2],
+ float isovalue, sse_t tenter[], sse_t texit[], sse_t
hitmask[],
+ const Manta::Primitive* prim, const Manta::Material*
matl);
+
+ // Writes the unnormalized gradient at the hit points of SIMD group smd
+ // to normal[0..2], for the cell [pmin,pmax] with corner values rho.
+ static void sse_normal(RayPacket &ray, SSERayPacket& srp, int smd,
+ sse_t normal[], const Vector& pmin, const Vector& pmax,
+ const float rho[2][2][2]);
+
+ // Four cubic polynomials f(t) = a*t^3 + b*t^2 + c*t + d, one per SIMD
+ // lane.  generate() builds, for each ray, the restriction of the
+ // trilinear interpolant of a cell (minus the isovalue) to the segment
+ // p0 + t*(p1 - p0), t in [0,1]; eval() evaluates it.
+ struct CubicPoly4
+ {
+ MANTA_ALIGN(16) sse_t a, b, c, d;
+
+ // p0, p1: segment end points in cell-local coordinates, one sse_t per
+ // axis; the weights 1 - p0 and p0 imply a cell spanning [0,1] per axis.
+ // voxels_cell: the eight corner values, indexed [x][y][z].
+ // isovalue: subtracted from d so that roots of f are the surface.
+ inline void generate(sse_t p0[], sse_t p1[], const float
voxels_cell[2][2][2], float isovalue)
+ {
+ // Per axis, the interpolation weights along the segment are
+ //   weight of corner 1:  e1 + d1*t
+ //   weight of corner 0:  e0 - d1*t
+ sse_t e0[3];
+ sse_t e1[3];
+ sse_t d1[3];
+
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ {
+ e0[axis] = sub4(_mm_one, p0[axis]);
+ e1[axis] = p0[axis];
+ d1[axis] = sub4(p1[axis], p0[axis]);
+ }
+
+ // Triple products of one factor per axis (x, y, z in that order).
+ // In the names, R stands for d1, N for e1 and O for e0; e.g.
+ // interimNRO = e1[0] * d1[1] * e0[2].  interimROO is reused as
+ // a scratch value for the shared two-factor products and only at
+ // the end holds the product its name describes.
+ sse_t interimROO = mul4(d1[1], d1[2]);
+ const sse_t interimRRR = mul4(d1[0], interimROO);
+ const sse_t interimNRR = mul4(e1[0], interimROO);
+ const sse_t interimORR = mul4(e0[0], interimROO);
+
+ interimROO = mul4(e1[1], d1[2]);
+ const sse_t interimRNR = mul4(d1[0], interimROO);
+ const sse_t interimNNR = mul4(e1[0], interimROO);
+ const sse_t interimONR = mul4(e0[0], interimROO);
+
+ interimROO = mul4(d1[1], e1[2]);
+ const sse_t interimRRN = mul4(d1[0], interimROO);
+ const sse_t interimNRN = mul4(e1[0], interimROO);
+ const sse_t interimORN = mul4(e0[0], interimROO);
+
+ interimROO = mul4(d1[1], e0[2]);
+ const sse_t interimRRO = mul4(d1[0], interimROO);
+ const sse_t interimNRO = mul4(e1[0], interimROO);
+ const sse_t interimORO = mul4(e0[0], interimROO);
+
+ interimROO = mul4(e0[1], d1[2]);
+ const sse_t interimROR = mul4(d1[0], interimROO);
+ const sse_t interimNOR = mul4(e1[0], interimROO);
+ const sse_t interimOOR = mul4(e0[0], interimROO);
+
+ interimROO = mul4(d1[0], e1[1]);
+ const sse_t interimRNN = mul4(interimROO, e1[2]);
+ const sse_t interimRNO = mul4(interimROO, e0[2]);
+
+ interimROO = mul4(d1[0], e0[1]);
+ const sse_t interimRON = mul4(interimROO, e1[2]);
+ interimROO = mul4(interimROO, e0[2]);
+
+ // cubic coefficient: d1x*d1y*d1z times the alternating corner sum
+ a = mul4(interimRRR, _mm_set_ps1(+ voxels_cell[1][1][1]
+ - voxels_cell[0][1][1]
+ - voxels_cell[1][0][1]
+ + voxels_cell[0][0][1]
+ - voxels_cell[1][1][0]
+ + voxels_cell[0][1][0]
+ + voxels_cell[1][0][0]
+ - voxels_cell[0][0][0]));
+
+ // quadratic coefficient: per corner, the products with two R factors
+ b =
mul4(_mm_set_ps1(voxels_cell[1][1][1]),add4(add4(interimNRR,interimRNR),interimRRN));
+ b =
sub4(b,mul4(_mm_set_ps1(voxels_cell[1][1][0]),sub4(add4(interimNRR,interimRNR),interimRRO)));
+ b =
sub4(b,mul4(_mm_set_ps1(voxels_cell[1][0][1]),add4(sub4(interimNRR,interimROR),interimRRN)));
+ b =
add4(b,mul4(_mm_set_ps1(voxels_cell[1][0][0]),sub4(sub4(interimNRR,interimROR),interimRRO)));
+ b =
add4(b,mul4(_mm_set_ps1(voxels_cell[0][1][1]),sub4(sub4(interimORR,interimRNR),interimRRN)));
+ b =
sub4(b,mul4(_mm_set_ps1(voxels_cell[0][1][0]),add4(sub4(interimORR,interimRNR),interimRRO)));
+ b =
sub4(b,mul4(_mm_set_ps1(voxels_cell[0][0][1]),sub4(add4(interimORR,interimROR),interimRRN)));
+ b =
add4(b,mul4(_mm_set_ps1(voxels_cell[0][0][0]),add4(add4(interimORR,interimROR),interimRRO)));
+
+ // linear coefficient: per corner, the products with one R factor
+ c =
mul4(_mm_set_ps1(voxels_cell[1][1][1]),add4(add4(interimRNN,interimNRN),interimNNR));
+ c =
add4(c,mul4(_mm_set_ps1(voxels_cell[1][1][0]),sub4(add4(interimRNO,interimNRO),interimNNR)));
+ c =
add4(c,mul4(_mm_set_ps1(voxels_cell[1][0][1]),add4(sub4(interimRON,interimNRN),interimNOR)));
+ c =
add4(c,mul4(_mm_set_ps1(voxels_cell[1][0][0]),sub4(sub4(interimROO,interimNRO),interimNOR)));
+ c =
add4(c,mul4(_mm_set_ps1(voxels_cell[0][1][1]),sub4(add4(interimORN,interimONR),interimRNN)));
+ c =
sub4(c,mul4(_mm_set_ps1(voxels_cell[0][1][0]),add4(sub4(interimRNO,interimORO),interimONR)));
+ c =
sub4(c,mul4(_mm_set_ps1(voxels_cell[0][0][1]),sub4(add4(interimRON,interimORN),interimOOR)));
+ c =
sub4(c,mul4(_mm_set_ps1(voxels_cell[0][0][0]),add4(add4(interimROO,interimORO),interimOOR)));
+
+ // constant coefficient: the trilinear interpolant evaluated at p0
+ d = add4(mul4(e1[0], add4(mul4(e1[1], add4(mul4(e1[2],
+ set4(voxels_cell[1][1][1])),
+ mul4(e0[2], set4(voxels_cell[1][1][0])))),
+ mul4(e0[1], add4(mul4(e1[2],
set4(voxels_cell[1][0][1])),
+ mul4(e0[2],
set4(voxels_cell[1][0][0])))))),
+ mul4(e0[0], add4(mul4(e1[1], add4(mul4(e1[2],
+ set4(voxels_cell[0][1][1])),
+ mul4(e0[2], set4(voxels_cell[0][1][0])))),
+ mul4(e0[1], add4(mul4(e1[2], set4(voxels_cell[0][0][1])),
+ mul4(e0[2], set4(voxels_cell[0][0][0])))))));
+
+ // shift so that f(t) == 0 exactly on the isosurface
+ d = sub4(d, set4(isovalue));
+ }
+
+ // Evaluates f(t) per lane with Horner's scheme: ((a*t + b)*t + c)*t + d.
+ inline sse_t eval(const sse_t &t) const
+ {
+ return add4(mul4(add4(mul4(add4(mul4(a,t),b),t),c),t),d);
+ }
+ };
+
+
+#endif
};
};
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.cc
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.cc (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.cc Wed Jun 7 09:06:10
2006
@@ -7,6 +7,10 @@
#include <Interface/RayPacket.h>
#include <Model/Intersections/IsosurfaceImplicit.h>
+#ifdef MANTA_SSE
+#include <Core/Math/SSEDefs.h>
+#endif
+
#define USE_OCTREE_DATA
#define MIN4(a,b,c,d) min(min(a,b), min(c,d));
@@ -109,8 +113,12 @@
void IsosurfaceOctreeVolume::intersect(RenderContext const &context,
RayPacket &packet) const
{
+ // The packet (implicit BVH) traversal is only declared and defined when
+ // MANTA_SSE is set; every other build falls back to tracing the rays of
+ // the packet one at a time.
+#ifdef MANTA_SSE
+ packet_intersect_implicit_bvh(packet);
+#else
for ( int i = packet.rayBegin; i < packet.rayEnd; i++ )
single_intersect(packet, i);
+#endif
}
void IsosurfaceOctreeVolume::single_intersect(RayPacket& rays, int
which_one) const
@@ -305,7 +313,6 @@
#endif
if (node.offsets[target_child]==-1)
{
- return false;
if (single_traverse_leaf(rays, which_one, orig, dir,
inv_dir, stop_depth,
next_depth, depth, node.values[target_child],
child_cell, index_trace, child_cell, child_tenter,
child_texit))
@@ -735,4 +742,271 @@
return false;
}
+/*
+ Begin packet intersection code, for SSE packets only.
+*/
+#ifdef MANTA_SSE
+
+//an octree traversal based on implicit BVH
+//
+// Entry point of the SSE packet traversal.  Builds an SSERayPacket view of
+// the packet's data, slab-tests every SIMD group against the volume bounds
+// [0, octdata->dims] to find the range [first,last) of groups with at least
+// one ray overlapping the volume, and starts the recursive traversal at the
+// root node.
+// NOTE(review): the bounds test visits all RayPacket::SSE_MaxSize groups
+// rather than only the packet's active ray range - confirm this is intended.
+void IsosurfaceOctreeVolume::packet_intersect_implicit_bvh(RayPacket& rays) const
+{
+ rays.computeInverseDirections();
+ rays.computeSigns();
+ RayPacketData* data = rays.data;
+ SSERayPacket srp;
+
+ // view the packet's float arrays four rays at a time
+ #pragma unroll(3)
+ for(int axis=0; axis<3; axis++)
+ {
+   srp.orig[axis] = (sse_t*)(data->origin[axis]);
+   srp.dir[axis] = (sse_t*)(data->direction[axis]);
+   srp.inv_dir[axis] = (sse_t*)(data->inverseDirection[axis]);
+   srp.normal[axis] = (sse_t*)(data->normal[axis]);
+ }
+ srp.minT = (sse_t*)(data->minT);
+
+ //intersect the global bounding box: find first, last
+ int first = RayPacket::SSE_MaxSize;
+ int last = -1;
+ #pragma unroll(RayPacket::SSE_MaxSize)
+ for(int smd=0; smd<RayPacket::SSE_MaxSize; smd++)
+ {
+   sse_t tnear_unpadded[3];
+   sse_t tfar_unpadded[3];
+
+   #pragma unroll(3)
+   for(int axis=0; axis<3; axis++)
+   {
+     // rays travelling in the positive direction enter through the
+     // minimum plane, the others through the maximum plane
+     const sse_t dgt0 = cmp4_ge(srp.dir[axis][smd], zero4());
+     const sse_t t0 = mul4(sub4(zero4(), srp.orig[axis][smd]),
+                           srp.inv_dir[axis][smd]);
+     const sse_t t1 = mul4(sub4(set4(octdata->dims[axis]),
+                                srp.orig[axis][smd]),
+                           srp.inv_dir[axis][smd]);
+
+     tnear_unpadded[axis] = mask4(dgt0, t0, t1);
+     tfar_unpadded[axis] = mask4(dgt0, t1, t0);
+   }
+
+   const sse_t tenter_unpadded =
+     max4(max4(tnear_unpadded[0], tnear_unpadded[1]), tnear_unpadded[2]);
+   const sse_t texit_unpadded =
+     min4(min4(tfar_unpadded[0], tfar_unpadded[1]), tfar_unpadded[2]);
+
+   //if none of them were valid
+   if (_mm_movemask_ps(_mm_cmple_ps(tenter_unpadded, texit_unpadded)) == 0)
+     continue;
+
+   first = MIN(first, smd);
+   last = smd;
+ }
+ last++;
+
+ // no SIMD group overlaps the volume
+ if (first >= last)
+   return;
+
+ // one slot per octree level, filled in during traversal
+ unsigned int index_trace[octdata->get_max_depth() + 1];
+ Vec3i cell(0,0,0);
+ bvh_octnode(rays, srp, first, last, cell, octdata->get_cap_depth(), 0,
+             0, index_trace);
+}
+
+// Traverses one interior octree node for the SIMD ray groups [first,last).
+// The eight children are visited in an order chosen from
+// rays.getSign(0, axis) (presumably the direction sign of the packet's first
+// ray - TODO confirm).  For each child the active range is narrowed with
+// intersect_octant; children whose [min,max] value range does not contain
+// the isovalue are skipped.  Depending on the child type the traversal
+// continues in bvh_octleaf, bvh_octcap or recursively in bvh_octnode.
+// Returns true as soon as one of those calls returns true, false otherwise.
+bool IsosurfaceOctreeVolume::bvh_octnode(RayPacket& rays, SSERayPacket& srp,
char first, char last,
+ const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
+{
+ //cerr << "octnode " << (int)depth << ", " << index << "; first=" <<
(int)first << ",last=" << (int)last << endl;
+ OctNode& node = octdata->get_node(depth, index);
+ Vec3i child_cell = cell;
+ // bit that distinguishes the two children along each axis at this depth;
+ // also used below as the child's edge length
+ int child_bit = octdata->get_child_bit_depth(depth);
+
+ // remember which node was visited at this depth
+ index_trace[depth] = index;
+
+ //intersect all children in order
+ #pragma unroll(2)
+ for(int midplane_x=0; midplane_x!=2; midplane_x++)
+ {
+ int target_x;
+ // nonzero when midplane_x differs from the sign: select the upper child in x
+ if (midplane_x - rays.getSign(0,0))
+ {
+ target_x = 4;
+ child_cell.data[0] = cell.data[0] | child_bit;
+ }
+ else
+ {
+ target_x = 0;
+ child_cell.data[0] = cell.data[0];
+ }
+ #pragma unroll(2)
+ for(int midplane_y=0; midplane_y!=2; midplane_y++)
+ {
+ int target_xy;
+ if (midplane_y - rays.getSign(0,1))
+ {
+ target_xy = target_x | 2;
+ child_cell.data[1] = cell.data[1] | child_bit;
+ }
+ else
+ {
+ target_xy = target_x;
+ child_cell.data[1] = cell.data[1];
+ }
+ #pragma unroll(2)
+ for(int midplane_z=0; midplane_z!=2; midplane_z++)
+ {
+ // child index: bit 2 = x, bit 1 = y, bit 0 = z
+ int target_child;
+ if (midplane_z - rays.getSign(0,2))
+ {
+ target_child = target_xy | 1;
+ child_cell.data[2] = cell.data[2] | child_bit;
+ }
+ else
+ {
+ target_child = target_xy;
+ child_cell.data[2] = cell.data[2];
+ }
+
+ // narrow the active SIMD range to the groups overlapping this child
+ char newfirst, newlast;
+ Vector pmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector pmax(child_cell.data[0]+child_bit,
child_cell.data[1]+child_bit, child_cell.data[2]+child_bit);
+ intersect_octant(srp, first, last, newfirst, newlast, pmin,
pmax);
+
+ //cerr << "newfirst=" << (int)newfirst << ", newlast=" <<
(int)newlast << endl;
+
+ // descend only if some group overlaps the child and the isovalue
+ // lies within the child's value range
+ if (newfirst < newlast && octdata->get_isovalue() >=
node.mins[target_child] && octdata->get_isovalue() <= node.maxs[target_child])
+ {
+ // an offset of -1 marks a child handled as a leaf
+ if (node.offsets[target_child]==-1)
+ {
+ if (bvh_octleaf(rays, srp, newfirst, newlast,
child_cell, stop_depth, depth, node.values[target_child], index_trace))
+ return true;
+ }
+ else
+ {
+ unsigned int child_idx = node.children_start +
node.offsets[target_child];
+ if (depth == octdata->get_pre_cap_depth()) //cap
+ {
+ if (bvh_octcap(rays, srp, newfirst, newlast,
child_cell, stop_depth, depth+1, child_idx, index_trace))
+ return true;
+ }
+ else
+ {
+ if (bvh_octnode(rays, srp, newfirst, newlast,
child_cell, stop_depth, depth+1, child_idx, index_trace))
+ return true;
+ }
+ }
+ }
+ }
+ }
+ }
+ return false;
+}
+
+// Leaf handler of the packet traversal.  Leaf intersection is not
+// implemented yet, so no hit is reported.  The explicit return is required:
+// the caller tests the result, and flowing off the end of a non-void
+// function is undefined behaviour.
+bool IsosurfaceOctreeVolume::bvh_octleaf(RayPacket& rays, SSERayPacket& srp,
+ char first, char last,
+ const Vec3i& cell, int stop_depth, int depth, ST value,
+ unsigned int index_trace[]) const
+{
+ //TODO - intersect the packet with the leaf's cell
+ return false;
+}
+
+// Traverses one octree cap (the lowest level, whose children are unit cells)
+// for the SIMD ray groups [first,last).  The eight cells are visited in an
+// order chosen from rays.getSign(0, axis) (presumably the direction sign of
+// the packet's first ray - TODO confirm).  For each cell overlapped by some
+// ray the eight corner values are gathered into rho and, if the isovalue
+// lies within their range, IsosurfaceImplicit::sse_intersect is called.
+// Always returns false.
+bool IsosurfaceOctreeVolume::bvh_octcap(RayPacket& rays, SSERayPacket& srp,
char first, char last,
+ const Vec3i& cell, int stop_depth, int depth, unsigned int
index, unsigned int index_trace[]) const
+{
+ //cerr << "octcap " << index << ", first=" << (int)first << ",last=" <<
(int)last << endl;
+ OctCap& cap = octdata->get_cap(index);
+ Vec3i child_cell = cell;
+ // remember which cap was visited at this depth
+ index_trace[depth] = index;
+
+ //intersect all children in order
+ #pragma unroll(2)
+ for(int midplane_x=0; midplane_x<2; midplane_x++)
+ {
+ int target_x;
+ // nonzero when midplane_x differs from the sign: select the upper cell in x
+ if (midplane_x - rays.getSign(0,0))
+ {
+ target_x = 4;
+ child_cell.data[0] = cell.data[0] | 1;
+ }
+ else
+ {
+ target_x = 0;
+ child_cell.data[0] = cell.data[0];
+ }
+ #pragma unroll(2)
+ for(int midplane_y=0; midplane_y<2; midplane_y++)
+ {
+ int target_xy;
+ if (midplane_y - rays.getSign(0,1))
+ {
+ target_xy = target_x | 2;
+ child_cell.data[1] = cell.data[1] | 1;
+ }
+ else
+ {
+ target_xy = target_x;
+ child_cell.data[1] = cell.data[1];
+ }
+ #pragma unroll(2)
+ for(int midplane_z=0; midplane_z<2; midplane_z++)
+ {
+ // cell index within the cap: bit 2 = x, bit 1 = y, bit 0 = z
+ int target_child;
+ if (midplane_z - rays.getSign(0,2))
+ {
+ target_child = target_xy | 1;
+ child_cell.data[2] = cell.data[2] | 1;
+ }
+ else
+ {
+ target_child = target_xy;
+ child_cell.data[2] = cell.data[2];
+ }
+
+ // per-group entry/exit distances and hit masks for this unit cell
+ sse_t child_tenter[RayPacket::SSE_MaxSize];
+ sse_t child_texit[RayPacket::SSE_MaxSize];
+ sse_t hitmask[RayPacket::SSE_MaxSize];
+ char newfirst, newlast;
+ Vector cmin(child_cell.data[0], child_cell.data[1],
child_cell.data[2]);
+ Vector cmax(child_cell.data[0]+1, child_cell.data[1]+1,
child_cell.data[2]+1);
+ intersect_octant(srp, first, last, newfirst, newlast, cmin,
cmax, child_tenter, child_texit, hitmask);
+
+ // no SIMD group overlaps this cell
+ if (newfirst >= newlast)
+ continue;
+
+#ifdef USE_OCTREE_DATA
+ // gather the corner values from the octree itself
+ float rho[2][2][2];
+ ST min_rho, max_rho, this_rho;
+ min_rho = max_rho = this_rho = cap.values[target_child];
+ rho[0][0][0] = static_cast<float>(this_rho);
+ int prev_depth = depth-1;
+ Vec3i offset(0,0,1);
+ // NOTE(review): octvol_fill_cell is not defined in this view; it
+ // presumably fills the remaining seven rho entries and updates
+ // min_rho/max_rho using the locals declared above - confirm.
+ octvol_fill_cell(cap, 1);
+#else
+ //use original grid data
+ float rho[2][2][2];
+ ST min_rho, max_rho;
+#define MYDATA octdata->indata
+ min_rho = max_rho = lookup_safe(MYDATA, child_cell.data[0],
child_cell.data[1], child_cell.data[2]);
+ rho[0][0][0] = static_cast<float>(min_rho);
+ // the other seven corners: bits of c select the x, y and z offsets
+ for(int c=1; c<8; c++)
+ {
+ Vec3i offset((c&4)!=0, (c&2)!=0, c&1);
+ Vec3i neighboridx = child_cell + offset;
+ ST this_rho = lookup_safe(MYDATA, neighboridx.data[0],
neighboridx.data[1], neighboridx.data[2]);
+ rho[offset.data[0]][offset.data[1]][offset.data[2]] =
static_cast<float>(this_rho);
+ min_rho = MIN(this_rho, min_rho);
+ max_rho = MAX(this_rho, max_rho);
+ }
+#endif
+
+ // the surface can only cross the cell if the isovalue lies within
+ // the range of its corner values
+ if (octdata->get_isovalue() >= min_rho &&
octdata->get_isovalue() <= max_rho)
+ {
+ //cerr << "in cap " << (unsigned long)(&cap) << ",
octant " << target_child << endl;
+ IsosurfaceImplicit::sse_intersect(rays, srp, newfirst,
newlast, cmin, cmax, rho,
+ octdata->get_isovalue(), child_tenter, child_texit,
hitmask, this, PrimitiveCommon::getMaterial());
+ }
+ }
+ }
+ }
+ return false;
+}
+#endif //#ifdef MANTA_SSE
Modified: trunk/Model/Primitives/IsosurfaceOctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/IsosurfaceOctreeVolume.h (original)
+++ trunk/Model/Primitives/IsosurfaceOctreeVolume.h Wed Jun 7 09:06:10
2006
@@ -8,6 +8,14 @@
#include <Core/Color/Color.h>
#include <Interface/Texture.h>
#include <Model/Primitives/OctreeVolume.h>
+#include <Interface/RayPacket.h>
+
+#include <MantaSSE.h>
+
+#ifdef MANTA_SSE
+#include <Core/Math/SSEDefs.h>
+#endif
+
namespace Manta
{
@@ -44,6 +52,94 @@
const Vector& orig, const Vector& dir, const
Vector& inv_dir, int res,
int depth, unsigned int cap_index, unsigned int
index_trace[], Vec3i& cell, const float tenter,
const float texit) const;
+
+#ifdef MANTA_SSE
+ // Entry point of the SSE packet traversal: intersects the whole packet
+ // with the octree volume.
+ void packet_intersect_implicit_bvh(RayPacket& rays)
const;
+
+ // Traverses interior node `index` at `depth` for the SIMD ray groups
+ // [first,last); cell is the node's minimum corner.
+ bool bvh_octnode(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ const Vec3i& cell, int stop_depth, int depth,
unsigned int index,
+ unsigned int index_trace[]) const;
+
+ // Handles a leaf child with scalar value `value` for the SIMD ray
+ // groups [first,last).
+ bool bvh_octleaf(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ const Vec3i& cell, int stop_depth,
int depth, ST value,
+ unsigned int index_trace[]) const;
+
+ // Traverses cap `index` for the SIMD ray groups [first,last) and
+ // intersects the rays with the isosurface inside its cells.
+ bool bvh_octcap(RayPacket& rays, SSERayPacket& srp, char first,
char last,
+ const Vec3i& cell, int stop_depth, int depth, unsigned int
index,
+ unsigned int index_trace[]) const;
+
+ /*! Slab-tests the SIMD ray groups [first,last) against the box [min,max]
+     and returns in [newfirst,newlast) the tightest sub-range that contains
+     every group with at least one ray overlapping the box.  When no group
+     overlaps, or when [first,last) is empty, the result is an empty range
+     (newfirst >= newlast). */
+ inline void intersect_octant(SSERayPacket& srp, char first, char last,
+     char& newfirst, char& newlast, const Vector& min,
+     const Vector& max) const
+ {
+   // start with an empty range and grow it on every overlapping group
+   newfirst = last;
+   newlast = first;
+   // int counter: the loop index does not depend on char arithmetic
+   for(int smd=first; smd<last; smd++)
+   {
+     sse_t tnear[3];
+     sse_t tfar[3];
+
+     #pragma unroll(3)
+     for(int axis=0; axis<3; axis++)
+     {
+       // rays travelling in the positive direction enter through the
+       // min plane, the others through the max plane
+       const sse_t dgt0 = cmp4_ge(srp.dir[axis][smd], zero4()); //use signs?
+       const sse_t t0 = mul4(sub4(set4(min[axis]), srp.orig[axis][smd]),
+                             srp.inv_dir[axis][smd]);
+       const sse_t t1 = mul4(sub4(set4(max[axis]), srp.orig[axis][smd]),
+                             srp.inv_dir[axis][smd]);
+       tnear[axis] = mask4(dgt0, t0, t1);
+       tfar[axis] = mask4(dgt0, t1, t0);
+     }
+
+     const sse_t tenter = max4(max4(tnear[0], tnear[1]), tnear[2]);
+     const sse_t texit = min4(min4(tfar[0], tfar[1]), tfar[2]);
+
+     if (_mm_movemask_ps(cmp4_le(tenter, texit)) != 0) //if any hit
+     {
+       if (smd < newfirst)
+         newfirst = (char)smd;
+       newlast = (char)(smd + 1);
+     }
+   }
+ }
+
+ /*! Slab-tests the SIMD ray groups [first,last) against the box [min,max].
+     For every tested group the per-ray entry and exit distances are stored
+     in tenter[] / texit[] and the per-ray overlap mask in hitmask[]; the
+     hitmask of every other group is cleared.  [newfirst,newlast) receives
+     the tightest sub-range containing every group with at least one
+     overlapping ray; it is empty (newfirst >= newlast) when no group
+     overlaps or when [first,last) is empty.
+     tenter, texit and hitmask must have RayPacket::SSE_MaxSize entries. */
+ inline void intersect_octant(SSERayPacket& srp, char first, char last,
+     char& newfirst, char& newlast, const Vector& min,
+     const Vector& max, sse_t tenter[], sse_t texit[],
+     sse_t hitmask[]) const
+ {
+   // int counters: SSE_MaxSize need not fit into a char
+   #pragma unroll(RayPacket::SSE_MaxSize)
+   for(int smd=0; smd<RayPacket::SSE_MaxSize; smd++)
+     hitmask[smd] = zero4();
+
+   // start with an empty range and grow it on every overlapping group
+   newfirst = last;
+   newlast = first;
+   for(int smd=first; smd<last; smd++)
+   {
+     sse_t tnear[3];
+     sse_t tfar[3];
+
+     #pragma unroll(3)
+     for(int axis=0; axis<3; axis++)
+     {
+       // rays travelling in the positive direction enter through the
+       // min plane, the others through the max plane
+       const sse_t dgt0 = cmp4_ge(srp.dir[axis][smd], zero4()); //use signs?
+       const sse_t t0 = mul4(sub4(set4(min[axis]), srp.orig[axis][smd]),
+                             srp.inv_dir[axis][smd]);
+       const sse_t t1 = mul4(sub4(set4(max[axis]), srp.orig[axis][smd]),
+                             srp.inv_dir[axis][smd]);
+       tnear[axis] = mask4(dgt0, t0, t1);
+       tfar[axis] = mask4(dgt0, t1, t0);
+     }
+
+     tenter[smd] = max4(max4(tnear[0], tnear[1]), tnear[2]);
+     texit[smd] = min4(min4(tfar[0], tfar[1]), tfar[2]);
+
+     hitmask[smd] = cmp4_le(tenter[smd], texit[smd]);
+     if (_mm_movemask_ps(hitmask[smd]) != 0) //if any hit
+     {
+       if (smd < newfirst)
+         newfirst = (char)smd;
+       newlast = (char)(smd + 1);
+     }
+   }
+ }
+#endif
};
};
Modified: trunk/Model/Primitives/OctreeVolume.h
==============================================================================
--- trunk/Model/Primitives/OctreeVolume.h (original)
+++ trunk/Model/Primitives/OctreeVolume.h Wed Jun 7 09:06:10 2006
@@ -328,7 +328,8 @@
if (depth == pre_cap_depth)
{
index = node.children_start + node.offsets[target_child];
- int target_child = ((p.data[0] & 1) << 2) | ((p.data[1]
& 1) << 1) | (p.data[2] & 1);
+ int target_child = ((p.data[0] & 1) <<
+ 2) | ((p.data[1] & 1) << 1) | (p.data[2] & 1);
return
steps[current_timestep].caps[index].values[target_child];
}
- [MANTA] r1099 - in trunk: Core Core/Math Interface Model/Groups Model/Intersections Model/Primitives, knolla, 06/07/2006
Archive powered by MHonArc 2.6.16.