Text archives Help
- From: boulos@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1175 - trunk/Model/Primitives
- Date: Sun, 13 Aug 2006 21:45:10 -0600 (MDT)
Author: boulos
Date: Sun Aug 13 21:45:09 2006
New Revision: 1175
Modified:
trunk/Model/Primitives/WaldTriangle.cc
Log:
Adding MANTA_SSE version of WaldTriangle test.
Increases erw6 test from 21 to 24.
Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc (original)
+++ trunk/Model/Primitives/WaldTriangle.cc Sun Aug 13 21:45:09 2006
@@ -85,6 +85,172 @@
n_k = normal[k];
}
+#ifdef MANTA_SSE
+// MANTA_SSE version of the packet/triangle intersection test.
+//
+// For each ray in [rays.begin(), rays.end()) the distance f to the triangle's
+// plane is computed from the members n_u, n_v and n_d (k is the projection
+// axis, ku/kv the two remaining axes).  Rays whose f fails the T_EPSILON/minT
+// plane test are rejected.  The projected hit point (hu, hv) is then tested
+// with
+//   lambda = b_d + hu*b_nu + hv*b_nv
+//   mue    = c_d + hu*c_nu + hv*c_nv
+// and the ray hits when lambda >= 0, mue >= 0 and lambda + mue <= 1.
+//
+// The ray range is split into three parts:
+//   [ray_begin, head_end)   scalar, rays before the first multiple of four
+//   [sse_begin, sse_end)    four rays at a time with SSE
+//   [tail_begin, ray_end)   scalar, rays after the last full group of four
+// Scalar hits are reported through rays.hit(); the SSE path writes minT,
+// hitMatl, hitPrim and hitTex of the packet data directly.
+//
+// NOTE(review): the SSE path uses aligned loads/stores (_mm_load_ps,
+// _mm_store_ps, _mm_store_si128) at indices that are multiples of four, so it
+// assumes the per-ray arrays in RayPacketData are 16-byte aligned -- confirm
+// against the RayPacketData declaration.
+// NOTE(review): the scalar path accepts f == T_EPSILON and f == minT, the SSE
+// path uses strict comparisons for both; the two paths differ only on exact
+// equality.
+//
+// context is not used by this implementation.
+void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays) const {
+  const int axis = k;
+  const int ku = (k==2)?0:k+1;
+  const int kv = (k==0)?2:k-1;
+
+  // what qualifiers go here?
+  RayPacketData* data = rays.data;
+
+  const int ray_begin = rays.begin();
+  const int ray_end   = rays.end();
+  const int sse_begin = (ray_begin + 3) & (~3);
+  const int sse_end   = (ray_end) & (~3);
+
+  // If the range contains no complete aligned group of four rays, sse_begin
+  // can lie beyond ray_end and sse_end can lie before ray_begin (for example
+  // begin=1, end=3 gives sse_begin=4, sse_end=0).  Using those values as
+  // scalar loop bounds would touch rays outside [ray_begin, ray_end) and test
+  // some rays twice, so in that case the head loop covers the whole range and
+  // the tail loop is empty.
+  const bool has_sse_range = sse_begin < sse_end;
+  const int head_end   = has_sse_range ? sse_begin : ray_end;
+  const int tail_begin = has_sse_range ? sse_end   : ray_end;
+
+  // Scalar head: rays in front of the first aligned group.
+  for (int ray = ray_begin; ray < head_end; ++ray) {
+    const float nd0 = ( n_u * data->direction[ku][ray] +
+                        n_v * data->direction[kv][ray] +
+                        data->direction[k][ray] );
+    const float nd = 1.f/nd0;
+
+    float org_k  = data->origin[axis][ray];
+    float org_ku = data->origin[ku][ray];
+    float org_kv = data->origin[kv][ray];
+
+    float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+
+    const float f = f0 * nd;
+    // plane test
+    if ( f < T_EPSILON || f > data->minT[ray] )
+      continue;
+
+    const float hu = org_ku + f*data->direction[ku][ray];
+    const float hv = org_kv + f*data->direction[kv][ray];
+    const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+    // barycentric test
+    if ( lambda < 0.f )
+      continue;
+
+    const float mue = c_d + hu * c_nu + hv * c_nv;
+    if ( mue < 0.f || mue + lambda > 1.f )
+      continue;
+
+    rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+  }
+
+  // SSE body: four rays per iteration.  Empty when has_sse_range is false.
+  for (int ray = sse_begin; ray < sse_end; ray += 4) {
+    const __m128 dir_ku = _mm_load_ps(&data->direction[ku][ray]);
+    const __m128 dir_kv = _mm_load_ps(&data->direction[kv][ray]);
+
+    const __m128 nd0 =
+      _mm_add_ps( _mm_mul_ps(_mm_set1_ps(n_u), dir_ku),
+                  _mm_add_ps( _mm_mul_ps(_mm_set1_ps(n_v), dir_kv),
+                              _mm_load_ps(&data->direction[k][ray])));
+    const __m128 nd = oneOver(nd0);
+
+    const __m128 org_k  = _mm_load_ps(&data->origin[axis][ray]);
+    const __m128 org_ku = _mm_load_ps(&data->origin[ku][ray]);
+    const __m128 org_kv = _mm_load_ps(&data->origin[kv][ray]);
+    const __m128 f0 =
+      _mm_sub_ps( _mm_set1_ps(n_d),
+                  _mm_add_ps( org_k,
+                              _mm_add_ps( _mm_mul_ps(_mm_set1_ps(n_u), org_ku),
+                                          _mm_mul_ps(_mm_set1_ps(n_v), org_kv))));
+
+    // maybe these would be faster as swizzle after _mm_load_ps
+    const __m128 f = _mm_mul_ps(f0, nd);
+
+    // plane test: a lane stays set when f > T_EPSILON and minT > f
+    __m128 mask_test =
+      _mm_and_ps( _mm_cmpnle_ps(f, _mm_set1_ps(T_EPSILON)),
+                  _mm_cmpnle_ps(_mm_load_ps(&data->minT[ray]), f));
+    if (_mm_movemask_ps(mask_test) == 0x0) continue;
+
+    const __m128 hu = _mm_add_ps( _mm_mul_ps(f, dir_ku), org_ku);
+    const __m128 hv = _mm_add_ps( _mm_mul_ps(f, dir_kv), org_kv);
+    const __m128 lambda =
+      _mm_add_ps( _mm_set1_ps(b_d),
+                  _mm_add_ps( _mm_mul_ps(hu, _mm_set1_ps(b_nu)),
+                              _mm_mul_ps(hv, _mm_set1_ps(b_nv))));
+
+    // barycentric test, first coordinate: lambda >= 0
+    mask_test = _mm_and_ps(mask_test,
+                           _mm_cmpnlt_ps(lambda, _mm_setzero_ps()));
+    if (_mm_movemask_ps(mask_test) == 0x0) continue;
+
+    const __m128 mue =
+      _mm_add_ps( _mm_set1_ps(c_d),
+                  _mm_add_ps( _mm_mul_ps(hu, _mm_set1_ps(c_nu)),
+                              _mm_mul_ps(hv, _mm_set1_ps(c_nv))));
+
+    // barycentric test, second coordinate: mue >= 0 and mue + lambda <= 1
+    mask_test =
+      _mm_and_ps(mask_test,
+                 _mm_and_ps( _mm_cmpnlt_ps(mue, _mm_setzero_ps()),
+                             _mm_cmpnlt_ps(_mm_set1_ps(1.f),
+                                           _mm_add_ps(mue, lambda))));
+
+    // One bit per lane; a set bit means that ray hit the triangle.
+    int hit_result = _mm_movemask_ps(mask_test);
+    switch (hit_result) {
+    case 0x0:
+      // all miss
+      break;
+    case 0xf:
+      // all hit: plain aligned stores for the four rays
+      _mm_store_ps(&data->minT[ray], f);
+#ifdef __x86_64
+      // 64-bit pointers: two pointers per 128-bit store, so two stores each
+      _mm_store_si128((__m128i*)&data->hitMatl[ray],
+                      _mm_set1_epi64x((long long)getMaterial()));
+      _mm_store_si128((__m128i*)&data->hitMatl[ray+2],
+                      _mm_set1_epi64x((long long)getMaterial()));
+      _mm_store_si128((__m128i*)&data->hitPrim[ray],
+                      _mm_set1_epi64x((long long)this));
+      _mm_store_si128((__m128i*)&data->hitPrim[ray+2],
+                      _mm_set1_epi64x((long long)this));
+      _mm_store_si128((__m128i*)&data->hitTex[ray],
+                      _mm_set1_epi64x((long long)getTexCoordMapper()));
+      _mm_store_si128((__m128i*)&data->hitTex[ray+2],
+                      _mm_set1_epi64x((long long)getTexCoordMapper()));
+#else
+      // 32-bit pointers: four pointers per 128-bit store
+      _mm_store_si128((__m128i*)&data->hitMatl[ray],
+                      _mm_set1_epi32((int)getMaterial()));
+      _mm_store_si128((__m128i*)&data->hitPrim[ray],
+                      _mm_set1_epi32((int)this));
+      _mm_store_si128((__m128i*)&data->hitTex[ray],
+                      _mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+      break;
+    default: {
+      // mixed: byte-masked stores so only the lanes that hit are written.
+      // TODO(boulos): are these casts really a good idea? (shouldn't we do
+      // *((__m128i*)&f) instead?)
+      const __m128i hit_mask = _mm_castps_si128(mask_test);
+      _mm_maskmoveu_si128(_mm_castps_si128(f), hit_mask,
+                          (char*)&data->minT[ray]);
+#ifdef __x86_64
+      // Widen each 32-bit lane mask to 64 bits so that it covers one pointer:
+      // lohit masks rays 0 and 1 of the group, hihit masks rays 2 and 3.
+      __m128i lohit = _mm_castps_si128(_mm_unpacklo_ps(mask_test, mask_test));
+      __m128i hihit = _mm_castps_si128(_mm_unpackhi_ps(mask_test, mask_test));
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
+                          lohit, (char*)&data->hitMatl[ray]);
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
+                          hihit, (char*)&data->hitMatl[ray+2]);
+
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this),
+                          lohit, (char*)&data->hitPrim[ray]);
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this),
+                          hihit, (char*)&data->hitPrim[ray+2]);
+
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getTexCoordMapper()),
+                          lohit, (char*)&data->hitTex[ray]);
+      _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getTexCoordMapper()),
+                          hihit, (char*)&data->hitTex[ray+2]);
+#else
+      _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
+                          hit_mask, (char*)&data->hitMatl[ray]);
+      _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
+                          hit_mask, (char*)&data->hitPrim[ray]);
+      _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
+                          hit_mask, (char*)&data->hitTex[ray]);
+#endif
+      break;
+    }
+    }
+  }
+
+  // Scalar tail: rays behind the last full aligned group.
+  for (int ray = tail_begin; ray < ray_end; ++ray) {
+    const float nd0 = ( n_u * data->direction[ku][ray] +
+                        n_v * data->direction[kv][ray] +
+                        data->direction[k][ray] );
+    const float nd = 1.f/nd0;
+
+    float org_k  = data->origin[axis][ray];
+    float org_ku = data->origin[ku][ray];
+    float org_kv = data->origin[kv][ray];
+
+    float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+
+    const float f = f0 * nd;
+    // plane test
+    if ( f < T_EPSILON || f > data->minT[ray] )
+      continue;
+
+    const float hu = org_ku + f*data->direction[ku][ray];
+    const float hv = org_kv + f*data->direction[kv][ray];
+    const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+    // barycentric test
+    if ( lambda < 0.f )
+      continue;
+
+    const float mue = c_d + hu * c_nu + hv * c_nv;
+    if ( mue < 0.f || mue + lambda > 1.f )
+      continue;
+
+    rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+  }
+}
+#else
#if !(USE_SIMD) // portable C version
void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays)
const
{
@@ -548,3 +714,4 @@
}
}
#endif
+#endif // MANTA_SSE
- [MANTA] r1175 - trunk/Model/Primitives, boulos, 08/13/2006
Archive powered by MHonArc 2.6.16.