Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1175 - trunk/Model/Primitives


Chronological Thread 
  • From: boulos@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1175 - trunk/Model/Primitives
  • Date: Sun, 13 Aug 2006 21:45:10 -0600 (MDT)

Author: boulos
Date: Sun Aug 13 21:45:09 2006
New Revision: 1175

Modified:
   trunk/Model/Primitives/WaldTriangle.cc
Log:
Adding MANTA_SSE version of WaldTriangle test.
Increases erw6 test from 21 to 24.


Modified: trunk/Model/Primitives/WaldTriangle.cc
==============================================================================
--- trunk/Model/Primitives/WaldTriangle.cc      (original)
+++ trunk/Model/Primitives/WaldTriangle.cc      Sun Aug 13 21:45:09 2006
@@ -85,6 +85,172 @@
     n_k = normal[k];
 }
 
+#ifdef MANTA_SSE
+void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays) 
const {
+    const int axis = k;
+    const int ku = (k==2)?0:k+1;
+    const int kv = (k==0)?2:k-1;
+
+    // what qualifiers go here?
+    RayPacketData* data = rays.data;
+
+    const bool RaysConstantOrigin = rays.getFlag(RayPacket::ConstantOrigin);
+    const int ray_begin = rays.begin();
+    const int ray_end   = rays.end();
+    const int sse_begin = (ray_begin + 3) & (~3);
+    const int sse_end   = (ray_end) & (~3);
+
+    for (int ray = ray_begin; ray < sse_begin; ++ray) {
+        const float nd0 = ( n_u * data->direction[ku][ray] +
+                            n_v * data->direction[kv][ray] +
+                            data->direction[k][ray] );
+        const float nd  = 1.f/nd0;
+
+
+        float org_k  = data->origin[axis][ray];
+        float org_ku = data->origin[ku][ray];
+        float org_kv = data->origin[kv][ray];
+
+        float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+
+        const float f = f0 * nd;
+        // plane test
+        if ( f < T_EPSILON || f > data->minT[ray] )
+            continue;
+
+        const float hu = org_ku + f*data->direction[ku][ray];
+        const float hv = org_kv + f*data->direction[kv][ray];
+        const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+        // barycentric test
+        if ( lambda < 0.f )
+            continue;
+
+        const float mue = c_d + hu * c_nu + hv * c_nv;
+        if ( mue < 0.f || mue + lambda > 1.f )
+            continue;
+
+        rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+    }
+
+    for (int ray = sse_begin; ray < sse_end; ray += 4) {
+        const __m128 nd0 = ( _mm_add_ps( _mm_mul_ps( _mm_set1_ps(n_u),
+                                                     
_mm_load_ps(&data->direction[ku][ray])),
+                                         _mm_add_ps( 
_mm_mul_ps(_mm_set1_ps(n_v),
+                                                                
_mm_load_ps(&data->direction[kv][ray])),
+                                                     
_mm_load_ps(&data->direction[k][ray]))));
+        const __m128 nd  = oneOver(nd0);
+
+        const __m128 org_k  = _mm_load_ps(&data->origin[axis][ray]);
+        const __m128 org_ku = _mm_load_ps(&data->origin[ku][ray]);
+        const __m128 org_kv = _mm_load_ps(&data->origin[kv][ray]);
+        const __m128 f0     = _mm_sub_ps(_mm_set1_ps(n_d),
+                                         _mm_add_ps(org_k, 
_mm_add_ps(_mm_mul_ps(_mm_set1_ps(n_u),
+                                                                             
    org_ku),
+                                                                      
_mm_mul_ps(_mm_set1_ps(n_v),
+                                                                             
    org_kv))));
+
+        const __m128 f = _mm_mul_ps(f0, nd); // maybe these would be faster 
as swizzle after _mm_load_ps
+        // plane test
+        __m128 mask_test = _mm_and_ps( _mm_cmpnle_ps(f, 
_mm_set1_ps(T_EPSILON)),
+                                       
_mm_cmpnle_ps(_mm_load_ps(&data->minT[ray]), f));
+        if (_mm_movemask_ps(mask_test) == 0x0) continue;
+
+        const __m128 hu = _mm_add_ps( _mm_mul_ps(f, 
_mm_load_ps(&data->direction[ku][ray])), org_ku);
+        const __m128 hv = _mm_add_ps( _mm_mul_ps(f, 
_mm_load_ps(&data->direction[kv][ray])), org_kv);
+        const __m128 lambda = _mm_add_ps( _mm_set1_ps(b_d),
+                                          _mm_add_ps( _mm_mul_ps(hu, 
_mm_set1_ps(b_nu)),
+                                                      _mm_mul_ps(hv, 
_mm_set1_ps(b_nv))));
+        mask_test = _mm_and_ps(mask_test, _mm_cmpnlt_ps(lambda, 
_mm_setzero_ps()));
+        if (_mm_movemask_ps(mask_test) == 0x0) continue;
+
+        const __m128 mue = _mm_add_ps( _mm_set1_ps(c_d),
+                                       _mm_add_ps( _mm_mul_ps(hu, 
_mm_set1_ps(c_nu)),
+                                                   _mm_mul_ps(hv, 
_mm_set1_ps(c_nv))));
+
+        mask_test = _mm_and_ps(mask_test, _mm_and_ps( _mm_cmpnlt_ps(mue, 
_mm_setzero_ps()),
+                                                      
_mm_cmpnlt_ps(_mm_set1_ps(1.f), _mm_add_ps(mue, lambda))));
+
+        int hit_result = _mm_movemask_ps(mask_test);
+        switch (hit_result) {
+        case 0x0:
+            // all miss
+            break;
+        case 0xf:
+            // all hit:
+            _mm_store_ps(&data->minT[ray], f);
+#ifdef __x86_64
+            _mm_store_si128((__m128i*)&data->hitMatl[ray], 
_mm_set1_epi64x((long long)getMaterial()));
+            _mm_store_si128((__m128i*)&data->hitMatl[ray+2], 
_mm_set1_epi64x((long long)getMaterial()));
+            _mm_store_si128((__m128i*)&data->hitPrim[ray], 
_mm_set1_epi64x((long long)this));
+            _mm_store_si128((__m128i*)&data->hitPrim[ray+2], 
_mm_set1_epi64x((long long)this));
+            _mm_store_si128((__m128i*)&data->hitTex[ray], 
_mm_set1_epi64x((long long)getTexCoordMapper()));
+            _mm_store_si128((__m128i*)&data->hitTex[ray+2], 
_mm_set1_epi64x((long long)getTexCoordMapper()));
+#else
+            _mm_store_si128((__m128i*)&data->hitMatl[ray], 
_mm_set1_epi32((int)getMaterial()));
+            _mm_store_si128((__m128i*)&data->hitPrim[ray], 
_mm_set1_epi32((int)this));
+            _mm_store_si128((__m128i*)&data->hitTex[ray], 
_mm_set1_epi32((int)getTexCoordMapper()));
+#endif
+            break;
+        default:
+            // mixed, TODO(boulos): are these casts really a good idea? 
(shouldn't we do *((__m128i*)&f) instead?)
+            _mm_maskmoveu_si128((__m128i)_mm_castps_si128(f), 
(__m128i)_mm_castps_si128(mask_test),
+                                (char*)&data->minT[ray]);
+#ifdef __x86_64
+            __m128i lohit = 
(__m128i)_mm_castps_si128(_mm_unpacklo_ps(mask_test, mask_test));
+            __m128i hihit = 
(__m128i)_mm_castps_si128(_mm_unpackhi_ps(mask_test, mask_test));
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[ray]);
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[ray+2]);
+
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit, 
(char*)&data->hitPrim[ray]);
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit, 
(char*)&data->hitPrim[ray+2]);
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[ray]);
+            _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[ray+2]);
+#else
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(mask_test),
+                                (char*)&data->hitMatl[ray]);
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(mask_test),
+                                (char*)&data->hitPrim[ray]);
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(mask_test),
+                                (char*)&data->hitTex[ray]);
+#endif
+            break;
+        }
+    }
+
+    for (int ray = sse_end; ray < ray_end; ++ray) {
+        const float nd0 = ( n_u * data->direction[ku][ray] +
+                            n_v * data->direction[kv][ray] +
+                            data->direction[k][ray] );
+        const float nd  = 1.f/nd0;
+
+        float org_k  = data->origin[axis][ray];
+        float org_ku = data->origin[ku][ray];
+        float org_kv = data->origin[kv][ray];
+
+        float f0 = n_d - (org_k + n_u * org_ku + n_v * org_kv);
+
+        const float f = f0 * nd;
+        // plane test
+        if ( f < T_EPSILON || f > data->minT[ray] )
+            continue;
+
+        const float hu = org_ku + f*data->direction[ku][ray];
+        const float hv = org_kv + f*data->direction[kv][ray];
+        const float lambda = b_d + hu*b_nu + hv * b_nv;
+
+        // barycentric test
+        if ( lambda < 0.f )
+            continue;
+
+        const float mue = c_d + hu * c_nu + hv * c_nv;
+        if ( mue < 0.f || mue + lambda > 1.f )
+            continue;
+
+        rays.hit(ray, f, getMaterial(), this, getTexCoordMapper());
+    }
+}
+#else
 #if !(USE_SIMD) // portable C version
 void WaldTriangle::intersect(const RenderContext& context, RayPacket& rays) 
const
 {
@@ -548,3 +714,4 @@
    }
 }
 #endif
+#endif // MANTA_SSE




  • [MANTA] r1175 - trunk/Model/Primitives, boulos, 08/13/2006

Archive powered by MHonArc 2.6.16.

Top of page