Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[Manta] r2138 - trunk/Interface


Chronological Thread 
  • From: "Austin Robison" <arobison@rayscale.com>
  • To: manta@sci.utah.edu
  • Subject: [Manta] r2138 - trunk/Interface
  • Date: Fri, 29 Feb 2008 16:11:15 -0700 (MST)

Author: arobison
Date: Fri Feb 29 16:11:15 2008
New Revision: 2138

Modified:
   trunk/Interface/Primitive.cc
Log:
Replacing the SSE normal copy with a memcpy() version.
This is slightly faster than the SSE copy, going from
~17.0fps to ~17.2fps on one core of my laptop.


Modified: trunk/Interface/Primitive.cc
==============================================================================
--- trunk/Interface/Primitive.cc        (original)
+++ trunk/Interface/Primitive.cc        Fri Feb 29 16:11:15 2008
@@ -25,82 +25,11 @@
                                         RayPacket& rays) const {
   rays.computeNormals(context);
 
-#ifdef MANTA_SSE
-  RayPacketData* data = rays.data;
-  if((rays.rayBegin ^ (rays.rayEnd-1)) & ~3){
-    int i = rays.rayBegin & ~3;
-    // Prologue
-    if(i != rays.rayBegin){
-      __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
-      // mask is on for active rays
-      __m128 mask = _mm_castsi128_ps(_mm_cmpgt_epi32(ray_idx, _mm_set1_epi32(rays.rayBegin-i-1)));
-      _mm_store_ps(&data->geometricNormal[0][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[0][i]),
-                         _mm_load_ps(&data->geometricNormal[0][i])));
-      _mm_store_ps(&data->geometricNormal[1][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[1][i]),
-                         _mm_load_ps(&data->geometricNormal[1][i])));
-      _mm_store_ps(&data->geometricNormal[2][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[2][i]),
-                         _mm_load_ps(&data->geometricNormal[2][i])));
-    }
-    // Primary loop body
-    int e = rays.rayEnd - 3;
-    for(;i<e;i+=4){
-      _mm_store_ps(&data->geometricNormal[0][i], _mm_load_ps(&data->normal[0][i]));
-      _mm_store_ps(&data->geometricNormal[1][i], _mm_load_ps(&data->normal[1][i]));
-      _mm_store_ps(&data->geometricNormal[2][i], _mm_load_ps(&data->normal[2][i]));
-    }
-    // Epilogue
-    if(i != rays.rayEnd){
-      __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
-      // mask is on for active rays
-      __m128 mask = _mm_castsi128_ps(_mm_cmplt_epi32(ray_idx, _mm_set1_epi32(rays.rayEnd-i)));
-      _mm_store_ps(&data->geometricNormal[0][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[0][i]),
-                         _mm_load_ps(&data->geometricNormal[0][i])));
-      _mm_store_ps(&data->geometricNormal[1][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[1][i]),
-                         _mm_load_ps(&data->geometricNormal[1][i])));
-      _mm_store_ps(&data->geometricNormal[2][i],
-                   mask4(mask,
-                         _mm_load_ps(&data->normal[2][i]),
-                         _mm_load_ps(&data->geometricNormal[2][i])));
-    }
-  } else {
-    // Single SSE vector
-    int i = rays.rayBegin & ~3;
-    __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
-    __m128 mask = _mm_castsi128_ps(_mm_and_si128(_mm_cmpgt_epi32(ray_idx, _mm_set1_epi32(rays.rayBegin-i-1)), _mm_cmplt_epi32(ray_idx, _mm_set1_epi32(rays.rayEnd-i))));
-    _mm_store_ps(&data->geometricNormal[0][i],
-                 mask4(mask,
-                       _mm_load_ps(&data->normal[0][i]),
-                       _mm_load_ps(&data->geometricNormal[0][i])));
-    _mm_store_ps(&data->geometricNormal[1][i],
-                 mask4(mask,
-                       _mm_load_ps(&data->normal[1][i]),
-                       _mm_load_ps(&data->geometricNormal[1][i])));
-    _mm_store_ps(&data->geometricNormal[2][i],
-                 mask4(mask,
-                       _mm_load_ps(&data->normal[2][i]),
-                       _mm_load_ps(&data->geometricNormal[2][i])));
+  for(int i = 0; i < 3; ++i) {
+    memcpy(&rays.data->geometricNormal[i][rays.begin()],
+           &rays.data->normal[i][rays.begin()],
+           (rays.end()-rays.begin())*sizeof(Real));
   }
-#elif 1
-  RayPacketData* data = rays.data;
-  for(int i = rays.begin(); i < rays.end(); ++i) {
-    data->geometricNormal[0][i] = data->normal[0][i];
-    data->geometricNormal[1][i] = data->normal[1][i];
-    data->geometricNormal[2][i] = data->normal[2][i];
-  }
-#else // #ifdef MANTA_SSE
-  for(int i = rays.begin(); i != rays.end(); ++i)
-    rays.setGeometricNormal(i, rays.getNormal(i));
-#endif
 
   rays.setFlag( RayPacket::HaveGeometricNormals );
   if( rays.getFlag( RayPacket::HaveUnitNormals ) )




  • [Manta] r2138 - trunk/Interface, Austin Robison, 02/29/2008

Archive powered by MHonArc 2.6.16.

Top of page