Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[Manta] r2136 - trunk/Interface


Chronological Thread 
  • From: "James Bigler" <bigler@cs.utah.edu>
  • To: manta@sci.utah.edu
  • Subject: [Manta] r2136 - trunk/Interface
  • Date: Fri, 29 Feb 2008 08:41:47 -0700 (MST)

Author: bigler
Date: Fri Feb 29 08:41:47 2008
New Revision: 2136

Modified:
   trunk/Interface/Primitive.cc
Log:
Interface/Primitive.cc

  Added horrible SSE code for copying.

  On my laptop for one processor:

    before:   11.51 fps
    C copy:   11.61 fps
    SSE copy: 11.67 fps

  I wonder if the masked copies with all the loads and unloads are killing us.
  Perhaps making the prologue, epilogue, and single SSE vector cases use C
  copies would help.  Austin wants to try memcpy.  That's also worth a shot,
  now that we have SSE code to compare it with.

  I believe the real cost of using geometricNormals is in the forward facing
  computation where we have to compute the dot products again and do sign
  changes.  That bit of code could use some SSE.


Modified: trunk/Interface/Primitive.cc
==============================================================================
--- trunk/Interface/Primitive.cc        (original)
+++ trunk/Interface/Primitive.cc        Fri Feb 29 08:41:47 2008
@@ -25,8 +25,82 @@
                                         RayPacket& rays) const {
   rays.computeNormals(context);
 
+#ifdef MANTA_SSE
+  RayPacketData* data = rays.data;
+  if((rays.rayBegin ^ (rays.rayEnd-1)) & ~3){
+    int i = rays.rayBegin & ~3;
+    // Prologue
+    if(i != rays.rayBegin){
+      __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
+      // mask is on for active rays
+      __m128 mask = _mm_castsi128_ps(_mm_cmpgt_epi32(ray_idx, 
_mm_set1_epi32(rays.rayBegin-i-1)));
+      _mm_store_ps(&data->geometricNormal[0][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[0][i]),
+                         _mm_load_ps(&data->geometricNormal[0][i])));
+      _mm_store_ps(&data->geometricNormal[1][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[1][i]),
+                         _mm_load_ps(&data->geometricNormal[1][i])));
+      _mm_store_ps(&data->geometricNormal[2][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[2][i]),
+                         _mm_load_ps(&data->geometricNormal[2][i])));
+    }
+    // Primary loop body
+    int e = rays.rayEnd - 3;
+    for(;i<e;i+=4){
+      _mm_store_ps(&data->geometricNormal[0][i], 
_mm_load_ps(&data->normal[0][i]));
+      _mm_store_ps(&data->geometricNormal[1][i], 
_mm_load_ps(&data->normal[1][i]));
+      _mm_store_ps(&data->geometricNormal[2][i], 
_mm_load_ps(&data->normal[2][i]));
+    }
+    // Epilogue
+    if(i != rays.rayEnd){
+      __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
+      // mask is on for active rays
+      __m128 mask = _mm_castsi128_ps(_mm_cmplt_epi32(ray_idx, 
_mm_set1_epi32(rays.rayEnd-i)));
+      _mm_store_ps(&data->geometricNormal[0][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[0][i]),
+                         _mm_load_ps(&data->geometricNormal[0][i])));
+      _mm_store_ps(&data->geometricNormal[1][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[1][i]),
+                         _mm_load_ps(&data->geometricNormal[1][i])));
+      _mm_store_ps(&data->geometricNormal[2][i],
+                   mask4(mask,
+                         _mm_load_ps(&data->normal[2][i]),
+                         _mm_load_ps(&data->geometricNormal[2][i])));
+    }
+  } else {
+    // Single SSE vector
+    int i = rays.rayBegin & ~3;
+    __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
+    __m128 mask = _mm_castsi128_ps(_mm_and_si128(_mm_cmpgt_epi32(ray_idx, 
_mm_set1_epi32(rays.rayBegin-i-1)), _mm_cmplt_epi32(ray_idx, 
_mm_set1_epi32(rays.rayEnd-i))));
+    _mm_store_ps(&data->geometricNormal[0][i],
+                 mask4(mask,
+                       _mm_load_ps(&data->normal[0][i]),
+                       _mm_load_ps(&data->geometricNormal[0][i])));
+    _mm_store_ps(&data->geometricNormal[1][i],
+                 mask4(mask,
+                       _mm_load_ps(&data->normal[1][i]),
+                       _mm_load_ps(&data->geometricNormal[1][i])));
+    _mm_store_ps(&data->geometricNormal[2][i],
+                 mask4(mask,
+                       _mm_load_ps(&data->normal[2][i]),
+                       _mm_load_ps(&data->geometricNormal[2][i])));
+  }
+#elif 1
+  RayPacketData* data = rays.data;
+  for(int i = rays.begin(); i < rays.end(); ++i) {
+    data->geometricNormal[0][i] = data->normal[0][i];
+    data->geometricNormal[1][i] = data->normal[1][i];
+    data->geometricNormal[2][i] = data->normal[2][i];
+  }
+#else // #ifdef MANTA_SSE
   for(int i = rays.begin(); i != rays.end(); ++i)
     rays.setGeometricNormal(i, rays.getNormal(i));
+#endif
 
   rays.setFlag( RayPacket::HaveGeometricNormals );
   if( rays.getFlag( RayPacket::HaveUnitNormals ) )




  • [Manta] r2136 - trunk/Interface, James Bigler, 02/29/2008

Archive powered by MHonArc 2.6.16.

Top of page