Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1022 - in trunk: Image include


Chronological Thread 
  • From: sparker@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1022 - in trunk: Image include
  • Date: Fri, 28 Apr 2006 23:41:38 -0600 (MDT)

Author: sparker
Date: Fri Apr 28 23:41:33 2006
New Revision: 1022

Added:
   trunk/Image/SimpleImage_special.cc
Modified:
   trunk/Image/CMakeLists.txt
   trunk/Image/SimpleImage.h
   trunk/include/CMakeLists.txt
Log:
Use SSE for SimpleImage::set.  Results in a ~60% speedup for -pixelsampler null.
Now at ~82 fps for 1 processor and 120 for 2 (limited by OpenGL) on a MacBook Pro.
SSE enabled for ARGB format only.


Modified: trunk/Image/CMakeLists.txt
==============================================================================
--- trunk/Image/CMakeLists.txt  (original)
+++ trunk/Image/CMakeLists.txt  Fri Apr 28 23:41:33 2006
@@ -32,6 +32,7 @@
              SimpleImageBase.h
              SimpleImageBase.cc
              SimpleImage.h
+             SimpleImage_special.cc
              SimpleImage_templates.cc
              TGAFile.h
              TGAFile.cc

Modified: trunk/Image/SimpleImage.h
==============================================================================
--- trunk/Image/SimpleImage.h   (original)
+++ trunk/Image/SimpleImage.h   Fri Apr 28 23:41:33 2006
@@ -31,8 +31,9 @@
 
 #include <Image/SimpleImageBase.h>
 #include <Core/Color/Color.h>
-#include <Interface/Parameters.h>
 #include <Core/Exceptions/IllegalValue.h>
+#include <Image/Pixel.h>
+#include <Interface/Parameters.h>
 #include <sgi_stl_warnings_off.h>
 #include <string>
 #include <vector>
@@ -116,6 +117,9 @@
       }
     }
   }
+
+  template<>
+    void SimpleImage<ARGB8Pixel>::set(const Fragment& fragment); // ARGB8 specialization; defined in SimpleImage_special.cc
 
   template<class Pixel>
   void SimpleImage<Pixel>::get(Fragment& fragment) const

Added: trunk/Image/SimpleImage_special.cc
==============================================================================
--- (empty file)
+++ trunk/Image/SimpleImage_special.cc  Fri Apr 28 23:41:33 2006
@@ -0,0 +1,54 @@
+
+#include <Image/SimpleImage.h>
+#include <iostream>
+#include <MantaSSE.h>
+
+using namespace Manta;
+using namespace std;
+
+template<>
+void SimpleImage<ARGB8Pixel>::set(const Fragment& fragment)
+{
+  if(fragment.getFlag(Fragment::ConsecutiveX|Fragment::ConstantEye) ==
+     Fragment::ConsecutiveX|Fragment::ConstantEye){
+    int b = fragment.begin();
+    ARGB8Pixel* pix = 
eyeStart[fragment.getWhichEye(b)][fragment.getY(b)]+fragment.getX(b);
+#if MANTA_SSE
+    // If the fragment and the pixel are on an aligned boundary, use SSE
+    if(((fragment.pixelBegin | fragment.pixelEnd) & 0xf) == 0){
+      // Aligned for SSE
+      __m128 scale = _mm_set1_ps( 255.99999f );
+      for(int i=fragment.begin(); i+3< fragment.end();i+=4){
+        __m128 r = _mm_load_ps(&fragment.color[0][i]);
+        __m128 g = _mm_load_ps(&fragment.color[1][i]);
+        __m128 b = _mm_load_ps(&fragment.color[2][i]);
+        r = _mm_mul_ps(r, scale);
+        g = _mm_mul_ps(g, scale);
+        b = _mm_mul_ps(b, scale);
+        __m128i alpha = _mm_set1_epi32(255);  // alpha
+        __m128i r32 = _mm_cvttps_epi32(r); // 32 bits: r0r1r2r3
+        __m128i g32 = _mm_cvttps_epi32(g); // 32 bits: g0g1g2g3
+        __m128i b32 = _mm_cvttps_epi32(b); // 32 bits: b0b1b2b3
+        __m128i ag16 = _mm_packs_epi32(alpha, g32); // 16 bits: 
a0a1a2a3g0g1g2g3
+        __m128i rb16 = _mm_packs_epi32(r32, b32); // 16 bits: 
r0r1r2r3b0b1b2b3
+        __m128i ar16 = _mm_unpacklo_epi16(ag16, rb16); // 16 bits: 
a0r0a1r1a2r2a3r3
+        __m128i gb16 = _mm_unpackhi_epi16(ag16, rb16); // 16 bits: 
g0b0g1b1g2b0g3b3
+        __m128i argb16a = _mm_unpacklo_epi32(ar16, gb16); // 16 bits 
a0r0g0b0a1a1g1b1
+        __m128i argb16b = _mm_unpackhi_epi32(ar16, gb16); // 16 bits 
a2r2g2b2a3r3g3b3
+        __m128i result = _mm_packus_epi16(argb16a, argb16b); // 8 bits: 
a0r0g0b0...a3r3g3b3
+        _mm_stream_si128((__m128i*)pix, result);
+        pix += 4;
+      }
+    } else
+#endif /* MANTA_SSE */
+    {
+      for(int i=fragment.begin(); i< fragment.end();i++)
+        convertToPixel(*pix++, fragment.getColor(i).convertRGB());
+    }
+  } else {
+    for(int i=fragment.begin();i<fragment.end();i++){
+      
convertToPixel(eyeStart[fragment.getWhichEye(i)][fragment.getY(i)][fragment.getX(i)],
 fragment.getColor(i).convertRGB());
+    }
+  }
+}
+

Modified: trunk/include/CMakeLists.txt
==============================================================================
--- trunk/include/CMakeLists.txt        (original)
+++ trunk/include/CMakeLists.txt        Fri Apr 28 23:41:33 2006
@@ -27,4 +27,18 @@
   ${CMAKE_BINARY_DIR}/include/FragmentParameters.h
   )
 
+###############################################################################
+## Configure MantaSSE.h
+## MANTA_SSE_DEF mirrors MANTA_SSE as "1"/"0" (presumably substituted into the template -- confirm).
+IF(MANTA_SSE)
+  SET(MANTA_SSE_DEF "1" CACHE INTERNAL "Turn on SSE code")
+ELSE(MANTA_SSE)
+  SET(MANTA_SSE_DEF "0" CACHE INTERNAL "Turn off SSE code")
+ENDIF(MANTA_SSE)
+## Generate include/MantaSSE.h in the build tree from the template in the source tree.
+CONFIGURE_FILE(
+  ${CMAKE_SOURCE_DIR}/include/MantaSSE.h.CMakeTemplate
+  ${CMAKE_BINARY_DIR}/include/MantaSSE.h
+  )
+
 




  • [MANTA] r1022 - in trunk: Image include, sparker, 04/28/2006

Archive powered by MHonArc 2.6.16.

Top of page