Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1022 - in trunk: Image include
- Date: Fri, 28 Apr 2006 23:41:38 -0600 (MDT)
Author: sparker
Date: Fri Apr 28 23:41:33 2006
New Revision: 1022
Added:
trunk/Image/SimpleImage_special.cc
Modified:
trunk/Image/CMakeLists.txt
trunk/Image/SimpleImage.h
trunk/include/CMakeLists.txt
Log:
Use SSE for SimpleImage::set. Results in a ~60% speedup for -pixelsampler null.
Now at ~82 fps for 1 processor and 120 fps for 2 (limited by OpenGL) on a
MacBook Pro.
SSE enabled for ARGB format only
Modified: trunk/Image/CMakeLists.txt
==============================================================================
--- trunk/Image/CMakeLists.txt (original)
+++ trunk/Image/CMakeLists.txt Fri Apr 28 23:41:33 2006
@@ -32,6 +32,7 @@
SimpleImageBase.h
SimpleImageBase.cc
SimpleImage.h
+ SimpleImage_special.cc
SimpleImage_templates.cc
TGAFile.h
TGAFile.cc
Modified: trunk/Image/SimpleImage.h
==============================================================================
--- trunk/Image/SimpleImage.h (original)
+++ trunk/Image/SimpleImage.h Fri Apr 28 23:41:33 2006
@@ -31,8 +31,9 @@
#include <Image/SimpleImageBase.h>
#include <Core/Color/Color.h>
-#include <Interface/Parameters.h>
#include <Core/Exceptions/IllegalValue.h>
+#include <Image/Pixel.h>
+#include <Interface/Parameters.h>
#include <sgi_stl_warnings_off.h>
#include <string>
#include <vector>
@@ -116,6 +117,9 @@
}
}
}
+
+ template<>
+ void SimpleImage<ARGB8Pixel>::set(const Fragment& fragment);
template<class Pixel>
void SimpleImage<Pixel>::get(Fragment& fragment) const
Added: trunk/Image/SimpleImage_special.cc
==============================================================================
--- (empty file)
+++ trunk/Image/SimpleImage_special.cc Fri Apr 28 23:41:33 2006
@@ -0,0 +1,54 @@
+
+#include <Image/SimpleImage.h>
+#include <iostream>
+#include <MantaSSE.h>
+
+using namespace Manta;
+using namespace std;
+
+// ARGB8 specialization of set(): stores the colors of `fragment` into the image.
+// Fragments flagged both ConsecutiveX and ConstantEye are written as one run from
+// the first pixel's address; all other fragments are addressed pixel by pixel.
+template<>
+void SimpleImage<ARGB8Pixel>::set(const Fragment& fragment)
+{
+  // The mask on the right must be parenthesized: == binds tighter than |.
+  if(fragment.getFlag(Fragment::ConsecutiveX|Fragment::ConstantEye) == (Fragment::ConsecutiveX|Fragment::ConstantEye)){
+    int first = fragment.begin();
+    ARGB8Pixel* pix = eyeStart[fragment.getWhichEye(first)][fragment.getY(first)]+fragment.getX(first);
+#if MANTA_SSE
+    // SSE path only when pixelBegin and pixelEnd both have their low four bits clear.
+    // NOTE(review): pix is not tested for the 16-byte alignment _mm_stream_si128 needs - confirm.
+    if(((fragment.pixelBegin | fragment.pixelEnd) & 0xf) == 0){
+      __m128 scale = _mm_set1_ps( 255.99999f );
+      for(int i=fragment.begin(); i+3< fragment.end();i+=4){
+        __m128 r = _mm_mul_ps(_mm_load_ps(&fragment.color[0][i]), scale);
+        __m128 g = _mm_mul_ps(_mm_load_ps(&fragment.color[1][i]), scale);
+        __m128 b = _mm_mul_ps(_mm_load_ps(&fragment.color[2][i]), scale);
+        __m128i alpha = _mm_set1_epi32(255); // alpha
+        __m128i r32 = _mm_cvttps_epi32(r); // 32 bits: r0r1r2r3
+        __m128i g32 = _mm_cvttps_epi32(g); // 32 bits: g0g1g2g3
+        __m128i b32 = _mm_cvttps_epi32(b); // 32 bits: b0b1b2b3
+        __m128i ag16 = _mm_packs_epi32(alpha, g32); // 16 bits: a0a1a2a3g0g1g2g3
+        __m128i rb16 = _mm_packs_epi32(r32, b32); // 16 bits: r0r1r2r3b0b1b2b3
+        __m128i ar16 = _mm_unpacklo_epi16(ag16, rb16); // 16 bits: a0r0a1r1a2r2a3r3
+        __m128i gb16 = _mm_unpackhi_epi16(ag16, rb16); // 16 bits: g0b0g1b1g2b2g3b3
+        __m128i argb16a = _mm_unpacklo_epi32(ar16, gb16); // 16 bits: a0r0g0b0a1r1g1b1
+        __m128i argb16b = _mm_unpackhi_epi32(ar16, gb16); // 16 bits: a2r2g2b2a3r3g3b3
+        __m128i result = _mm_packus_epi16(argb16a, argb16b); // 8 bits: a0r0g0b0...a3r3g3b3
+        _mm_stream_si128((__m128i*)pix, result);
+        pix += 4;
+      }
+    } else
+#endif /* MANTA_SSE */
+    {
+      for(int i=fragment.begin(); i< fragment.end();i++)
+        convertToPixel(*pix++, fragment.getColor(i).convertRGB());
+    }
+  } else {
+    for(int i=fragment.begin();i<fragment.end();i++){
+      convertToPixel(eyeStart[fragment.getWhichEye(i)][fragment.getY(i)][fragment.getX(i)], fragment.getColor(i).convertRGB());
+    }
+  }
+}
+
Modified: trunk/include/CMakeLists.txt
==============================================================================
--- trunk/include/CMakeLists.txt (original)
+++ trunk/include/CMakeLists.txt Fri Apr 28 23:41:33 2006
@@ -27,4 +27,18 @@
${CMAKE_BINARY_DIR}/include/FragmentParameters.h
)
+###############################################################################
+## Configure MantaSSE.h: generated in the build tree from the source template.
+## MANTA_SSE_DEF is cached as "1" when MANTA_SSE is set and as "0" otherwise.
+IF(MANTA_SSE)
+ SET(MANTA_SSE_DEF "1" CACHE INTERNAL "Turn on SSE code")
+ELSE(MANTA_SSE)
+ SET(MANTA_SSE_DEF "0" CACHE INTERNAL "Turn off SSE code")
+ENDIF(MANTA_SSE)
+
+CONFIGURE_FILE(
+ ${CMAKE_SOURCE_DIR}/include/MantaSSE.h.CMakeTemplate
+ ${CMAKE_BINARY_DIR}/include/MantaSSE.h
+ )
+
- [MANTA] r1022 - in trunk: Image include, sparker, 04/28/2006
Archive powered by MHonArc 2.6.16.