Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1125 - trunk/Image


Chronological Thread 
  • From: bigler@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1125 - trunk/Image
  • Date: Tue, 20 Jun 2006 16:19:34 -0600 (MDT)

Author: bigler
Date: Tue Jun 20 16:19:34 2006
New Revision: 1125

Modified:
   trunk/Image/SimpleImage_special.cc
Log:

Use the _MM_TRANSPOSE4_PS macro instead of all the crazy swizzle4
stuff.  It turned out to be slightly faster on my machine, plus it is
easier to read. ;)


Modified: trunk/Image/SimpleImage_special.cc
==============================================================================
--- trunk/Image/SimpleImage_special.cc  (original)
+++ trunk/Image/SimpleImage_special.cc  Tue Jun 20 16:19:34 2006
@@ -181,12 +181,6 @@
   }
 }
 
-// vec_a  = a[0], a[1], a[2], a[3]
-// vec_b  = b[0], b[1], b[2], b[3]
-// Result = a[f0],a[f1],b[f2],b[f3]
-#define swizzle4(vec_a, vec_b, f0, f1, f2, f3) \
-  _mm_shuffle_ps( (vec_a), (vec_b), _MM_SHUFFLE(f3, f2, f1, f0) )
-
 template<>
 void SimpleImage<RGBAfloatPixel>::set(const Fragment& fragment)
 {
@@ -206,26 +200,21 @@
         __m128 b = _mm_load_ps(&fragment.color[2][i]);
         __m128 a = _mm_set_ps1(1.0f);  // alpha a0a1a2a3
 
-        // r0r1g0g1
-        __m128 rg01 = swizzle4(r, g, 0, 1, 0, 1);
-        // r2r3g2g3
-        __m128 rg23 = swizzle4(r, g, 2, 3, 2, 3);
-        // b0b1a0a1
-        __m128 ba01 = swizzle4(b, a, 0, 1, 0, 1);
-        // b2b3a2a3
-        __m128 ba23 = swizzle4(b, a, 2, 3, 2, 3);
-
-        // r0g0b0g0...
-        __m128 p0 = swizzle4(rg01, ba01, 0, 2, 0, 2);
-        __m128 p1 = swizzle4(rg01, ba01, 1, 3, 1, 3);
-        __m128 p2 = swizzle4(rg23, ba23, 0, 2, 0, 2);
-        __m128 p3 = swizzle4(rg23, ba23, 1, 3, 1, 3);
-
+        // This will do the following
+        // r = r0r1r2r3       r = r0g0b0a0
+        // g = g0g1g2g3   =>  g = r1g1b1a1
+        // b = b0b1b2b3       b = r2g2b2a2
+        // a = a0a1a2a3       a = r3g3b3a3
+        //
+        // I found this method to be slightly faster and easier to use
+        // than the swizzle4 code previously found in the repository.
+        _MM_TRANSPOSE4_PS(r, g, b, a);
+        
         // Copy data over
-        _mm_stream_ps(fpix,    p0);
-        _mm_stream_ps(fpix+4,  p1);
-        _mm_stream_ps(fpix+8,  p2);
-        _mm_stream_ps(fpix+12, p3);
+        _mm_stream_ps(fpix,    r);
+        _mm_stream_ps(fpix+4,  g);
+        _mm_stream_ps(fpix+8,  b);
+        _mm_stream_ps(fpix+12, a);
 
         fpix += 16;
       }




  • [MANTA] r1125 - trunk/Image, bigler, 06/20/2006

Archive powered by MHonArc 2.6.16.

Top of page