Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1125 - trunk/Image
- Date: Tue, 20 Jun 2006 16:19:34 -0600 (MDT)
Author: bigler
Date: Tue Jun 20 16:19:34 2006
New Revision: 1125
Modified:
trunk/Image/SimpleImage_special.cc
Log:
Use the _MM_TRANSPOSE4_PS macro instead of all the crazy swizzle4
stuff. It turned out to be slightly faster on my machine, plus it is
easier to read. ;)
Modified: trunk/Image/SimpleImage_special.cc
==============================================================================
--- trunk/Image/SimpleImage_special.cc (original)
+++ trunk/Image/SimpleImage_special.cc Tue Jun 20 16:19:34 2006
@@ -181,12 +181,6 @@
}
}
-// vec_a = a[0], a[1], a[2], a[3]
-// vec_b = b[0], b[1], b[2], b[3]
-// Result = a[f0],a[f1],b[f2],b[f3]
-#define swizzle4(vec_a, vec_b, f0, f1, f2, f3) \
- _mm_shuffle_ps( (vec_a), (vec_b), _MM_SHUFFLE(f3, f2, f1, f0) )
-
template<>
void SimpleImage<RGBAfloatPixel>::set(const Fragment& fragment)
{
@@ -206,26 +200,21 @@
__m128 b = _mm_load_ps(&fragment.color[2][i]);
__m128 a = _mm_set_ps1(1.0f); // alpha a0a1a2a3
- // r0r1g0g1
- __m128 rg01 = swizzle4(r, g, 0, 1, 0, 1);
- // r2r3g2g3
- __m128 rg23 = swizzle4(r, g, 2, 3, 2, 3);
- // b0b1a0a1
- __m128 ba01 = swizzle4(b, a, 0, 1, 0, 1);
- // b2b3a2a3
- __m128 ba23 = swizzle4(b, a, 2, 3, 2, 3);
-
- // r0g0b0g0...
- __m128 p0 = swizzle4(rg01, ba01, 0, 2, 0, 2);
- __m128 p1 = swizzle4(rg01, ba01, 1, 3, 1, 3);
- __m128 p2 = swizzle4(rg23, ba23, 0, 2, 0, 2);
- __m128 p3 = swizzle4(rg23, ba23, 1, 3, 1, 3);
-
+ // This will do the following
+ // r = r0r1r2r3 r = r0g0b0a0
+ // g = g0g1g2g3 => g = r1g1b1a1
+ // b = b0b1b2b3 b = r2g2b2a2
+ // a = a0a1a2a3 a = r3g3b3a3
+ //
+ // I found this method to be slightly faster and easier to use
+ // than the swizzle4 code previously found in the repository.
+ _MM_TRANSPOSE4_PS(r, g, b, a);
+
// Copy data over
- _mm_stream_ps(fpix, p0);
- _mm_stream_ps(fpix+4, p1);
- _mm_stream_ps(fpix+8, p2);
- _mm_stream_ps(fpix+12, p3);
+ _mm_stream_ps(fpix, r);
+ _mm_stream_ps(fpix+4, g);
+ _mm_stream_ps(fpix+8, b);
+ _mm_stream_ps(fpix+12, a);
fpix += 16;
}
- [MANTA] r1125 - trunk/Image, bigler, 06/20/2006
Archive powered by MHonArc 2.6.16.