Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1394 - trunk/Core/Math
- Date: Thu, 17 May 2007 15:41:48 -0600 (MDT)
Author: bigler
Date: Thu May 17 15:41:47 2007
New Revision: 1394
Added:
trunk/Core/Math/MT_RNG_SIMD.h
Modified:
trunk/Core/Math/SSEDefs.h
Log:
MT_RNG_SIMD.h
A SIMD version of the MT random number generator. Seems to work
with a simple test, but needs to be more rigorously verified.
SSEDefs.h
Added some functions needed to support MT_RNG_SIMD.
Added: trunk/Core/Math/MT_RNG_SIMD.h
==============================================================================
--- (empty file)
+++ trunk/Core/Math/MT_RNG_SIMD.h Thu May 17 15:41:47 2007
@@ -0,0 +1,81 @@
+#ifndef MT_RNG_SIMD_H__
+#define MT_RNG_SIMD_H__
+
+#include <Core/Math/SSEDefs.h>
+
+namespace Manta {
+
+#ifdef MANTA_SSE
+ struct MT_RNG_SIMD // Generator state: a 624-entry table of sse_int_t values plus a read cursor.
+ {
+ sse_int_t state[ 624 ]; // Filled by seed_MT_RNG_SIMD; rebuilt in place by next_MT_RNG_SIMD once all entries are consumed.
+ int position; // Index of the next state entry to hand out; the value 624 makes next_MT_RNG_SIMD rebuild the table first.
+ };
+
+ inline void seed_MT_RNG_SIMD( MT_RNG_SIMD& that, // Fills all 624 entries of that.state from seed and leaves that.position at 624.
+ sse_int_t seed ) // Stored unchanged as state[ 0 ]; every later entry is derived from its predecessor.
+ {
+ that.state[ 0 ] = seed;
+ for ( that.position = 1; that.position < 624; ++that.position ) { // The cursor doubles as the loop index, so it equals 624 on exit.
+ union // Overlays the vector with four unsigned ints so each 32-bit element can be multiplied individually below.
+ {
+ sse_int_t partial;
+ unsigned int ints[ 4 ];
+ }; // NOTE(review): reading ints after writing partial relies on union type punning - confirm the supported compilers allow it.
+ partial = xor4i( that.state[ that.position - 1 ], // previous ^ ( previous >> 30 ), applied to each 32-bit element.
+ shift_right4int( that.state[ that.position - 1 ], 30
) );
+ ints[ 0 ] *= 0x6c078965; // Scalar multiply per element; NOTE(review): presumably because no packed 32-bit multiply wrapper exists in SSEDefs.h - confirm.
+ ints[ 1 ] *= 0x6c078965;
+ ints[ 2 ] *= 0x6c078965;
+ ints[ 3 ] *= 0x6c078965;
+ that.state[ that.position ] = add4i( partial, set4i( that.position ) ); // Adds the entry index to the multiplied value and stores the new entry.
+ }
+ }
+
+ inline sse_t next_MT_RNG_SIMD( MT_RNG_SIMD& that ) // Returns an sse_t of floats derived from the next state entry and advances that.position.
+ {
+ if ( that.position == 624 ) { // Table exhausted (also the state seed_MT_RNG_SIMD leaves behind): rebuild all 624 entries in place.
+ static const sse_int_t magic = set4i( 0x9908b0df ); // NOTE(review): unlike magic2/magic3 below, this constant is declared without MANTA_ALIGN(16).
+ for ( int index = 0; index < 227; ++index ) { // Entries 0..226 are combined with the entry 397 slots ahead.
+ sse_int_t mix = or4i( and4i( that.state[ index ], _mm_signbit_si128 // Masked bits of this entry OR'd with masked bits of the next one; mask values come from _mm_intsignbit/_mm_intabsmask, not visible here.
),
+ and4i( that.state[ index + 1 ],
_mm_absmask_si128 ) );
+ sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128 // Tests the low bit of each element of mix; presumably mask4i yields magic where it is set and zero elsewhere - confirm mask4i's argument order.
), _mm_one_si128 ),
+ magic, _mm_zero_si128 );
+ that.state[ index ] = xor4i( xor4i( that.state[ index + 397 ], // state[ index ] = state[ index + 397 ] ^ ( mix >> 1 ) ^ magic_mix
+ shift_right4int( mix, 1 ) ),
+ magic_mix );
+ }
+ for ( int index = 227; index < 623; ++index ) { // Entries 227..622 are combined with the entry 227 slots behind, which was already rebuilt in this pass.
+ sse_int_t mix = or4i( and4i( that.state[ index ], _mm_signbit_si128
),
+ and4i( that.state[ index + 1 ],
_mm_absmask_si128 ) );
+ sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128
), _mm_one_si128 ),
+ magic, _mm_zero_si128 );
+ that.state[ index ] = xor4i( xor4i( that.state[ index - 227 ], // Same update as the first loop, with the partner entry at index - 227.
+ shift_right4int( mix, 1 ) ),
+ magic_mix );
+ }
+ sse_int_t mix = or4i( and4i( that.state[ 623 ], _mm_signbit_si128 ), // Last entry wraps around: it is paired with state[ 0 ] instead of a following entry.
+ and4i( that.state[ 0 ], _mm_absmask_si128 ) );
+ sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128 ),
_mm_one_si128 ),
+ magic, _mm_zero_si128 );
+ that.state[ 623 ] = xor4i( xor4i( that.state[ 396 ], // Partner entry for slot 623 is slot 396.
+ shift_right4int( mix, 1 ) ),
+ magic_mix );
+ that.position = 0; // Resume reading from the front of the rebuilt table.
+ }
+ static const MANTA_ALIGN(16) sse_int_t magic2 = set4i( 0x9d2c5680 ); // Mask applied after the left shift by 7 below.
+ static const MANTA_ALIGN(16) sse_int_t magic3 = set4i( 0xefc60000 ); // Mask applied after the left shift by 15 below.
+ sse_int_t value = that.state[ that.position++ ]; // Fetch the current entry and advance the cursor.
+ value = xor4i( value, shift_right4int( value, 11 ) ); // Four xor/shift/mask steps scramble the raw entry before it is converted to float.
+ value = xor4i( value, and4i( shift_left4int( value, 7 ), magic2 ) );
+ value = xor4i( value, and4i( shift_left4int( value, 15 ), magic3 ) );
+ value = xor4i( value, shift_right4int( value, 18 ) );
+ static const MANTA_ALIGN(16) sse_t scale = set4( 1.0f / 4294967296.0f ); // 1 / 2^32
+ return add4( mul4( convert4_i2f( value ), scale ), _mm_one_half ); // convert4_i2f is the signed conversion _mm_cvtepi32_ps, so value * scale is centered on zero before 0.5 is added. NOTE(review): float rounding at the top of the range may return exactly 1.0 - confirm callers tolerate that.
+ }
+
+#endif // #ifdef MANTA_SSE
+
+} // end namespace Manta
+
+#endif // #ifndef MT_RNG_SIMD_H__
Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h (original)
+++ trunk/Core/Math/SSEDefs.h Thu May 17 15:41:47 2007
@@ -24,8 +24,10 @@
#define andnot4 _mm_andnot_ps
#define andnot4i _mm_andnot_si128
#define xor4 _mm_xor_ps
+#define xor4i _mm_xor_si128
#define mul4 _mm_mul_ps
#define add4 _mm_add_ps
+#define add4i _mm_add_epi32
#define sub4 _mm_sub_ps
#define min4 _mm_min_ps
#define max4 _mm_max_ps
@@ -44,13 +46,20 @@
#define cmp4_gt _mm_cmpgt_ps
#define cmp4_lt _mm_cmplt_ps
#define cmp4_eq _mm_cmpeq_ps
+#define cmp4_eq_i _mm_cmpeq_epi32
#define load44 _mm_load_ps
#define load44i _mm_load_si128
#define store44 _mm_store_ps
#define store44i _mm_store_si128
+// The cast_x2y macros are more like reinterpret casts. For conversions of
+// types, use the convert4 functions below.
#define cast_i2f _mm_castsi128_ps
#define cast_f2i _mm_castps_si128
+#define convert4_f2i _mm_cvttps_epi32
+#define convert4_i2f _mm_cvtepi32_ps
#define sqrt4 _mm_sqrt_ps
+#define shift_right4int _mm_srli_epi32
+#define shift_left4int _mm_slli_epi32
namespace std {
std::ostream& operator<<(std::ostream& os, __m128);
@@ -102,7 +111,9 @@
static const MANTA_ALIGN(16) sse_t _mm_minus_eps = _mm_set_ps1(-1e-5);
static const MANTA_ALIGN(16) sse_t _mm_epsilon = _mm_set_ps1(1e-5);
static const MANTA_ALIGN(16) sse_t _mm_one = _mm_set_ps1(1.f);
+ static const MANTA_ALIGN(16) sse_int_t _mm_one_si128 = _mm_set1_epi32(1);
static const MANTA_ALIGN(16) sse_t _mm_zero = _mm_set_ps1(0.f);
+ static const MANTA_ALIGN(16) sse_int_t _mm_zero_si128 =
_mm_set1_epi32(0);
static const MANTA_ALIGN(16) sse_t _mm_one_half = _mm_set_ps1(0.5f);
static const MANTA_ALIGN(16) sse_t _mm_two = _mm_set_ps1(2.f);
static const MANTA_ALIGN(16) sse_t _mm_256 = _mm_set_ps1(256);
@@ -117,6 +128,8 @@
static const MANTA_ALIGN(16) sse_t _mm_true =
_mm_set_ps1((float&)_mm_inttruemask);
static const int minusOneI = -1;
static const MANTA_ALIGN(16) sse_t _mm_minusOne = _mm_set_ps1((float
&)minusOneI);
+ static const MANTA_ALIGN(16) sse_int_t _mm_absmask_si128 =
set4i(_mm_intabsmask);
+ static const MANTA_ALIGN(16) sse_int_t _mm_signbit_si128 =
set4i(_mm_intsignbit);
/*! return v0 + t*(v1-v0) */
inline sse_t lerp4(const sse_t t, const sse_t v0, const sse_t v1)
- [MANTA] r1394 - trunk/Core/Math, bigler, 05/17/2007
Archive powered by MHonArc 2.6.16.