Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1394 - trunk/Core/Math


Chronological Thread 
  • From: bigler@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1394 - trunk/Core/Math
  • Date: Thu, 17 May 2007 15:41:48 -0600 (MDT)

Author: bigler
Date: Thu May 17 15:41:47 2007
New Revision: 1394

Added:
   trunk/Core/Math/MT_RNG_SIMD.h
Modified:
   trunk/Core/Math/SSEDefs.h
Log:

MT_RNG_SIMD.h

  A SIMD version of the MT random number generator.  Seems to work
  with a simple test, but needs to be more rigorously verified.

SSEDefs.h

  Added some functions needed to support MT_RNG_SIMD.


Added: trunk/Core/Math/MT_RNG_SIMD.h
==============================================================================
--- (empty file)
+++ trunk/Core/Math/MT_RNG_SIMD.h       Thu May 17 15:41:47 2007
@@ -0,0 +1,81 @@
+#ifndef MT_RNG_SIMD_H__
+#define MT_RNG_SIMD_H__
+
+#include <Core/Math/SSEDefs.h>
+
+namespace Manta {
+
+#ifdef MANTA_SSE
+  struct MT_RNG_SIMD
+  {
+    sse_int_t state[ 624 ];
+    int position;
+  };
+
+  inline void seed_MT_RNG_SIMD( MT_RNG_SIMD& that,
+                                sse_int_t seed )
+  {
+    that.state[ 0 ] = seed;
+    for ( that.position = 1; that.position < 624; ++that.position ) {
+      union
+      {
+        sse_int_t partial;
+        unsigned int ints[ 4 ];
+      };
+      partial = xor4i( that.state[ that.position - 1 ],
+                       shift_right4int( that.state[ that.position - 1 ], 30 
) );
+      ints[ 0 ] *= 0x6c078965;
+      ints[ 1 ] *= 0x6c078965;
+      ints[ 2 ] *= 0x6c078965;
+      ints[ 3 ] *= 0x6c078965;
+      that.state[ that.position ] = add4i( partial, set4i( that.position ) );
+    }
+  }
+
+  inline sse_t next_MT_RNG_SIMD( MT_RNG_SIMD& that )
+  {
+    if ( that.position == 624 ) {
+      static const sse_int_t magic = set4i( 0x9908b0df );
+      for ( int index = 0; index < 227; ++index ) {
+        sse_int_t mix = or4i( and4i( that.state[ index ], _mm_signbit_si128 
),
+                              and4i( that.state[ index + 1 ], 
_mm_absmask_si128 ) );
+        sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128 
), _mm_one_si128 ),
+                                      magic, _mm_zero_si128 );
+        that.state[ index ] = xor4i( xor4i( that.state[ index + 397 ],
+                                            shift_right4int( mix, 1 ) ),
+                                     magic_mix );
+      }
+      for ( int index = 227; index < 623; ++index ) {
+        sse_int_t mix = or4i( and4i( that.state[ index ], _mm_signbit_si128 
),
+                              and4i( that.state[ index + 1 ], 
_mm_absmask_si128 ) );
+        sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128 
), _mm_one_si128 ),
+                                      magic, _mm_zero_si128 );
+        that.state[ index ] = xor4i( xor4i( that.state[ index - 227 ],
+                                            shift_right4int( mix, 1 ) ),
+                                     magic_mix );
+      }
+      sse_int_t mix = or4i( and4i( that.state[ 623 ], _mm_signbit_si128 ),
+                            and4i( that.state[ 0 ], _mm_absmask_si128 ) );
+      sse_int_t magic_mix = mask4i( cmp4_eq_i( and4i( mix, _mm_one_si128 ), 
_mm_one_si128 ),
+                                    magic, _mm_zero_si128 );
+      that.state[ 623 ] = xor4i( xor4i( that.state[ 396 ],
+                                        shift_right4int( mix, 1 ) ),
+                                 magic_mix );
+      that.position = 0;
+    }
+    static const MANTA_ALIGN(16) sse_int_t magic2 = set4i( 0x9d2c5680 );
+    static const MANTA_ALIGN(16) sse_int_t magic3 = set4i( 0xefc60000 );
+    sse_int_t value = that.state[ that.position++ ];
+    value = xor4i( value, shift_right4int( value, 11 ) );
+    value = xor4i( value, and4i( shift_left4int( value, 7 ), magic2 ) );
+    value = xor4i( value, and4i( shift_left4int( value, 15 ), magic3 ) );
+    value = xor4i( value, shift_right4int( value, 18 ) );
+    static const MANTA_ALIGN(16) sse_t scale = set4( 1.0f / 4294967296.0f );
+    return add4( mul4( convert4_i2f( value ), scale ), _mm_one_half );
+  }
+
+#endif // #ifdef MANTA_SSE
+  
+} // end namespace Manta
+
+#endif // #ifndef MT_RNG_SIMD_H__

Modified: trunk/Core/Math/SSEDefs.h
==============================================================================
--- trunk/Core/Math/SSEDefs.h   (original)
+++ trunk/Core/Math/SSEDefs.h   Thu May 17 15:41:47 2007
@@ -24,8 +24,10 @@
 #define andnot4 _mm_andnot_ps
 #define andnot4i _mm_andnot_si128
 #define xor4 _mm_xor_ps
+#define xor4i _mm_xor_si128
 #define mul4 _mm_mul_ps
 #define add4 _mm_add_ps
+#define add4i _mm_add_epi32
 #define sub4 _mm_sub_ps
 #define min4 _mm_min_ps
 #define max4 _mm_max_ps
@@ -44,13 +46,20 @@
 #define cmp4_gt _mm_cmpgt_ps
 #define cmp4_lt _mm_cmplt_ps
 #define cmp4_eq _mm_cmpeq_ps
+#define cmp4_eq_i _mm_cmpeq_epi32
 #define load44 _mm_load_ps
 #define load44i _mm_load_si128
 #define store44 _mm_store_ps
 #define store44i _mm_store_si128
+// The cast_x2y are more like reinterpret casts.  For conversions of
+// types use the convert4 functions below.
 #define cast_i2f _mm_castsi128_ps
 #define cast_f2i _mm_castps_si128
+#define convert4_f2i _mm_cvttps_epi32
+#define convert4_i2f _mm_cvtepi32_ps
 #define sqrt4 _mm_sqrt_ps
+#define shift_right4int _mm_srli_epi32
+#define shift_left4int _mm_slli_epi32
 
 namespace std {
   std::ostream& operator<<(std::ostream& os, __m128);
@@ -102,7 +111,9 @@
     static const MANTA_ALIGN(16) sse_t _mm_minus_eps = _mm_set_ps1(-1e-5);
     static const MANTA_ALIGN(16) sse_t _mm_epsilon = _mm_set_ps1(1e-5);
     static const MANTA_ALIGN(16) sse_t _mm_one = _mm_set_ps1(1.f);
+    static const MANTA_ALIGN(16) sse_int_t _mm_one_si128 = _mm_set1_epi32(1);
     static const MANTA_ALIGN(16) sse_t _mm_zero = _mm_set_ps1(0.f);
+    static const MANTA_ALIGN(16) sse_int_t _mm_zero_si128 = 
_mm_set1_epi32(0);
     static const MANTA_ALIGN(16) sse_t _mm_one_half = _mm_set_ps1(0.5f);
     static const MANTA_ALIGN(16) sse_t _mm_two = _mm_set_ps1(2.f);
     static const MANTA_ALIGN(16) sse_t _mm_256 = _mm_set_ps1(256);
@@ -117,6 +128,8 @@
     static const MANTA_ALIGN(16) sse_t _mm_true = 
_mm_set_ps1((float&)_mm_inttruemask);
     static const int minusOneI = -1;
     static const MANTA_ALIGN(16) sse_t _mm_minusOne = _mm_set_ps1((float 
&)minusOneI);
+  static const MANTA_ALIGN(16) sse_int_t _mm_absmask_si128 = 
set4i(_mm_intabsmask);
+  static const MANTA_ALIGN(16) sse_int_t _mm_signbit_si128 = 
set4i(_mm_intsignbit);
 
     /*! return v0 + t*(v1-v0) */
     inline sse_t lerp4(const sse_t t, const sse_t v0, const sse_t v1)




  • [MANTA] r1394 - trunk/Core/Math, bigler, 05/17/2007

Archive powered by MHonArc 2.6.16.

Top of page