Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1423 - trunk/Core/Math


Chronological Thread 
  • From: bigler@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1423 - trunk/Core/Math
  • Date: Tue, 26 Jun 2007 15:28:26 -0600 (MDT)

Author: bigler
Date: Tue Jun 26 15:28:25 2007
New Revision: 1423

Modified:
   trunk/Core/Math/Noise.cc
Log:

Core/Math/Noise.cc

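  Added an _mm_mullo_epi32 helper that emulates a packed 32-bit multiply
  (keeping the low 32 bits of each product) using _mm_mul_epu32 plus
  shuffles.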

  New version of Interpolate that has only one multiply.

  Added CheapRNG and table-lookup variants of permutationSSE, selected
  with preprocessor switches.  The code currently defaults to the table
  lookup.

  Fixed ScalarNoiseSSE by subtracting one from the x, y and z offsets
  passed to grad() for the far (+1) corners of the cell; previously all
  eight corners were given the same offsets.


Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc    (original)
+++ trunk/Core/Math/Noise.cc    Tue Jun 26 15:28:25 2007
@@ -330,9 +330,27 @@
   }
 
 #if MANTA_SSE
+  __m128i _mm_mullo_epi32( __m128i a, __m128i b )
+  {
+    __m128i t0;
+    __m128i t1;
+
+    t0 = _mm_mul_epu32(a,b);
+    t1 = _mm_mul_epu32( _mm_shuffle_epi32( a, 0xB1 ),
+                        _mm_shuffle_epi32( b, 0xB1 ) );
+
+    t0 = _mm_shuffle_epi32( t0, 0xD8 );
+    t1 = _mm_shuffle_epi32( t1, 0xD8 );
+
+    return _mm_unpacklo_epi32( t0, t1 );
+  } 
+
   // This will return a random hash number from 0 to 255
   /*static inline*/ __m128i permutationSSE(const __m128i& index)
   {
+#if 0 // 0 for table, 1 for RNG
+#if 1 // 0 for CheapRNG, 1 for DissolveRNG
+    
 //     val = val & 255;
 //     if (val & 1)
 //       val = (val >> 1) ^ mask;
@@ -342,6 +360,22 @@
     __m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked, 
_mm_set1_epi32(1)), _mm_set1_epi32(1));
     return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
                          _mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
+#else
+    return _mm_and_si128(_mm_set1_epi32(0xFF),
+                         _mm_add_epi32(_mm_set1_epi32(1013904223),
+                                       
_mm_mullo_epi32(_mm_set1_epi32(1664525), index)));
+                                                       
+#endif
+#else
+    union {
+      unsigned int i[4];
+      __m128i      s;
+    } indicies, results;
+    indicies.s = index;
+    for(unsigned int i = 0; i < 4; ++i)
+      results.i[i] = NoiseXPermutationTable[indicies.i[i]&255];
+    return results.s;
+#endif
   }
 
   /*static inline*/ __m128 grad(const __m128i& hash,
@@ -349,7 +383,6 @@
                             const __m128 & y,
                             const __m128 & z)
   {
-#if 1
     // CONVERT LO 4 BITS OF HASH CODE
     // int h = hash & 15;
     __m128i h    = _mm_and_si128(hash, _mm_set1_epi32(15));
@@ -381,15 +414,13 @@
                                                              
_mm_set1_epi32(0x80000000))),
                               v);
     return _mm_add_ps(uPart, vPart);
-#else
-    
-#endif
   }
 
   static inline __m128 Interpolate(const __m128& d1, const __m128& d2, const 
__m128& weight)
   {
-    return _mm_add_ps(_mm_mul_ps(d1, _mm_sub_ps(_mm_set_ps1(1.f), weight)),
-                      _mm_mul_ps(d2, weight));
+    return _mm_add_ps(d1,
+                      _mm_mul_ps(weight,
+                                 _mm_sub_ps(d2, d1)));
   }
   
   __m128 ScalarNoiseSSE( const __m128& location_x,
@@ -398,7 +429,13 @@
   {
     __m128  offset_of_x  = fracSSE(location_x);
     __m128i integer_of_x = _mm_cvttps_epi32(_mm_sub_ps(location_x, 
offset_of_x));
-    __m128  fade_x       = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x), 
_mm_mul_ps(offset_of_x, _mm_add_ps(_mm_mul_ps(offset_of_x, 
_mm_sub_ps(_mm_mul_ps(offset_of_x, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))), 
_mm_set_ps1(10.f))));
+    __m128  fade_x       = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x),
+                                      _mm_mul_ps(offset_of_x,
+                                                 
_mm_add_ps(_mm_mul_ps(offset_of_x,
+                                                                       
_mm_sub_ps(_mm_mul_ps(offset_of_x,
+                                                                             
                _mm_set_ps1(6.f)),
+                                                                             
     _mm_set_ps1(15.f))),
+                                                            
_mm_set_ps1(10.f))));
 
     __m128  offset_of_y  = fracSSE(location_y);
     __m128i integer_of_y = _mm_cvttps_epi32(_mm_sub_ps(location_y, 
offset_of_y));
@@ -413,26 +450,26 @@
     __m128i hash_000     = permutationSSE(_mm_add_epi32(hash_00, 
integer_of_z));
     __m128i hash_001     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z), 
_mm_set1_epi32(1)));
     __m128  value_000    = grad(hash_000, offset_of_x, offset_of_y, 
offset_of_z);
-    __m128  value_001    = grad(hash_001, offset_of_x, offset_of_y, 
offset_of_z);
+    __m128  value_001    = grad(hash_001, offset_of_x, offset_of_y, 
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
     
     __m128i hash_01      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0 
, integer_of_y), _mm_set1_epi32(1)));
     __m128i hash_010     = permutationSSE(_mm_add_epi32(hash_01, 
integer_of_z));
     __m128i hash_011     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z), 
_mm_set1_epi32(1)));
-    __m128  value_010    = grad(hash_010, offset_of_x, offset_of_y, 
offset_of_z);
-    __m128  value_011    = grad(hash_011, offset_of_x, offset_of_y, 
offset_of_z);
+    __m128  value_010    = grad(hash_010, offset_of_x, 
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+    __m128  value_011    = grad(hash_011, offset_of_x, 
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z, 
_mm_set_ps1(1)));
 
     __m128i hash_1       = permutationSSE(_mm_add_epi32(integer_of_x, 
_mm_set1_epi32(1)));
     __m128i hash_10      = permutationSSE(_mm_add_epi32(hash_1,  
integer_of_y));
     __m128i hash_100     = permutationSSE(_mm_add_epi32(hash_10, 
integer_of_z));
     __m128i hash_101     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z), 
_mm_set1_epi32(1)));
-    __m128  value_100    = grad(hash_100, offset_of_x, offset_of_y, 
offset_of_z);
-    __m128  value_101    = grad(hash_101, offset_of_x, offset_of_y, 
offset_of_z);
+    __m128  value_100    = grad(hash_100, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), offset_of_y, offset_of_z);
+    __m128  value_101    = grad(hash_101, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
 
     __m128i hash_11      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1 
, integer_of_y), _mm_set1_epi32(1)));
     __m128i hash_110     = permutationSSE(_mm_add_epi32(hash_11, 
integer_of_z));
     __m128i hash_111     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z), 
_mm_set1_epi32(1)));
-    __m128  value_110    = grad(hash_110, offset_of_x, offset_of_y, 
offset_of_z);
-    __m128  value_111    = grad(hash_111, offset_of_x, offset_of_y, 
offset_of_z);
+    __m128  value_110    = grad(hash_110, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+    __m128  value_111    = grad(hash_111, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), 
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
 
     //
     __m128  value_00     = Interpolate(value_000, value_001, fade_z);




  • [MANTA] r1423 - trunk/Core/Math, bigler, 06/26/2007

Archive powered by MHonArc 2.6.16.

Top of page