Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1422 - trunk/Core/Math


Chronological Thread 
  • From: bigler@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1422 - trunk/Core/Math
  • Date: Sat, 23 Jun 2007 15:18:31 -0600 (MDT)

Author: bigler
Date: Sat Jun 23 15:18:29 2007
New Revision: 1422

Modified:
   trunk/Core/Math/Noise.cc
   trunk/Core/Math/Noise.h
Log:

Core/Math/Noise.cc
Core/Math/Noise.h

  Got something kind of running for ScalarNoiseSSE.  There are some
  weird bugs I'm going to try and work out now.


Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc    (original)
+++ trunk/Core/Math/Noise.cc    Sat Jun 23 15:18:29 2007
@@ -4,7 +4,7 @@
 #include <Core/Math/Noise.h>
 
 #ifdef MANTA_SSE
-#  include <SSEDefs.h>
+#  include <Core/Math/SSEDefs.h>
 #endif
 
 using namespace SCIRun;
@@ -329,16 +329,67 @@
     return Interpolate( value_0, value_1, fade_x );
   }
 
+#if MANTA_SSE
   // This will return a random hash number from 0 to 255
-  static __m128i permutationSSE(cosnt __m128i& index)
+  /*static inline*/ __m128i permutationSSE(const __m128i& index)
   {
+//     val = val & 255;
 //     if (val & 1)
 //       val = (val >> 1) ^ mask;
 //     else
 //       val = val >> 1;
-    __m128i index_masked = _mm_and_si128(index, _mm_set_epi32(0xFF));
-    __m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked, _mm_set_epi32(1)), _mm_set_epi32(1));
-    return _mm_xor_si128(_mm_srli_epi32(index_masked, _mm_srli_epi32(1)), _mm_and_si128(ifmask, _mm_set_epi32(0xB8)));
+    __m128i index_masked = _mm_and_si128(index, _mm_set1_epi32(0xFF));
+    __m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked, _mm_set1_epi32(1)), _mm_set1_epi32(1));
+    return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
+                         _mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
+  }
+
+  /*static inline*/ __m128 grad(const __m128i& hash,
+                            const __m128 & x,
+                            const __m128 & y,
+                            const __m128 & z)
+  {
+#if 1
+    // CONVERT LO 4 BITS OF HASH CODE
+    // int h = hash & 15;
+    __m128i h    = _mm_and_si128(hash, _mm_set1_epi32(15));
+    // INTO 12 GRADIENT DIRECTIONS.
+    // double u = h<8 ? x : y;
+    __m128  u    = mask4(_mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(8))),
+                         x,
+                         y);
+    // double v = h<4 ? y : h==12||h==14 ? x : z;
+    __m128  v    = mask4(_mm_castsi128_ps(_mm_cmplt_epi32(h, _mm_set1_epi32(4))),
+                         y,
+                         mask4(_mm_castsi128_ps(_mm_or_si128(_mm_cmpeq_epi32(h, _mm_set1_epi32(12)),
+                                                             _mm_cmpeq_epi32(h, _mm_set1_epi32(14)))),
+                               x,
+                               z));
+    // return ((h&1) == 0 ? u : -u) + ((h&2) == 0 ? v : -v);
+    // return ((h&1) == 1 ? -u : u) + ((h&2) == 2 ? -v : v);
+
+    // To do the unary negation we will xor 0x80000000 with the sign
+    // bit.  xor'ing with zero will leave the result unchanged, so if
+    // we 'and' the comparison mask with 0x80000000 we will be able to
+    // selectively change the sign bit.
+    __m128 uPart = _mm_xor_ps(_mm_castsi128_ps(_mm_and_si128(_mm_cmpeq_epi32(_mm_and_si128(h, _mm_set1_epi32(1)),
+                                                                             _mm_set1_epi32(1)),
+                                                             _mm_set1_epi32(0x80000000))),
+                              u);
+    __m128 vPart = _mm_xor_ps(_mm_castsi128_ps(_mm_and_si128(_mm_cmpeq_epi32(_mm_and_si128(h, _mm_set1_epi32(2)),
+                                                                             _mm_set1_epi32(2)),
+                                                             _mm_set1_epi32(0x80000000))),
+                              v);
+    return _mm_add_ps(uPart, vPart);
+#else
+    
+#endif
+  }
+
+  static inline __m128 Interpolate(const __m128& d1, const __m128& d2, const __m128& weight)
+  {
+    return _mm_add_ps(_mm_mul_ps(d1, _mm_sub_ps(_mm_set_ps1(1.f), weight)),
+                      _mm_mul_ps(d2, weight));
   }
   
   __m128 ScalarNoiseSSE( const __m128& location_x,
@@ -348,11 +399,55 @@
     __m128  offset_of_x  = fracSSE(location_x);
     __m128i integer_of_x = _mm_cvttps_epi32(_mm_sub_ps(location_x, offset_of_x));
     __m128  fade_x       = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x), _mm_mul_ps(offset_of_x, _mm_add_ps(_mm_mul_ps(offset_of_x, _mm_sub_ps(_mm_mul_ps(offset_of_x, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))), _mm_set_ps1(10.f))));
+
+    __m128  offset_of_y  = fracSSE(location_y);
+    __m128i integer_of_y = _mm_cvttps_epi32(_mm_sub_ps(location_y, offset_of_y));
+    __m128  fade_y       = _mm_mul_ps(_mm_mul_ps(offset_of_y, offset_of_y), _mm_mul_ps(offset_of_y, _mm_add_ps(_mm_mul_ps(offset_of_y, _mm_sub_ps(_mm_mul_ps(offset_of_y, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))), _mm_set_ps1(10.f))));
+
+    __m128  offset_of_z  = fracSSE(location_z);
+    __m128i integer_of_z = _mm_cvttps_epi32(_mm_sub_ps(location_z, offset_of_z));
+    __m128  fade_z       = _mm_mul_ps(_mm_mul_ps(offset_of_z, offset_of_z), _mm_mul_ps(offset_of_z, _mm_add_ps(_mm_mul_ps(offset_of_z, _mm_sub_ps(_mm_mul_ps(offset_of_z, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))), _mm_set_ps1(10.f))));
+    
     __m128i hash_0       = permutationSSE(integer_of_x);
-    __m128i hash_00      = permutationSSE(_mm_add_ps(hash_0,  integer_of_y));
-    __m128i hash_000     = permutationSSE(_mm_add_ps(hash_00, integer_of_z));
-    __m128i hash_001     = permutationSSE(_mm_add_ps(_mm_add_ps(hash_00, integer_of_z), _mm_set_epi32(1)));
+    __m128i hash_00      = permutationSSE(_mm_add_epi32(hash_0,  integer_of_y));
+    __m128i hash_000     = permutationSSE(_mm_add_epi32(hash_00, integer_of_z));
+    __m128i hash_001     = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z), _mm_set1_epi32(1)));
+    __m128  value_000    = grad(hash_000, offset_of_x, offset_of_y, offset_of_z);
+    __m128  value_001    = grad(hash_001, offset_of_x, offset_of_y, offset_of_z);
+    
+    __m128i hash_01      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0 , integer_of_y), _mm_set1_epi32(1)));
+    __m128i hash_010     = permutationSSE(_mm_add_epi32(hash_01, integer_of_z));
+    __m128i hash_011     = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z), _mm_set1_epi32(1)));
+    __m128  value_010    = grad(hash_010, offset_of_x, offset_of_y, offset_of_z);
+    __m128  value_011    = grad(hash_011, offset_of_x, offset_of_y, offset_of_z);
+
+    __m128i hash_1       = permutationSSE(_mm_add_epi32(integer_of_x, _mm_set1_epi32(1)));
+    __m128i hash_10      = permutationSSE(_mm_add_epi32(hash_1,  integer_of_y));
+    __m128i hash_100     = permutationSSE(_mm_add_epi32(hash_10, integer_of_z));
+    __m128i hash_101     = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z), _mm_set1_epi32(1)));
+    __m128  value_100    = grad(hash_100, offset_of_x, offset_of_y, offset_of_z);
+    __m128  value_101    = grad(hash_101, offset_of_x, offset_of_y, offset_of_z);
+
+    __m128i hash_11      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1 , integer_of_y), _mm_set1_epi32(1)));
+    __m128i hash_110     = permutationSSE(_mm_add_epi32(hash_11, integer_of_z));
+    __m128i hash_111     = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z), _mm_set1_epi32(1)));
+    __m128  value_110    = grad(hash_110, offset_of_x, offset_of_y, offset_of_z);
+    __m128  value_111    = grad(hash_111, offset_of_x, offset_of_y, offset_of_z);
+
+    //
+    __m128  value_00     = Interpolate(value_000, value_001, fade_z);
+    __m128  value_01     = Interpolate(value_010, value_011, fade_z);
+    __m128  value_0      = Interpolate(value_00,  value_01,  fade_y);
+    // 
+    __m128  value_10     = Interpolate(value_100, value_101, fade_z);
+    __m128  value_11     = Interpolate(value_110, value_111, fade_z);
+    __m128  value_1      = Interpolate(value_10,  value_11,  fade_y);
+    //
+    __m128  value        = Interpolate(value_0,   value_1,   fade_x);
+    
+    return value;
   }
+#endif
   
   Vector const VectorNoise( Vector const& location )
   {

Modified: trunk/Core/Math/Noise.h
==============================================================================
--- trunk/Core/Math/Noise.h     (original)
+++ trunk/Core/Math/Noise.h     Sat Jun 23 15:18:29 2007
@@ -29,6 +29,11 @@
   __m128 ScalarNoiseSSE( const __m128& location_x,
                          const __m128& location_y,
                          const __m128& location_z);
+  __m128 grad(const __m128i& hash,
+              const __m128 & x,
+              const __m128 & y,
+              const __m128 & z);
+  __m128i permutationSSE(const __m128i& index);
 #endif
 
   /**




  • [MANTA] r1422 - trunk/Core/Math, bigler, 06/23/2007

Archive powered by MHonArc 2.6.16.

Top of page