Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1429 - trunk/Core/Math


Chronological Thread 
  • From: sparker@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1429 - trunk/Core/Math
  • Date: Wed, 27 Jun 2007 12:34:08 -0600 (MDT)

Author: sparker
Date: Wed Jun 27 12:34:04 2007
New Revision: 1429

Modified:
   trunk/Core/Math/Noise.cc
Log:
Added better looking SSE hash function.  It is currently slower than the 
scalar version but could probably be optimized significantly.


Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc    (original)
+++ trunk/Core/Math/Noise.cc    Wed Jun 27 12:34:04 2007
@@ -348,8 +348,8 @@
   // This will return a random hash number from 0 to 255
   /*static inline*/ __m128i permutationSSE(const __m128i& index)
   {
-#if 0 // 0 for table, 1 for RNG
-#if 1 // 0 for CheapRNG, 1 for DissolveRNG
+#if 1 // 0 for table, 1 for RNG
+#if 0 // 0 for CheapRNG, 1 for DissolveRNG
     
 //     val = val & 255;
 //     if (val & 1)
@@ -361,10 +361,12 @@
     return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
                          _mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
 #else
-    return _mm_and_si128(_mm_set1_epi32(0xFF),
+    return _mm_mullo_epi32(index, _mm_set1_epi32(741103597));
+#if 0
+    return /*_mm_and_si128(_mm_set1_epi32(0xFF), */_mm_srai_epi32(
                          _mm_add_epi32(_mm_set1_epi32(1013904223),
-                                       _mm_mullo_epi32(_mm_set1_epi32(1664525), index)));
-                                                       
+                                       _mm_mullo_epi32(_mm_set1_epi32(1664525), index)), 9)/*)*/;
+#endif
 #endif
 #else
     union {
@@ -378,6 +380,26 @@
 #endif
   }
 
+  /*static inline*/ __m128i ssehash(const __m128i& x, const __m128i& y, const __m128i& z)
+  {
+#if 0
+    __m128i xx = _mm_mullo_epi32(x, _mm_set1_epi32(741103597));
+    __m128i yy = _mm_mullo_epi32(_mm_add_epi32(xx, y), _mm_set1_epi32(741103597));
+    __m128i zz = _mm_mullo_epi32(_mm_add_epi32(yy, z), _mm_set1_epi32(741103597));
+    return _mm_sri_epi32(zz, 17);
+#else
+    __m128 xx = _mm_cvtepi32_ps(x);
+    __m128 yy = _mm_cvtepi32_ps(y);
+    __m128 zz = _mm_cvtepi32_ps(z);
+    xx = _mm_add_ps(xx, _mm_set1_ps(0.6180339887)); // Avoid singularity when x==0
+    yy = _mm_add_ps(yy, _mm_set1_ps(0.6180339887));
+    zz = _mm_add_ps(zz, _mm_set1_ps(0.6180339887));
+    __m128 hh = _mm_mul_ps(_mm_mul_ps(xx, yy),
+                           _mm_mul_ps(zz, _mm_set1_ps(3.14159265389793)));
+    return _mm_srai_epi32(_mm_castps_si128(hh), 17);
+#endif
+  }
+
   /*static inline*/ __m128 grad(const __m128i& hash,
                             const __m128 & x,
                             const __m128 & y,
@@ -445,29 +467,49 @@
     __m128i integer_of_z = _mm_cvttps_epi32(_mm_sub_ps(location_z, 
offset_of_z));
     __m128  fade_z       = _mm_mul_ps(_mm_mul_ps(offset_of_z, offset_of_z), 
_mm_mul_ps(offset_of_z, _mm_add_ps(_mm_mul_ps(offset_of_z, 
_mm_sub_ps(_mm_mul_ps(offset_of_z, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))), 
_mm_set_ps1(10.f))));
     
+#if 0
     __m128i hash_0       = permutationSSE(integer_of_x);
     __m128i hash_00      = permutationSSE(_mm_add_epi32(hash_0,  
integer_of_y));
     __m128i hash_000     = permutationSSE(_mm_add_epi32(hash_00, 
integer_of_z));
     __m128i hash_001     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z), 
_mm_set1_epi32(1)));
+#else
+    __m128i hash_000 = ssehash(integer_of_x, integer_of_y, integer_of_z);
+    __m128i hash_001 = ssehash(integer_of_x, integer_of_y, _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
     __m128  value_000    = grad(hash_000, offset_of_x, offset_of_y, 
offset_of_z);
     __m128  value_001    = grad(hash_001, offset_of_x, offset_of_y, 
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
-    
+
+#if 0    
     __m128i hash_01      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0 
, integer_of_y), _mm_set1_epi32(1)));
     __m128i hash_010     = permutationSSE(_mm_add_epi32(hash_01, 
integer_of_z));
     __m128i hash_011     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z), 
_mm_set1_epi32(1)));
+#else
+    __m128i hash_010 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)), integer_of_z);
+    __m128i hash_011 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)), _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
     __m128  value_010    = grad(hash_010, offset_of_x, 
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
     __m128  value_011    = grad(hash_011, offset_of_x, 
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z, 
_mm_set_ps1(1)));
 
+#if 0
     __m128i hash_1       = permutationSSE(_mm_add_epi32(integer_of_x, 
_mm_set1_epi32(1)));
     __m128i hash_10      = permutationSSE(_mm_add_epi32(hash_1,  
integer_of_y));
     __m128i hash_100     = permutationSSE(_mm_add_epi32(hash_10, 
integer_of_z));
     __m128i hash_101     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z), 
_mm_set1_epi32(1)));
+#else
+    __m128i hash_100 = ssehash(_mm_add_epi32(integer_of_x, _mm_set1_epi32(1)), integer_of_y, integer_of_z);
+    __m128i hash_101 = ssehash(_mm_add_epi32(integer_of_x, _mm_set1_epi32(1)), integer_of_y, _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
     __m128  value_100    = grad(hash_100, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), offset_of_y, offset_of_z);
     __m128  value_101    = grad(hash_101, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
 
+#if 0
     __m128i hash_11      = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1 
, integer_of_y), _mm_set1_epi32(1)));
     __m128i hash_110     = permutationSSE(_mm_add_epi32(hash_11, 
integer_of_z));
     __m128i hash_111     = 
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z), 
_mm_set1_epi32(1)));
+#else
+    __m128i hash_110 = ssehash(_mm_add_epi32(integer_of_x, _mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)), integer_of_z);
+    __m128i hash_111 = ssehash(_mm_add_epi32(integer_of_x, _mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)), _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
     __m128  value_110    = grad(hash_110, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
     __m128  value_111    = grad(hash_111, _mm_sub_ps(offset_of_x, 
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), 
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
 




  • [MANTA] r1429 - trunk/Core/Math, sparker, 06/27/2007

Archive powered by MHonArc 2.6.16.

Top of page