Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1429 - trunk/Core/Math
- Date: Wed, 27 Jun 2007 12:34:08 -0600 (MDT)
Author: sparker
Date: Wed Jun 27 12:34:04 2007
New Revision: 1429
Modified:
trunk/Core/Math/Noise.cc
Log:
Added a better-looking SSE hash function. It is currently slower than the
scalar version but could probably be optimized significantly.
Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc (original)
+++ trunk/Core/Math/Noise.cc Wed Jun 27 12:34:04 2007
@@ -348,8 +348,8 @@
// This will return a random hash number from 0 to 255
/*static inline*/ __m128i permutationSSE(const __m128i& index)
{
-#if 0 // 0 for table, 1 for RNG
-#if 1 // 0 for CheapRNG, 1 for DissolveRNG
+#if 1 // 0 for table, 1 for RNG
+#if 0 // 0 for CheapRNG, 1 for DissolveRNG
// val = val & 255;
// if (val & 1)
@@ -361,10 +361,12 @@
return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
_mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
#else
- return _mm_and_si128(_mm_set1_epi32(0xFF),
+ return _mm_mullo_epi32(index, _mm_set1_epi32(741103597));
+#if 0
+ return /*_mm_and_si128(_mm_set1_epi32(0xFF), */_mm_srai_epi32(
_mm_add_epi32(_mm_set1_epi32(1013904223),
-
_mm_mullo_epi32(_mm_set1_epi32(1664525), index)));
-
+
_mm_mullo_epi32(_mm_set1_epi32(1664525), index)), 9)/*)*/;
+#endif
#endif
#else
union {
@@ -378,6 +380,26 @@
#endif
}
+ /*static inline*/ __m128i ssehash(const __m128i& x, const __m128i& y,
const __m128i& z)
+ {
+#if 0
+ __m128i xx = _mm_mullo_epi32(x, _mm_set1_epi32(741103597));
+ __m128i yy = _mm_mullo_epi32(_mm_add_epi32(xx, y),
_mm_set1_epi32(741103597));
+ __m128i zz = _mm_mullo_epi32(_mm_add_epi32(yy, z),
_mm_set1_epi32(741103597));
+ return _mm_sri_epi32(zz, 17);
+#else
+ __m128 xx = _mm_cvtepi32_ps(x);
+ __m128 yy = _mm_cvtepi32_ps(y);
+ __m128 zz = _mm_cvtepi32_ps(z);
+ xx = _mm_add_ps(xx, _mm_set1_ps(0.6180339887)); // Avoid singularity when x==0
+ yy = _mm_add_ps(yy, _mm_set1_ps(0.6180339887));
+ zz = _mm_add_ps(zz, _mm_set1_ps(0.6180339887));
+ __m128 hh = _mm_mul_ps(_mm_mul_ps(xx, yy),
+ _mm_mul_ps(zz, _mm_set1_ps(3.14159265389793)));
+ return _mm_srai_epi32(_mm_castps_si128(hh), 17);
+#endif
+ }
+
/*static inline*/ __m128 grad(const __m128i& hash,
const __m128 & x,
const __m128 & y,
@@ -445,29 +467,49 @@
__m128i integer_of_z = _mm_cvttps_epi32(_mm_sub_ps(location_z,
offset_of_z));
__m128 fade_z = _mm_mul_ps(_mm_mul_ps(offset_of_z, offset_of_z),
_mm_mul_ps(offset_of_z, _mm_add_ps(_mm_mul_ps(offset_of_z,
_mm_sub_ps(_mm_mul_ps(offset_of_z, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
+#if 0
__m128i hash_0 = permutationSSE(integer_of_x);
__m128i hash_00 = permutationSSE(_mm_add_epi32(hash_0,
integer_of_y));
__m128i hash_000 = permutationSSE(_mm_add_epi32(hash_00,
integer_of_z));
__m128i hash_001 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z),
_mm_set1_epi32(1)));
+#else
+ __m128i hash_000 = ssehash(integer_of_x, integer_of_y, integer_of_z);
+ __m128i hash_001 = ssehash(integer_of_x, integer_of_y,
_mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
__m128 value_000 = grad(hash_000, offset_of_x, offset_of_y,
offset_of_z);
__m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
-
+
+#if 0
__m128i hash_01 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_010 = permutationSSE(_mm_add_epi32(hash_01,
integer_of_z));
__m128i hash_011 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z),
_mm_set1_epi32(1)));
+#else
+ __m128i hash_010 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), integer_of_z);
+ __m128i hash_011 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
__m128 value_010 = grad(hash_010, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
__m128 value_011 = grad(hash_011, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z,
_mm_set_ps1(1)));
+#if 0
__m128i hash_1 = permutationSSE(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)));
__m128i hash_10 = permutationSSE(_mm_add_epi32(hash_1,
integer_of_y));
__m128i hash_100 = permutationSSE(_mm_add_epi32(hash_10,
integer_of_z));
__m128i hash_101 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z),
_mm_set1_epi32(1)));
+#else
+ __m128i hash_100 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, integer_of_z);
+ __m128i hash_101 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, _mm_add_epi32(integer_of_z,
_mm_set1_epi32(1)));
+#endif
__m128 value_100 = grad(hash_100, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, offset_of_z);
__m128 value_101 = grad(hash_101, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+#if 0
__m128i hash_11 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_110 = permutationSSE(_mm_add_epi32(hash_11,
integer_of_z));
__m128i hash_111 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z),
_mm_set1_epi32(1)));
+#else
+ __m128i hash_110 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)),
integer_of_z);
+ __m128i hash_111 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)),
_mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+#endif
__m128 value_110 = grad(hash_110, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
__m128 value_111 = grad(hash_111, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)),
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
- [MANTA] r1429 - trunk/Core/Math, sparker, 06/27/2007
Archive powered by MHonArc 2.6.16.