Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1455 - trunk/Core/Math
- Date: Mon, 9 Jul 2007 11:04:16 -0600 (MDT)
Author: bigler
Date: Mon Jul 9 11:04:15 2007
New Revision: 1455
Modified:
trunk/Core/Math/Noise.cc
Log:
Core/Math/Noise.cc
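Added an option (USE_PERMUTATION_UNPACKED) that does all the unpacking and packing in one go: the integer lattice coordinates are unpacked once through a union, all eight corner hashes are looked up per lane in a scalar loop, and the results are repacked into __m128i values for grad().
This is faster than Steve's ssehash on my laptop.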
Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc (original)
+++ trunk/Core/Math/Noise.cc Mon Jul 9 11:04:15 2007
@@ -447,13 +447,25 @@
_mm_mul_ps(weight,
_mm_sub_ps(d2, d1)));
}
+
+#define USE_PERMUTATION_UNPACKED 1
+
+ typedef union {
+ unsigned int i[4];
+ __m128i s;
+ } unsigned_int_ssei;
__m128 ScalarNoiseSSE( const __m128& location_x,
const __m128& location_y,
const __m128& location_z)
{
__m128 offset_of_x = fracSSE(location_x);
+#if USE_PERMUTATION_UNPACKED
+ unsigned_int_ssei integer_of_x;
+ integer_of_x.s = _mm_and_si128(_mm_cvttps_epi32(_mm_sub_ps(location_x,
offset_of_x)), _mm_set1_epi32(0xFF));
+#else
__m128i integer_of_x = _mm_cvttps_epi32(_mm_sub_ps(location_x,
offset_of_x));
+#endif
__m128 fade_x = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x),
_mm_mul_ps(offset_of_x,
_mm_add_ps(_mm_mul_ps(offset_of_x,
@@ -463,59 +475,94 @@
_mm_set_ps1(10.f))));
__m128 offset_of_y = fracSSE(location_y);
+#if USE_PERMUTATION_UNPACKED
+ unsigned_int_ssei integer_of_y;
+ integer_of_y.s = _mm_cvttps_epi32(_mm_sub_ps(location_y, offset_of_y));
+#else
__m128i integer_of_y = _mm_cvttps_epi32(_mm_sub_ps(location_y,
offset_of_y));
+#endif
__m128 fade_y = _mm_mul_ps(_mm_mul_ps(offset_of_y, offset_of_y),
_mm_mul_ps(offset_of_y, _mm_add_ps(_mm_mul_ps(offset_of_y,
_mm_sub_ps(_mm_mul_ps(offset_of_y, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
__m128 offset_of_z = fracSSE(location_z);
+#if USE_PERMUTATION_UNPACKED
+ unsigned_int_ssei integer_of_z;
+ integer_of_z.s = _mm_cvttps_epi32(_mm_sub_ps(location_z, offset_of_z));
+#else
__m128i integer_of_z = _mm_cvttps_epi32(_mm_sub_ps(location_z,
offset_of_z));
+#endif
__m128 fade_z = _mm_mul_ps(_mm_mul_ps(offset_of_z, offset_of_z),
_mm_mul_ps(offset_of_z, _mm_add_ps(_mm_mul_ps(offset_of_z,
_mm_sub_ps(_mm_mul_ps(offset_of_z, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
-
+
+#if USE_PERMUTATION_UNPACKED
+ unsigned_int_ssei hash_000, hash_001, hash_010, hash_011, hash_100,
hash_101, hash_110, hash_111;
+ for(unsigned int i = 0; i < 4; ++i) {
+ unsigned int x = integer_of_x.i[i];
+ unsigned int y = integer_of_y.i[i];
+ unsigned int z = integer_of_z.i[i];
+ unsigned int hash_0 = NoiseXPermutationTable[(x) ];
+ unsigned int hash_1 = NoiseXPermutationTable[(x+1) & 0xFF];
+ unsigned int hash_00 = NoiseXPermutationTable[(hash_0 + y) & 0xFF];
+ unsigned int hash_01 = NoiseXPermutationTable[(hash_0 + y+1) & 0xFF];
+ unsigned int hash_10 = NoiseXPermutationTable[(hash_1 + y) & 0xFF];
+ unsigned int hash_11 = NoiseXPermutationTable[(hash_1 + y+1) & 0xFF];
+ hash_000.i[i] = NoiseXPermutationTable[(hash_00 + z) & 0xFF];
+ hash_001.i[i] = NoiseXPermutationTable[(hash_00 + z+1) & 0xFF];
+ hash_010.i[i] = NoiseXPermutationTable[(hash_01 + z) & 0xFF];
+ hash_011.i[i] = NoiseXPermutationTable[(hash_01 + z+1) & 0xFF];
+ hash_100.i[i] = NoiseXPermutationTable[(hash_10 + z) & 0xFF];
+ hash_101.i[i] = NoiseXPermutationTable[(hash_10 + z+1) & 0xFF];
+ hash_110.i[i] = NoiseXPermutationTable[(hash_11 + z) & 0xFF];
+ hash_111.i[i] = NoiseXPermutationTable[(hash_11 + z+1) & 0xFF];
+ }
+
+ __m128 value_000 = grad(hash_000.s, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_001 = grad(hash_001.s, offset_of_x, offset_of_y,
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+ __m128 value_010 = grad(hash_010.s, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+ __m128 value_011 = grad(hash_011.s, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z,
_mm_set_ps1(1)));
+ __m128 value_100 = grad(hash_100.s, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, offset_of_z);
+ __m128 value_101 = grad(hash_101.s, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+ __m128 value_110 = grad(hash_110.s, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+ __m128 value_111 = grad(hash_111.s, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)),
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+
+#else
#if 0
__m128i hash_0 = permutationSSE(integer_of_x);
__m128i hash_00 = permutationSSE(_mm_add_epi32(hash_0,
integer_of_y));
__m128i hash_000 = permutationSSE(_mm_add_epi32(hash_00,
integer_of_z));
__m128i hash_001 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z),
_mm_set1_epi32(1)));
-#else
- __m128i hash_000 = ssehash(integer_of_x, integer_of_y, integer_of_z);
- __m128i hash_001 = ssehash(integer_of_x, integer_of_y,
_mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
-#endif
- __m128 value_000 = grad(hash_000, offset_of_x, offset_of_y,
offset_of_z);
- __m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
-#if 0
__m128i hash_01 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_010 = permutationSSE(_mm_add_epi32(hash_01,
integer_of_z));
__m128i hash_011 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z),
_mm_set1_epi32(1)));
-#else
- __m128i hash_010 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), integer_of_z);
- __m128i hash_011 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
-#endif
- __m128 value_010 = grad(hash_010, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
- __m128 value_011 = grad(hash_011, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z,
_mm_set_ps1(1)));
-#if 0
__m128i hash_1 = permutationSSE(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)));
__m128i hash_10 = permutationSSE(_mm_add_epi32(hash_1,
integer_of_y));
__m128i hash_100 = permutationSSE(_mm_add_epi32(hash_10,
integer_of_z));
__m128i hash_101 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z),
_mm_set1_epi32(1)));
-#else
- __m128i hash_100 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, integer_of_z);
- __m128i hash_101 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, _mm_add_epi32(integer_of_z,
_mm_set1_epi32(1)));
-#endif
- __m128 value_100 = grad(hash_100, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, offset_of_z);
- __m128 value_101 = grad(hash_101, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
-#if 0
__m128i hash_11 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_110 = permutationSSE(_mm_add_epi32(hash_11,
integer_of_z));
__m128i hash_111 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z),
_mm_set1_epi32(1)));
#else
+ __m128i hash_000 = ssehash(integer_of_x, integer_of_y, integer_of_z);
+ __m128i hash_001 = ssehash(integer_of_x, integer_of_y,
_mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+ __m128i hash_010 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), integer_of_z);
+ __m128i hash_011 = ssehash(integer_of_x, _mm_add_epi32(integer_of_y,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
+ __m128i hash_100 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, integer_of_z);
+ __m128i hash_101 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), integer_of_y, _mm_add_epi32(integer_of_z,
_mm_set1_epi32(1)));
__m128i hash_110 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)),
integer_of_z);
__m128i hash_111 = ssehash(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)), _mm_add_epi32(integer_of_y, _mm_set1_epi32(1)),
_mm_add_epi32(integer_of_z, _mm_set1_epi32(1)));
#endif
+
+ __m128 value_000 = grad(hash_000, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+ __m128 value_010 = grad(hash_010, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+ __m128 value_011 = grad(hash_011, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z,
_mm_set_ps1(1)));
+ __m128 value_100 = grad(hash_100, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, offset_of_z);
+ __m128 value_101 = grad(hash_101, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
__m128 value_110 = grad(hash_110, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
__m128 value_111 = grad(hash_111, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)),
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
+#endif // #if USE_PERMUTATION_UNPACKED
//
__m128 value_00 = Interpolate(value_000, value_001, fade_z);
__m128 value_01 = Interpolate(value_010, value_011, fade_z);
- [MANTA] r1455 - trunk/Core/Math, bigler, 07/09/2007
Archive powered by MHonArc 2.6.16.