Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1423 - trunk/Core/Math
- Date: Tue, 26 Jun 2007 15:28:26 -0600 (MDT)
Author: bigler
Date: Tue Jun 26 15:28:25 2007
New Revision: 1423
Modified:
trunk/Core/Math/Noise.cc
Log:
Core/Math/Noise.cc
Added _mm_mullo_epi32 function.
New version of Interpolate that has only one multiply.
Added CheapRNG and Table lookup for permutationSSE. Current code
defaults to Table.
Fixed ScalarNoiseSSE by subtracting one from the offsets passed to grad()
for the opposite corners.
Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc (original)
+++ trunk/Core/Math/Noise.cc Tue Jun 26 15:28:25 2007
@@ -330,9 +330,27 @@
}
#if MANTA_SSE
+ __m128i _mm_mullo_epi32( __m128i a, __m128i b )
+ {
+ __m128i t0;
+ __m128i t1;
+
+ t0 = _mm_mul_epu32(a,b);
+ t1 = _mm_mul_epu32( _mm_shuffle_epi32( a, 0xB1 ),
+ _mm_shuffle_epi32( b, 0xB1 ) );
+
+ t0 = _mm_shuffle_epi32( t0, 0xD8 );
+ t1 = _mm_shuffle_epi32( t1, 0xD8 );
+
+ return _mm_unpacklo_epi32( t0, t1 );
+ }
+
// This will return a random hash number from 0 to 255
/*static inline*/ __m128i permutationSSE(const __m128i& index)
{
+#if 0 // 0 for table, 1 for RNG
+#if 1 // 0 for CheapRNG, 1 for DissolveRNG
+
// val = val & 255;
// if (val & 1)
// val = (val >> 1) ^ mask;
@@ -342,6 +360,22 @@
__m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked,
_mm_set1_epi32(1)), _mm_set1_epi32(1));
return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
_mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
+#else
+ return _mm_and_si128(_mm_set1_epi32(0xFF),
+ _mm_add_epi32(_mm_set1_epi32(1013904223),
+ _mm_mullo_epi32(_mm_set1_epi32(1664525), index)));
+
+#endif
+#else
+ union {
+ unsigned int i[4];
+ __m128i s;
+ } indicies, results;
+ indicies.s = index;
+ for(unsigned int i = 0; i < 4; ++i)
+ results.i[i] = NoiseXPermutationTable[indicies.i[i]&255];
+ return results.s;
+#endif
}
/*static inline*/ __m128 grad(const __m128i& hash,
@@ -349,7 +383,6 @@
const __m128 & y,
const __m128 & z)
{
-#if 1
// CONVERT LO 4 BITS OF HASH CODE
// int h = hash & 15;
__m128i h = _mm_and_si128(hash, _mm_set1_epi32(15));
@@ -381,15 +414,13 @@
_mm_set1_epi32(0x80000000))),
v);
return _mm_add_ps(uPart, vPart);
-#else
-
-#endif
}
static inline __m128 Interpolate(const __m128& d1, const __m128& d2, const
__m128& weight)
{
- return _mm_add_ps(_mm_mul_ps(d1, _mm_sub_ps(_mm_set_ps1(1.f), weight)),
- _mm_mul_ps(d2, weight));
+ return _mm_add_ps(d1,
+ _mm_mul_ps(weight,
+ _mm_sub_ps(d2, d1)));
}
__m128 ScalarNoiseSSE( const __m128& location_x,
@@ -398,7 +429,13 @@
{
__m128 offset_of_x = fracSSE(location_x);
__m128i integer_of_x = _mm_cvttps_epi32(_mm_sub_ps(location_x,
offset_of_x));
- __m128 fade_x = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x),
_mm_mul_ps(offset_of_x, _mm_add_ps(_mm_mul_ps(offset_of_x,
_mm_sub_ps(_mm_mul_ps(offset_of_x, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
+ __m128 fade_x = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x),
+ _mm_mul_ps(offset_of_x,
+ _mm_add_ps(_mm_mul_ps(offset_of_x,
+ _mm_sub_ps(_mm_mul_ps(offset_of_x,
+ _mm_set_ps1(6.f)),
+ _mm_set_ps1(15.f))),
+ _mm_set_ps1(10.f))));
__m128 offset_of_y = fracSSE(location_y);
__m128i integer_of_y = _mm_cvttps_epi32(_mm_sub_ps(location_y,
offset_of_y));
@@ -413,26 +450,26 @@
__m128i hash_000 = permutationSSE(_mm_add_epi32(hash_00,
integer_of_z));
__m128i hash_001 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z),
_mm_set1_epi32(1)));
__m128 value_000 = grad(hash_000, offset_of_x, offset_of_y,
offset_of_z);
- __m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
__m128i hash_01 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_010 = permutationSSE(_mm_add_epi32(hash_01,
integer_of_z));
__m128i hash_011 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z),
_mm_set1_epi32(1)));
- __m128 value_010 = grad(hash_010, offset_of_x, offset_of_y,
offset_of_z);
- __m128 value_011 = grad(hash_011, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_010 = grad(hash_010, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+ __m128 value_011 = grad(hash_011, offset_of_x,
_mm_sub_ps(offset_of_y, _mm_set_ps1(1)), _mm_sub_ps(offset_of_z,
_mm_set_ps1(1)));
__m128i hash_1 = permutationSSE(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)));
__m128i hash_10 = permutationSSE(_mm_add_epi32(hash_1,
integer_of_y));
__m128i hash_100 = permutationSSE(_mm_add_epi32(hash_10,
integer_of_z));
__m128i hash_101 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z),
_mm_set1_epi32(1)));
- __m128 value_100 = grad(hash_100, offset_of_x, offset_of_y,
offset_of_z);
- __m128 value_101 = grad(hash_101, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_100 = grad(hash_100, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, offset_of_z);
+ __m128 value_101 = grad(hash_101, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), offset_of_y, _mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
__m128i hash_11 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1
, integer_of_y), _mm_set1_epi32(1)));
__m128i hash_110 = permutationSSE(_mm_add_epi32(hash_11,
integer_of_z));
__m128i hash_111 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z),
_mm_set1_epi32(1)));
- __m128 value_110 = grad(hash_110, offset_of_x, offset_of_y,
offset_of_z);
- __m128 value_111 = grad(hash_111, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_110 = grad(hash_110, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)), offset_of_z);
+ __m128 value_111 = grad(hash_111, _mm_sub_ps(offset_of_x,
_mm_set_ps1(1)), _mm_sub_ps(offset_of_y, _mm_set_ps1(1)),
_mm_sub_ps(offset_of_z, _mm_set_ps1(1)));
//
__m128 value_00 = Interpolate(value_000, value_001, fade_z);
- [MANTA] r1423 - trunk/Core/Math, bigler, 06/26/2007
Archive powered by MHonArc 2.6.16.