Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1422 - trunk/Core/Math
- Date: Sat, 23 Jun 2007 15:18:31 -0600 (MDT)
Author: bigler
Date: Sat Jun 23 15:18:29 2007
New Revision: 1422
Modified:
trunk/Core/Math/Noise.cc
trunk/Core/Math/Noise.h
Log:
Core/Math/Noise.cc
Core/Math/Noise.h
Got a first, partially working version of ScalarNoiseSSE running. There are
still some weird bugs that I'm going to try to work out now.
Modified: trunk/Core/Math/Noise.cc
==============================================================================
--- trunk/Core/Math/Noise.cc (original)
+++ trunk/Core/Math/Noise.cc Sat Jun 23 15:18:29 2007
@@ -4,7 +4,7 @@
#include <Core/Math/Noise.h>
#ifdef MANTA_SSE
-# include <SSEDefs.h>
+# include <Core/Math/SSEDefs.h>
#endif
using namespace SCIRun;
@@ -329,16 +329,67 @@
return Interpolate( value_0, value_1, fade_x );
}
+#if MANTA_SSE
// This will return a random hash number from 0 to 255
- static __m128i permutationSSE(cosnt __m128i& index)
+ /*static inline*/ __m128i permutationSSE(const __m128i& index)
{
+// val = val & 255;
// if (val & 1)
// val = (val >> 1) ^ mask;
// else
// val = val >> 1;
- __m128i index_masked = _mm_and_si128(index, _mm_set_epi32(0xFF));
- __m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked,
_mm_set_epi32(1)), _mm_set_epi32(1));
- return _mm_xor_si128(_mm_srli_epi32(index_masked, _mm_srli_epi32(1)),
_mm_and_si128(ifmask, _mm_set_epi32(0xB8)));
+ __m128i index_masked = _mm_and_si128(index, _mm_set1_epi32(0xFF));
+ __m128i ifmask = _mm_cmpeq_epi32(_mm_and_si128(index_masked,
_mm_set1_epi32(1)), _mm_set1_epi32(1));
+ return _mm_xor_si128(_mm_srli_epi32(index_masked, 1),
+ _mm_and_si128(ifmask, _mm_set1_epi32(0xB8)));
+ }
+
+ /*static inline*/ __m128 grad(const __m128i& hash,
+ const __m128 & x,
+ const __m128 & y,
+ const __m128 & z)
+ {
+#if 1
+ // CONVERT LO 4 BITS OF HASH CODE
+ // int h = hash & 15;
+ __m128i h = _mm_and_si128(hash, _mm_set1_epi32(15));
+ // INTO 12 GRADIENT DIRECTIONS.
+ // double u = h<8 ? x : y;
+ __m128 u = mask4(_mm_castsi128_ps(_mm_cmplt_epi32(h,
_mm_set1_epi32(8))),
+ x,
+ y);
+ // double v = h<4 ? y : h==12||h==14 ? x : z;
+ __m128 v = mask4(_mm_castsi128_ps(_mm_cmplt_epi32(h,
_mm_set1_epi32(4))),
+ y,
+
mask4(_mm_castsi128_ps(_mm_or_si128(_mm_cmpeq_epi32(h, _mm_set1_epi32(12)),
+
_mm_cmpeq_epi32(h, _mm_set1_epi32(14)))),
+ x,
+ z));
+ // return ((h&1) == 0 ? u : -u) + ((h&2) == 0 ? v : -v);
+ // return ((h&1) == 1 ? -u : u) + ((h&2) == 2 ? -v : v);
+
+ // To do the unary negation we will xor 0x80000000 with the sign
+ // bit. xor'ing with zero will leave the result unchanged, so if
+ // we 'and' the comparison mask with 0x80000000 we will be able to
+ // selectively change the sign bit.
+ __m128 uPart =
_mm_xor_ps(_mm_castsi128_ps(_mm_and_si128(_mm_cmpeq_epi32(_mm_and_si128(h,
_mm_set1_epi32(1)),
+
_mm_set1_epi32(1)),
+
_mm_set1_epi32(0x80000000))),
+ u);
+ __m128 vPart =
_mm_xor_ps(_mm_castsi128_ps(_mm_and_si128(_mm_cmpeq_epi32(_mm_and_si128(h,
_mm_set1_epi32(2)),
+
_mm_set1_epi32(2)),
+
_mm_set1_epi32(0x80000000))),
+ v);
+ return _mm_add_ps(uPart, vPart);
+#else
+
+#endif
+ }
+
+ static inline __m128 Interpolate(const __m128& d1, const __m128& d2, const
__m128& weight)
+ {
+ return _mm_add_ps(_mm_mul_ps(d1, _mm_sub_ps(_mm_set_ps1(1.f), weight)),
+ _mm_mul_ps(d2, weight));
}
__m128 ScalarNoiseSSE( const __m128& location_x,
@@ -348,11 +399,55 @@
__m128 offset_of_x = fracSSE(location_x);
__m128i integer_of_x = _mm_cvttps_epi32(_mm_sub_ps(location_x,
offset_of_x));
__m128 fade_x = _mm_mul_ps(_mm_mul_ps(offset_of_x, offset_of_x),
_mm_mul_ps(offset_of_x, _mm_add_ps(_mm_mul_ps(offset_of_x,
_mm_sub_ps(_mm_mul_ps(offset_of_x, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
+
+ __m128 offset_of_y = fracSSE(location_y);
+ __m128i integer_of_y = _mm_cvttps_epi32(_mm_sub_ps(location_y,
offset_of_y));
+ __m128 fade_y = _mm_mul_ps(_mm_mul_ps(offset_of_y, offset_of_y),
_mm_mul_ps(offset_of_y, _mm_add_ps(_mm_mul_ps(offset_of_y,
_mm_sub_ps(_mm_mul_ps(offset_of_y, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
+
+ __m128 offset_of_z = fracSSE(location_z);
+ __m128i integer_of_z = _mm_cvttps_epi32(_mm_sub_ps(location_z,
offset_of_z));
+ __m128 fade_z = _mm_mul_ps(_mm_mul_ps(offset_of_z, offset_of_z),
_mm_mul_ps(offset_of_z, _mm_add_ps(_mm_mul_ps(offset_of_z,
_mm_sub_ps(_mm_mul_ps(offset_of_z, _mm_set_ps1(6.f)), _mm_set_ps1(15.f))),
_mm_set_ps1(10.f))));
+
__m128i hash_0 = permutationSSE(integer_of_x);
- __m128i hash_00 = permutationSSE(_mm_add_ps(hash_0, integer_of_y));
- __m128i hash_000 = permutationSSE(_mm_add_ps(hash_00, integer_of_z));
- __m128i hash_001 = permutationSSE(_mm_add_ps(_mm_add_ps(hash_00,
integer_of_z), _mm_set_epi32(1)));
+ __m128i hash_00 = permutationSSE(_mm_add_epi32(hash_0,
integer_of_y));
+ __m128i hash_000 = permutationSSE(_mm_add_epi32(hash_00,
integer_of_z));
+ __m128i hash_001 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_00, integer_of_z),
_mm_set1_epi32(1)));
+ __m128 value_000 = grad(hash_000, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_001 = grad(hash_001, offset_of_x, offset_of_y,
offset_of_z);
+
+ __m128i hash_01 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_0
, integer_of_y), _mm_set1_epi32(1)));
+ __m128i hash_010 = permutationSSE(_mm_add_epi32(hash_01,
integer_of_z));
+ __m128i hash_011 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_01, integer_of_z),
_mm_set1_epi32(1)));
+ __m128 value_010 = grad(hash_010, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_011 = grad(hash_011, offset_of_x, offset_of_y,
offset_of_z);
+
+ __m128i hash_1 = permutationSSE(_mm_add_epi32(integer_of_x,
_mm_set1_epi32(1)));
+ __m128i hash_10 = permutationSSE(_mm_add_epi32(hash_1,
integer_of_y));
+ __m128i hash_100 = permutationSSE(_mm_add_epi32(hash_10,
integer_of_z));
+ __m128i hash_101 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_10, integer_of_z),
_mm_set1_epi32(1)));
+ __m128 value_100 = grad(hash_100, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_101 = grad(hash_101, offset_of_x, offset_of_y,
offset_of_z);
+
+ __m128i hash_11 = permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_1
, integer_of_y), _mm_set1_epi32(1)));
+ __m128i hash_110 = permutationSSE(_mm_add_epi32(hash_11,
integer_of_z));
+ __m128i hash_111 =
permutationSSE(_mm_add_epi32(_mm_add_epi32(hash_11, integer_of_z),
_mm_set1_epi32(1)));
+ __m128 value_110 = grad(hash_110, offset_of_x, offset_of_y,
offset_of_z);
+ __m128 value_111 = grad(hash_111, offset_of_x, offset_of_y,
offset_of_z);
+
+ //
+ __m128 value_00 = Interpolate(value_000, value_001, fade_z);
+ __m128 value_01 = Interpolate(value_010, value_011, fade_z);
+ __m128 value_0 = Interpolate(value_00, value_01, fade_y);
+ //
+ __m128 value_10 = Interpolate(value_100, value_101, fade_z);
+ __m128 value_11 = Interpolate(value_110, value_111, fade_z);
+ __m128 value_1 = Interpolate(value_10, value_11, fade_y);
+ //
+ __m128 value = Interpolate(value_0, value_1, fade_x);
+
+ return value;
}
+#endif
Vector const VectorNoise( Vector const& location )
{
Modified: trunk/Core/Math/Noise.h
==============================================================================
--- trunk/Core/Math/Noise.h (original)
+++ trunk/Core/Math/Noise.h Sat Jun 23 15:18:29 2007
@@ -29,6 +29,11 @@
__m128 ScalarNoiseSSE( const __m128& location_x,
const __m128& location_y,
const __m128& location_z);
+ __m128 grad(const __m128i& hash,
+ const __m128 & x,
+ const __m128 & y,
+ const __m128 & z);
+ __m128i permutationSSE(const __m128i& index);
#endif
/**
- [MANTA] r1422 - trunk/Core/Math, bigler, 06/23/2007
Archive powered by MHonArc 2.6.16.