Text archives Help
- From: bigler@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1238 - trunk/Model/Primitives
- Date: Mon, 27 Nov 2006 13:50:11 -0700 (MST)
Author: bigler
Date: Mon Nov 27 13:50:10 2006
New Revision: 1238
Modified:
trunk/Model/Primitives/Parallelogram.cc
Log:
Fixes so that the texture coordinates are saved during the SSE blocks of
code. There are currently two versions: the first uses the
scratchpad, and the second copies the values directly to the texture
coordinates.
Also makes use of the new RayPacket::hitWithoutTminCheck function.
Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc (original)
+++ trunk/Model/Primitives/Parallelogram.cc Mon Nov 27 13:50:10 2006
@@ -9,6 +9,13 @@
using namespace Manta;
using SCIRun::Abs;
+#define USE_SCRATCHPAD 0
+
+#define maskedStore_ps(mask, oldD, newD) \
+ _mm_store_ps(oldD, \
+ _mm_or_ps(_mm_and_ps(mask, newD), \
+ _mm_andnot_ps(mask, _mm_load_ps(oldD))))
+
Parallelogram::Parallelogram(Material* material, const Vector& anchor,
const Vector& in_v1, const Vector& in_v2)
: PrimitiveCommon(material, this), anchor(anchor), v1(in_v1), v2(in_v2)
@@ -76,7 +83,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
} else {
int i = rays.rayBegin;
@@ -96,7 +107,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
RayPacketData* data = rays.data;
__m128 normalx = _mm_set1_ps(normal[0]);
@@ -119,53 +134,64 @@
if(_mm_movemask_ps(hit) == 0)
continue;
+ // Real t=num/dt
+ // if(t>rays.getMinT(i))
+ // continue;
+ // Vector vi(rays.getDirection(i)*t);
+ // Real a1 = Dot(v1, vi)*o1;
__m128 vix = _mm_mul_ps(dx, t);
__m128 viy = _mm_mul_ps(dy, t);
__m128 viz = _mm_mul_ps(dz, t);
__m128 a1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(vix,
_mm_set1_ps(v1[0])), _mm_mul_ps(viy, _mm_set1_ps(v1[1]))), _mm_mul_ps(viz,
_mm_set1_ps(v1[2]))), vec_o1);
+ // if (a1 < 0 || a1 > 1)
+ // continue;
__m128 zero = _mm_setzero_ps();
__m128 one = _mm_set1_ps(1.0f);
hit = _mm_and_ps(hit, _mm_and_ps(_mm_cmpge_ps(a1, zero),
_mm_cmple_ps(a1, one)));
if(_mm_movemask_ps(hit) == 0)
continue;
+ // Real a2 = Dot(v2, vi)+o2;
+ // if (a2 < 0 || a2 > 1)
+ // continue;
+
__m128 a2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(vix,
_mm_set1_ps(v2[0])), _mm_mul_ps(viy, _mm_set1_ps(v2[1]))), _mm_mul_ps(viz,
_mm_set1_ps(v2[2]))), vec_o2);
hit = _mm_and_ps(hit, _mm_and_ps(_mm_cmpge_ps(a2, zero),
_mm_cmple_ps(a2, one)));
if(_mm_movemask_ps(hit) == 0)
continue;
+ rays.hitWithoutTminCheck(i, hit, t, getMaterial(), this, getTexCoordMapper());
+
+#if USE_SCRATCHPAD
+ // Copy the barycentric coordinates to the scratch pad
+ MANTA_ALIGN(16) float ra1[4];
+ MANTA_ALIGN(16) float ra2[4];
+ _mm_store_ps(ra1, a1);
+ _mm_store_ps(ra2, a2);
+
if(_mm_movemask_ps(hit) == 15){
- _mm_store_ps(&data->minT[i], t);
-#ifdef __x86_64
- _mm_store_si128((__m128i*)&data->hitMatl[i], _mm_set1_epi64x((long
long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i], _mm_set1_epi64x((long
long)this));
- _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long long)this));
- _mm_store_si128((__m128i*)&data->hitTex[i], _mm_set1_epi64x((long
long)getTexCoordMapper()));
- _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long long)getTexCoordMapper()));
-#else
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
-#endif
+ rays.scratchpad<Vector>(i+0) = Vector(ra1[0], ra2[0], 0);
+ rays.scratchpad<Vector>(i+1) = Vector(ra1[1], ra2[1], 0);
+ rays.scratchpad<Vector>(i+2) = Vector(ra1[2], ra2[2], 0);
+ rays.scratchpad<Vector>(i+3) = Vector(ra1[3], ra2[3], 0);
} else {
- _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t),
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
-#ifdef __x86_64
- __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit,
hit));
- __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit,
hit));
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
-
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit,
(char*)&data->hitPrim[i+2]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
-#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
-#endif
+ int hit_mask = _mm_movemask_ps(hit);
+ for(int j = 0; j < 4; ++j) {
+ if (hit_mask & (1 << j))
+ rays.scratchpad<Vector>(i+j) = Vector(ra1[j], ra2[j], 0);
+ }
}
+#else
+ if (_mm_movemask_ps(hit) == 15) {
+ _mm_store_ps(&data->texCoords[0][i], a1);
+ _mm_store_ps(&data->texCoords[1][i], a2);
+ _mm_store_ps(&data->texCoords[2][i], _mm_setzero_ps());
+ } else {
+ maskedStore_ps(hit, (float*)&data->texCoords[0][i], a1);
+ maskedStore_ps(hit, (float*)&data->texCoords[1][i], a2);
+ maskedStore_ps(hit, (float*)&data->texCoords[2][i], _mm_setzero_ps());
+ }
+#endif
}
for(;i<rays.rayEnd;i++){
Real dt=Dot(rays.getDirection(i), normal);
@@ -183,7 +209,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
}
#else
@@ -203,7 +233,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
- rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#if USE_SCRATCHPAD
+ rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
#endif
} else {
@@ -230,7 +264,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
} else {
int i = rays.rayBegin;
@@ -253,7 +291,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
RayPacketData* data = rays.data;
__m128 normalx = _mm_set1_ps(normal[0]);
@@ -298,38 +340,38 @@
if(_mm_movemask_ps(hit) == 0)
continue;
+ rays.hitWithoutTminCheck(i, hit, t, getMaterial(), this, getTexCoordMapper());
+
+#if USE_SCRATCHPAD
+ // Copy the barycentric coordinates to the scratch pad
+ MANTA_ALIGN(16) float ra1[4];
+ MANTA_ALIGN(16) float ra2[4];
+ _mm_store_ps(ra1, a1);
+ _mm_store_ps(ra2, a2);
+
if(_mm_movemask_ps(hit) == 15){
- _mm_store_ps(&data->minT[i], t);
-#ifdef __x86_64
- _mm_store_si128((__m128i*)&data->hitMatl[i], _mm_set1_epi64x((long
long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitMatl[i+2],
_mm_set1_epi64x((long long)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i], _mm_set1_epi64x((long
long)this));
- _mm_store_si128((__m128i*)&data->hitPrim[i+2],
_mm_set1_epi64x((long long)this));
- _mm_store_si128((__m128i*)&data->hitTex[i], _mm_set1_epi64x((long
long)getTexCoordMapper()));
- _mm_store_si128((__m128i*)&data->hitTex[i+2],
_mm_set1_epi64x((long long)getTexCoordMapper()));
-#else
- _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
- _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
- _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
-#endif
+ rays.scratchpad<Vector>(i+0) = Vector(ra1[0], ra2[0], 0);
+ rays.scratchpad<Vector>(i+1) = Vector(ra1[1], ra2[1], 0);
+ rays.scratchpad<Vector>(i+2) = Vector(ra1[2], ra2[2], 0);
+ rays.scratchpad<Vector>(i+3) = Vector(ra1[3], ra2[3], 0);
} else {
- _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t),
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
-#ifdef __x86_64
- __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit,
hit));
- __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit,
hit));
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
lohit, (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()),
hihit, (char*)&data->hitMatl[i+2]);
-
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit,
(char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit,
(char*)&data->hitPrim[i+2]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
- _mm_maskmoveu_si128(_mm_set1_epi64x((long
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
-#else
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)this),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
- _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
-#endif
+ int hit_mask = _mm_movemask_ps(hit);
+ for(int j = 0; j < 4; ++j) {
+ if (hit_mask & (1 << j))
+ rays.scratchpad<Vector>(i+j) = Vector(ra1[j], ra2[j], 0);
+ }
+ }
+#else
+ if (_mm_movemask_ps(hit) == 15) {
+ _mm_store_ps(&data->texCoords[0][i], a1);
+ _mm_store_ps(&data->texCoords[1][i], a2);
+ _mm_store_ps(&data->texCoords[2][i], _mm_setzero_ps());
+ } else {
+ maskedStore_ps(hit, (float*)&data->texCoords[0][i], a1);
+ maskedStore_ps(hit, (float*)&data->texCoords[1][i], a2);
+ maskedStore_ps(hit, (float*)&data->texCoords[2][i], _mm_setzero_ps());
}
+#endif
}
for(;i<rays.rayEnd;i++){
Vector dir = rays.getDirection(i);
@@ -350,7 +392,11 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
}
#else
@@ -373,26 +419,34 @@
continue;
if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
- rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#if USE_SCRATCHPAD
+ rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+ rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
}
#endif
}
}
void Parallelogram::computeTexCoords2(const RenderContext&,
- RayPacket& rays) const
+ RayPacket& rays) const
{
+#if USE_SCRATCHPAD
for(int i=rays.begin();i<rays.end();i++){
rays.setTexCoords(i, rays.scratchpad<Vector>(i));
}
+#endif
rays.setFlag(RayPacket::HaveTexture2|RayPacket::HaveTexture3);
}
void Parallelogram::computeTexCoords3(const RenderContext&,
- RayPacket& rays) const
+ RayPacket& rays) const
{
+#if USE_SCRATCHPAD
for(int i=rays.begin();i<rays.end();i++){
rays.setTexCoords(i, rays.scratchpad<Vector>(i));
}
+#endif
rays.setFlag(RayPacket::HaveTexture2|RayPacket::HaveTexture3);
}
- [MANTA] r1238 - trunk/Model/Primitives, bigler, 11/27/2006
Archive powered by MHonArc 2.6.16.