Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1051 - in trunk: Engine/Shadows Interface Model/Lights Model/Materials Model/Primitives Model/Textures
- Date: Tue, 9 May 2006 15:13:57 -0600 (MDT)
Author: sparker
Date: Tue May 9 15:13:46 2006
New Revision: 1051
Modified:
trunk/Engine/Shadows/HardShadows.cc
trunk/Engine/Shadows/NoShadows.cc
trunk/Interface/Light.h
trunk/Interface/Packet.h
trunk/Interface/RayPacket.cc
trunk/Interface/RayPacket.h
trunk/Model/Lights/PointLight.cc
trunk/Model/Lights/PointLight.h
trunk/Model/Materials/Phong.cc
trunk/Model/Primitives/Parallelogram.cc
trunk/Model/Primitives/Sphere.cc
trunk/Model/Textures/CheckerTexture.cc
trunk/Model/Textures/CheckerTexture.h
Log:
Verticalize and simplify light interface. Still needs a little work for area lights.
SSEify a bunch of other code.
Modified: trunk/Engine/Shadows/HardShadows.cc
==============================================================================
--- trunk/Engine/Shadows/HardShadows.cc (original)
+++ trunk/Engine/Shadows/HardShadows.cc Tue May 9 15:13:46 2006
@@ -6,6 +6,12 @@
#include <Interface/Object.h>
#include <Interface/RayPacket.h>
#include <Interface/Scene.h>
+#include <MantaSSE.h>
+
+// TODO
+// 0 copy in light stuff
+// eliminate cleanup loops???
+// 2-sided lighting
using namespace Manta;
@@ -45,13 +51,104 @@
// Compute the contribution for this light.
int last = -1;
do {
- Color lightColors[RayPacket::MaxSize];
- Vector lightDirections[RayPacket::MaxSize];
- lights->getLight(j)->computeLight( lightColors, lightDirections,
context, sourceRays);
+ lights->getLight(j)->computeLight(shadowRays, context, sourceRays);
+#ifdef MANTA_SSE
+ int b = (sourceRays.rayBegin + 3) & (~3);
+ int e = sourceRays.rayEnd & (~3);
+ if(b == e){
+ for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+ Vector dir = shadowRays.getDirection(i);
+ if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+ // If so normalize and compute length.
+ Real length = dir.normalize();
+
+ // Populate the shadow ray.
+ shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+ shadowRays.resetHit(i, length );
+ last = i;
+ } else {
+ shadowRays.maskRay(i);
+ }
+ }
+ } else {
+ int i = shadowRays.rayBegin;
+ for(;i<b;i++){
+ Vector dir = shadowRays.getDirection(i);
+ if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+
+ // If so normalize and compute length.
+ Real length = dir.normalize();
+
+ // Populate the shadow ray.
+ shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+ shadowRays.resetHit(i, length );
+ last = i;
+ } else {
+ shadowRays.maskRay(i);
+ }
+ }
+ RayPacketData* sourceData = sourceRays.data;
+ RayPacketData* shadowData = shadowRays.data;
+ for(;i<e;i+=4){
+ __m128 normalx = _mm_load_ps(&sourceData->normal[0][i]);
+ __m128 normaly = _mm_load_ps(&sourceData->normal[1][i]);
+ __m128 normalz = _mm_load_ps(&sourceData->normal[2][i]);
+ __m128 dx = _mm_load_ps(&shadowData->direction[0][i]);
+ __m128 dy = _mm_load_ps(&shadowData->direction[1][i]);
+ __m128 dz = _mm_load_ps(&shadowData->direction[2][i]);
+ __m128 dir = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx),
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
+
+ __m128 mask = _mm_cmple_ps(dir, _mm_setzero_ps());
+ _mm_store_ps((float*)&shadowData->hitMatl[i], mask);
+ if(_mm_movemask_ps(mask) != 0xf){
+ // Mask is inverted to make our life easier. 1 means do not compute shadow ray
+
+ // Normalize and compute length.
+
+ __m128 length2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, dx),
_mm_mul_ps(dy, dy)), _mm_mul_ps(dz, dz));
+ __m128 inv_length = _mm_rsqrt_ps(length2);
+ inv_length = _mm_mul_ps(_mm_mul_ps(inv_length,
_mm_sub_ps(_mm_set1_ps(3.f), _mm_mul_ps(length2, _mm_mul_ps(inv_length,
inv_length)))), _mm_set1_ps(0.5f));
+ __m128 length = _mm_rcp_ps(inv_length);
+ length = _mm_mul_ps(length, _mm_sub_ps(_mm_set1_ps(2.f),
_mm_mul_ps(inv_length, length)));
+
+ // Populate the shadow ray.
+ _mm_store_ps(&shadowData->direction[0][i], _mm_mul_ps(dx,
inv_length));
+ _mm_store_ps(&shadowData->direction[1][i], _mm_mul_ps(dy,
inv_length));
+ _mm_store_ps(&shadowData->direction[2][i], _mm_mul_ps(dz,
inv_length));
+ _mm_store_ps(&shadowData->origin[0][i],
_mm_load_ps(&sourceData->hitPosition[0][i]));
+ _mm_store_ps(&shadowData->origin[1][i],
_mm_load_ps(&sourceData->hitPosition[1][i]));
+ _mm_store_ps(&shadowData->origin[2][i],
_mm_load_ps(&sourceData->hitPosition[2][i]));
+
+ // Reset the hits
+ // We want length if mask is false, or -maxt if mask is true
+ __m128 combo = _mm_or_ps(_mm_andnot_ps(mask, length),
_mm_and_ps(_mm_set1_ps(-MAXT), mask));
+ _mm_store_ps(&shadowData->minT[i], combo);
+ last = i+3;
+ } else {
+ _mm_store_ps(&shadowData->minT[i], _mm_set1_ps(-MAXT));
+ }
+ }
+ for(;i<sourceRays.rayEnd;i++){
+ Vector dir = shadowRays.getDirection(i);
+ if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+
+ // If so normalize and compute length.
+ Real length = dir.normalize();
+
+ // Populate the shadow ray.
+ shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+ shadowRays.resetHit(i, length );
+ last = i;
+ } else {
+ shadowRays.maskRay(i);
+ }
+ }
+ }
+#else
for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
// Check to see if the light is on the front face.
- Vector dir = lightDirections[i];
+ Vector dir = shadowRays.getDirection(i);
if(Dot(dir, sourceRays.getNormal(i)) > 0) {
// If so normalize and compute length.
@@ -59,13 +156,13 @@
// Populate the shadow ray.
shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
- shadowRays.setColor(i, lightColors[i]);
shadowRays.resetHit(i, length );
last = i;
} else {
shadowRays.maskRay(i);
}
}
+#endif
j++;
} while(last == -1 && j < nlights);
Modified: trunk/Engine/Shadows/NoShadows.cc
==============================================================================
--- trunk/Engine/Shadows/NoShadows.cc (original)
+++ trunk/Engine/Shadows/NoShadows.cc Tue May 9 15:13:46 2006
@@ -38,13 +38,11 @@
// Compute the contribution for this light.
int last = -1;
do {
- Color lightColors[RayPacket::MaxSize];
- Vector lightDirections[RayPacket::MaxSize];
- lights->getLight(j)->computeLight( lightColors, lightDirections,
context, sourceRays);
+ lights->getLight(j)->computeLight(shadowRays, context, sourceRays);
for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
// Check to see if the light is on the front face.
- Vector dir = lightDirections[i];
+ Vector dir = shadowRays.getDirection(i);
if(Dot(dir, sourceRays.getNormal(i)) > 0) {
// If so normalize and compute length.
@@ -52,7 +50,6 @@
// Populate the direction and color only
shadowRays.setDirection(i, dir );
- shadowRays.setColor(i, lightColors[i]);
shadowRays.resetHit(i);
last = i;
} else {
Modified: trunk/Interface/Light.h
==============================================================================
--- trunk/Interface/Light.h (original)
+++ trunk/Interface/Light.h Tue May 9 15:13:46 2006
@@ -16,14 +16,13 @@
Light();
virtual ~Light();
- virtual void preprocess( const PreprocessContext& context ) = 0;
+ virtual void preprocess(const PreprocessContext& context ) = 0;
// This method is called on the light by the shadow algorithm. The color and direction
// produced by the light may change for each ray in the packet, and may change based
// on the render context.
- virtual void computeLight( Color resultColor[RayPacket::MaxSize],
- Vector lightDirection[RayPacket::MaxSize],
- const RenderContext &context, RayPacket
&rays) const = 0;
+ virtual void computeLight(RayPacket& destRays, const RenderContext
&context,
+ RayPacket& sourceRays) const = 0;
private:
// Lights may not be copied.
Modified: trunk/Interface/Packet.h
==============================================================================
--- trunk/Interface/Packet.h (original)
+++ trunk/Interface/Packet.h Tue May 9 15:13:46 2006
@@ -4,6 +4,7 @@
#include <RayPacketParameters.h>
#include <Core/Color/Color.h>
+#include <Core/Geometry/Vector.h>
#include <Core/Util/Align.h>
namespace Manta {
@@ -38,6 +39,22 @@
colordata[0][idx] = value[0];
colordata[1][idx] = value[1];
colordata[2][idx] = value[2];
+ }
+ };
+ template<>
+ class MANTA_ALIGN(16) Packet<Vector> {
+ public:
+ enum {
+ MaxSize = RAYPACKET_MAXSIZE
+ };
+ MANTA_ALIGN(16) Real vectordata[3][MaxSize];
+ Vector get(int idx) const {
+ return Vector(vectordata[0][idx], vectordata[1][idx],
vectordata[2][idx]);
+ }
+ void set(int idx, const Vector& value) {
+ vectordata[0][idx] = value[0];
+ vectordata[1][idx] = value[1];
+ vectordata[2][idx] = value[2];
}
};
}
Modified: trunk/Interface/RayPacket.cc
==============================================================================
--- trunk/Interface/RayPacket.cc (original)
+++ trunk/Interface/RayPacket.cc Tue May 9 15:13:46 2006
@@ -10,6 +10,61 @@
return;
if(flags & HaveHitRecords){
+#ifdef MANTA_SSE
+ int b = (rayBegin + 3) & (~3);
+ int e = rayEnd & (~3);
+ if(b == e){
+ for(int i=rayBegin;i<rayEnd;i++){
+ Real sum = 0;
+ for(int j=0;j<3;j++)
+ sum += data->direction[j][i] * data->direction[j][i];
+ Real length = SCIRun::Sqrt(sum);
+ if(data->hitMatl[i] != 0)
+ data->minT[i] *= length;
+ Real scale = 1/length;
+ for(int j=0;j<3;j++)
+ data->direction[j][i] *= scale;
+ }
+ } else {
+ int i = rayBegin;
+ for(;i<b;i++){
+ Real sum = 0;
+ for(int j=0;j<3;j++)
+ sum += data->direction[j][i] * data->direction[j][i];
+ Real length = SCIRun::Sqrt(sum);
+ if(data->hitMatl[i] != 0)
+ data->minT[i] *= length;
+ Real scale = 1/length;
+ for(int j=0;j<3;j++)
+ data->direction[j][i] *= scale;
+ }
+ for(;i<e;i+=4){
+ __m128 xd = _mm_load_ps(&data->direction[0][i]);
+ __m128 yd = _mm_load_ps(&data->direction[1][i]);
+ __m128 zd = _mm_load_ps(&data->direction[2][i]);
+ __m128 sum = _mm_add_ps(_mm_add_ps(_mm_mul_ps(xd, xd),
_mm_mul_ps(yd, yd)), _mm_mul_ps(zd, zd));
+ __m128 scale = _mm_rsqrt_ps(sum);
+ // Do one newton-raphson iteration to get the accuracy we need
+ scale = _mm_mul_ps(_mm_mul_ps(scale, _mm_sub_ps(_mm_set1_ps(3.f),
_mm_mul_ps(sum, _mm_mul_ps(scale, scale)))), _mm_set1_ps(0.5f));
+ _mm_store_ps(&data->direction[0][i], _mm_mul_ps(xd, scale));
+ _mm_store_ps(&data->direction[1][i], _mm_mul_ps(yd, scale));
+ _mm_store_ps(&data->direction[2][i], _mm_mul_ps(zd, scale));
+
+ _mm_store_ps(&data->minT[i], _mm_div_ps(_mm_load_ps(&data->minT[i]),
scale));
+ }
+ for(;i<rayEnd;i++){
+ Real sum = 0;
+ for(int j=0;j<3;j++)
+ sum += data->direction[j][i] * data->direction[j][i];
+ Real length = SCIRun::Sqrt(sum);
+ if(data->hitMatl[i] != 0)
+ data->minT[i] *= length;
+ Real scale = 1/length;
+ for(int j=0;j<3;j++)
+ data->direction[j][i] *= scale;
+ }
+ }
+#else
for(int i=rayBegin;i<rayEnd;i++){
Real sum = 0;
for(int j=0;j<3;j++)
@@ -21,6 +76,7 @@
for(int j=0;j<3;j++)
data->direction[j][i] *= scale;
}
+#endif
} else {
#ifdef MANTA_SSE
int b = (rayBegin + 3) & (~3);
@@ -79,3 +135,41 @@
flags |= NormalizedDirections;
flags &= ~HaveInverseDirections;
}
+
+
+void RayPacket::actualComputeHitPositions()
+{
+#ifdef MANTA_SSE
+ int b = (rayBegin + 3) & (~3);
+ int e = rayEnd & (~3);
+ if(b == e){
+ for(int i = begin(); i < end(); i++){
+ for(int j=0;j<3;j++)
+ data->hitPosition[j][i] = data->origin[j][i] +
data->direction[j][i] * data->minT[i];
+ }
+ } else {
+ int i = rayBegin;
+ for(;i<b;i++){
+ for(int j=0;j<3;j++)
+ data->hitPosition[j][i] = data->origin[j][i] +
data->direction[j][i] * data->minT[i];
+ }
+ for(;i<e;i+=4){
+ __m128 minT = _mm_load_ps(&data->minT[i]);
+ _mm_store_ps(&data->hitPosition[0][i],
_mm_add_ps(_mm_load_ps(&data->origin[0][i]),
_mm_mul_ps(_mm_load_ps(&data->direction[0][i]), minT)));
+ _mm_store_ps(&data->hitPosition[1][i],
_mm_add_ps(_mm_load_ps(&data->origin[1][i]),
_mm_mul_ps(_mm_load_ps(&data->direction[1][i]), minT)));
+ _mm_store_ps(&data->hitPosition[2][i],
_mm_add_ps(_mm_load_ps(&data->origin[2][i]),
_mm_mul_ps(_mm_load_ps(&data->direction[2][i]), minT)));
+ }
+ for(;i<rayEnd;i++){
+ for(int j=0;j<3;j++)
+ data->hitPosition[j][i] = data->origin[j][i] +
data->direction[j][i] * data->minT[i];
+ }
+ }
+#else
+ for(int i = rayBegin; i < rayEnd; i++){
+ for(int j=0;j<3;j++)
+ data->hitPosition[j][i] = data->origin[j][i] + data->direction[j][i]
* data->minT[i];
+ }
+#endif
+ flags |= HaveHitPositions;
+}
+
Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Tue May 9 15:13:46 2006
@@ -521,14 +521,10 @@
{
if(flags & HaveHitPositions)
return;
- for(int i=rayBegin;i<rayEnd;i++) {
- for(int j=0;j<3;j++)
- data->hitPosition[j][i] = data->origin[j][i] +
data->direction[j][i] * data->minT[i];
- }
- flags |= HaveHitPositions;
+
+ actualComputeHitPositions();
}
-
// Scratchpad isn't quite "vertical" yet...
template<class T> T& scratchpad(int which) {
@@ -556,6 +552,7 @@
private:
void actualNormalizeDirections();
+ void actualComputeHitPositions();
// Prevent accidental copying of RayPackets
RayPacket(const RayPacket&);
Modified: trunk/Model/Lights/PointLight.cc
==============================================================================
--- trunk/Model/Lights/PointLight.cc (original)
+++ trunk/Model/Lights/PointLight.cc Tue May 9 15:13:46 2006
@@ -1,5 +1,6 @@
#include <Model/Lights/PointLight.h>
+#include <MantaSSE.h>
using namespace Manta;
@@ -16,14 +17,43 @@
{
}
-void PointLight::computeLight( Color resultColor[RayPacket::MaxSize],
- Vector lightDirection[RayPacket::MaxSize],
- const RenderContext &context,
- RayPacket &rays) const
+void PointLight::computeLight(RayPacket& destRays, const RenderContext
&context,
+ RayPacket& sourceRays) const
{
- rays.computeHitPositions();
- for(int i = rays.begin(); i < rays.end(); i++){
- resultColor[i] = color;
- lightDirection[i] = position - rays.getHitPosition(i);
- }
+ sourceRays.computeHitPositions();
+#ifdef MANTA_SSE
+ int b = (sourceRays.rayBegin + 3) & (~3);
+ int e = sourceRays.rayEnd & (~3);
+ if(b == e){
+ for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+ destRays.setColor(i, color);
+ destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+ }
+ } else {
+ int i = sourceRays.rayBegin;
+ for(;i<b;i++){
+ destRays.setColor(i, color);
+ destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+ }
+ RayPacketData* sourceData = sourceRays.data;
+ RayPacketData* destData = destRays.data;
+ for(;i<e;i+=4){
+ _mm_store_ps(&destData->color[0][i], _mm_set1_ps(color[0]));
+ _mm_store_ps(&destData->color[1][i], _mm_set1_ps(color[1]));
+ _mm_store_ps(&destData->color[2][i], _mm_set1_ps(color[2]));
+ _mm_store_ps(&destData->direction[0][i],
_mm_sub_ps(_mm_set1_ps(position[0]),
_mm_load_ps(&sourceData->hitPosition[0][i])));
+ _mm_store_ps(&destData->direction[1][i],
_mm_sub_ps(_mm_set1_ps(position[1]),
_mm_load_ps(&sourceData->hitPosition[1][i])));
+ _mm_store_ps(&destData->direction[2][i],
_mm_sub_ps(_mm_set1_ps(position[2]),
_mm_load_ps(&sourceData->hitPosition[2][i])));
+ }
+ for(;i<sourceRays.rayEnd;i++){
+ destRays.setColor(i, color);
+ destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+ }
+ }
+#else
+ for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+ destRays.setColor(i, color);
+ destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+ }
+#endif
}
Modified: trunk/Model/Lights/PointLight.h
==============================================================================
--- trunk/Model/Lights/PointLight.h (original)
+++ trunk/Model/Lights/PointLight.h Tue May 9 15:13:46 2006
@@ -14,10 +14,8 @@
virtual void preprocess(const PreprocessContext&);
- virtual void computeLight( Color resultColor[RayPacket::MaxSize],
- Vector lightDirection[RayPacket::MaxSize],
- const RenderContext &context,
- RayPacket &rays) const;
+ virtual void computeLight(RayPacket& rays, const RenderContext &context,
+ RayPacket& source) const;
private:
Vector position;
Color color;
Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc (original)
+++ trunk/Model/Materials/Phong.cc Tue May 9 15:13:46 2006
@@ -51,20 +51,20 @@
Phong::Phong(const Color& diffuse, const Color& specular,
int specpow, ColorComponent refl)
- : specpow(specpow/2)
+ : specpow(specpow)
{
diffusetex = new Constant<Color>(diffuse);
speculartex = new Constant<Color>(specular);
refltex = new Constant<ColorComponent>(refl);
do_refl = (refl != 0);
- highlight_threshold = pow(COLOR_EPSILON, 1./(2*specpow));
+ highlight_threshold = pow(COLOR_EPSILON, 1./specpow);
}
Phong::Phong(const Texture<Color>* diffusetex,
const Texture<Color>* speculartex,
int specpow, const Texture<ColorComponent>* refltex)
: diffusetex(diffusetex), speculartex(speculartex), refltex(refltex),
- specpow(specpow/2)
+ specpow(specpow)
{
do_refl=true;
if (refltex) {
@@ -75,6 +75,7 @@
} else {
do_refl = false;
}
+ highlight_threshold = pow(COLOR_EPSILON, 1./specpow);
}
Phong::~Phong()
@@ -147,8 +148,10 @@
Vector H = shadowdir-dir;
ColorComponent cos_alpha = Dot(H, normal);
if(cos_alpha > highlight_threshold){
- Color::ComponentType length = H.length2();
- Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length,
specpow);
+ Color::ComponentType length2 = H.length2();
+ Color::ComponentType inv_length;
+ _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+ Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
for(int k=0;k<Color::NumComponents;k++)
specularLight[k][i] += light[k] * scale;
}
@@ -169,8 +172,10 @@
Vector H = shadowdir-dir;
ColorComponent cos_alpha = Dot(H, normal);
if(cos_alpha > highlight_threshold){
- Color::ComponentType length = H.length2();
- Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length,
specpow);
+ Color::ComponentType length2 = H.length2();
+ Color::ComponentType inv_length;
+ _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+ Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
for(int k=0;k<Color::NumComponents;k++)
specularLight[k][i] += light[k] * scale;
}
@@ -213,9 +218,10 @@
if(_mm_movemask_ps(mask) == 0)
continue;
- __m128 length = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Hx, Hx),
_mm_mul_ps(Hy, Hy)), _mm_mul_ps(Hz, Hz));
+ __m128 length2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Hx, Hx),
_mm_mul_ps(Hy, Hy)), _mm_mul_ps(Hz, Hz));
+ __m128 inv_length = _mm_rsqrt_ps(length2);
__m128 scale = _mm_set1_ps(1.0f);
- __m128 x = _mm_div_ps(_mm_mul_ps(cos_alpha, cos_alpha), length);
+ __m128 x = _mm_mul_ps(cos_alpha, inv_length);
int p = specpow;
while(p){
if(p&1){
@@ -248,8 +254,10 @@
Vector H = shadowdir-dir;
ColorComponent cos_alpha = Dot(H, normal);
if(cos_alpha > highlight_threshold){
- Color::ComponentType length = H.length2();
- Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length,
specpow);
+ Color::ComponentType length2 = H.length2();
+ Color::ComponentType inv_length;
+ _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+ Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
for(int k=0;k<Color::NumComponents;k++)
specularLight[k][i] += light[k] * scale;
}
@@ -270,8 +278,8 @@
Vector H = shadowdir-dir;
ColorComponent cos_alpha = Dot(H, normal);
if(cos_alpha > phong_threshold){
- Color::ComponentType length = H.length2();
- Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length,
specpow);
+ Color::ComponentType length = H.length();
+ Color::ComponentType scale = ipow(cos_alpha/length, specpow);
for(int k=0;k<Color::NumComponents;k++)
specularLight[k][i] += light[k] * scale;
}
Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc (original)
+++ trunk/Model/Primitives/Parallelogram.cc Tue May 9 15:13:46 2006
@@ -109,7 +109,7 @@
__m128 dy = _mm_load_ps(&data->direction[1][i]);
__m128 dz = _mm_load_ps(&data->direction[2][i]);
__m128 dt = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx),
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
-
+
//if(Abs(dt) < (Real)1.e-6)
//continue;
__m128 t = _mm_div_ps(vec_num, dt);
Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc (original)
+++ trunk/Model/Primitives/Sphere.cc Tue May 9 15:13:46 2006
@@ -5,6 +5,7 @@
#include <Core/Math/MiscMath.h>
#include <Core/Math/Trig.h>
#include <Core/Math/Expon.h>
+#include <MantaSSE.h>
using namespace Manta;
using namespace SCIRun;
@@ -105,6 +106,118 @@
break;
case RayPacket::NormalizedDirections:
{
+#ifdef MANTA_SSE
+ int b = (rays.rayBegin + 3) & (~3);
+ int e = rays.rayEnd & (~3);
+ if(b == e){
+ for(int i = rays.begin(); i < rays.end(); i++){
+ Vector O(rays.getOrigin(i)-center);
+ Vector D(rays.getDirection(i));
+ Real B = Dot(O, D);
+ Real C = Dot(O, O) - radius*radius;
+ Real disc = B*B-C;
+ if(disc >= 0){
+ Real r = Sqrt(disc);
+ Real t0 = -(r+B);
+ if(t0 > T_EPSILON){
+ rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+ } else {
+ Real t1 = r-B;
+ rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+ }
+ }
+ }
+ } else {
+ int i = rays.rayBegin;
+ for(;i<b;i++){
+ Vector O(rays.getOrigin(i)-center);
+ Vector D(rays.getDirection(i));
+ Real B = Dot(O, D);
+ Real C = Dot(O, O) - radius*radius;
+ Real disc = B*B-C;
+ if(disc >= 0){
+ Real r = Sqrt(disc);
+ Real t0 = -(r+B);
+ if(t0 > T_EPSILON){
+ rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+ } else {
+ Real t1 = r-B;
+ rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+ }
+ }
+ }
+ RayPacketData* data = rays.data;
+ for(;i<e;i+=4){
+ __m128 Ox = _mm_sub_ps(_mm_load_ps(&data->origin[0][i]),
_mm_set1_ps(center[0]));
+ __m128 Oy = _mm_sub_ps(_mm_load_ps(&data->origin[1][i]),
_mm_set1_ps(center[1]));
+ __m128 Oz = _mm_sub_ps(_mm_load_ps(&data->origin[2][i]),
_mm_set1_ps(center[2]));
+ __m128 Dx = _mm_load_ps(&data->direction[0][i]);
+ __m128 Dy = _mm_load_ps(&data->direction[1][i]);
+ __m128 Dz = _mm_load_ps(&data->direction[2][i]);
+ __m128 B = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Ox, Dx),
_mm_mul_ps(Oy, Dy)), _mm_mul_ps(Oz, Dz));
+ __m128 C = _mm_sub_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(Ox, Ox),
_mm_mul_ps(Oy, Oy)), _mm_mul_ps(Oz, Oz)), _mm_set1_ps(radius*radius));
+ __m128 disc = _mm_sub_ps(_mm_mul_ps(B, B), C);
+ __m128 hit = _mm_cmpge_ps(disc, _mm_setzero_ps());
+ if(_mm_movemask_ps(hit) == 0)
+ continue;
+
+ __m128 r = _mm_sqrt_ps(disc);
+ // -(r+B) The xor negates the value
+ __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B),
(__m128)_mm_set1_epi32(0x80000000));
+ __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0,
_mm_set1_ps(T_EPSILON)));
+ if(_mm_movemask_ps(hit0) != 0){
+ hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0,
_mm_load_ps(&data->minT[i])));
+ if(_mm_movemask_ps(hit0) == 15){
+ _mm_store_ps(&data->minT[i], t0);
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+ } else if(_mm_movemask_ps(hit0) != 0) {
+ _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit0, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit0, (char*)&data->hitTex[i]);
+ }
+ // Mask off rays that successfully hit at t0
+ hit = _mm_andnot_ps(hit, hit0);
+ if(_mm_movemask_ps(hit) == 0)
+ continue;
+ }
+
+ __m128 t1 = _mm_sub_ps(r, B);
+ __m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1,
_mm_set1_ps(T_EPSILON)));
+ hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1,
_mm_load_ps(&data->minT[i])));
+ if(_mm_movemask_ps(hit1) == 15){
+ _mm_store_ps(&data->minT[i], t1);
+ _mm_store_si128((__m128i*)&data->hitMatl[i],
_mm_set1_epi32((int)getMaterial()));
+ _mm_store_si128((__m128i*)&data->hitPrim[i],
_mm_set1_epi32((int)this));
+ _mm_store_si128((__m128i*)&data->hitTex[i],
_mm_set1_epi32((int)getTexCoordMapper()));
+ } else if(_mm_movemask_ps(hit1) != 0){
+ _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1,
(char*)&data->minT[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()),
(__m128i)hit1, (char*)&data->hitMatl[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1,
(char*)&data->hitPrim[i]);
+ _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()),
(__m128i)hit1, (char*)&data->hitTex[i]);
+ }
+ }
+ for(;i<rays.rayEnd;i++){
+ Vector O(rays.getOrigin(i)-center);
+ Vector D(rays.getDirection(i));
+ Real B = Dot(O, D);
+ Real C = Dot(O, O) - radius*radius;
+ Real disc = B*B-C;
+ if(disc >= 0){
+ Real r = Sqrt(disc);
+ Real t0 = -(r+B);
+ if(t0 > T_EPSILON){
+ rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+ } else {
+ Real t1 = r-B;
+ rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+ }
+ }
+ }
+ }
+#else
// Rays of non-constant origin and normalized directions
for(int i = rays.begin();i<rays.end();i++){
Vector O(rays.getOrigin(i)-center);
@@ -123,6 +236,7 @@
}
}
}
+#endif
}
break;
case 0:
Modified: trunk/Model/Textures/CheckerTexture.cc
==============================================================================
--- trunk/Model/Textures/CheckerTexture.cc (original)
+++ trunk/Model/Textures/CheckerTexture.cc Tue May 9 15:13:46 2006
@@ -1,4 +1,185 @@
#include <Model/Textures/CheckerTexture.h>
+using namespace Manta;
+
+template<>
+void CheckerTexture<Color>::mapValues(Packet<Color>& results,
+ const RenderContext& context,
+ RayPacket& rays) const
+{
+ if(need_w)
+ rays.computeTextureCoordinates3(context);
+ else
+ rays.computeTextureCoordinates2(context);
+#ifdef MANTA_SSE
+ int b = (rays.rayBegin + 3) & (~3);
+ int e = rays.rayEnd & (~3);
+ if(b == e){
+ for(int i = rays.begin(); i < rays.end(); i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ } else {
+ int i = rays.rayBegin;
+ for(;i<b;i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ RayPacketData* data = rays.data;
+ // Set rounding modes to round toward -inf
+ int old_csr = _mm_getcsr();
+ _mm_setcsr((old_csr & ~_MM_ROUND_MASK) | _MM_ROUND_DOWN);
+ for(;i<e;i+=4){
+ __m128 tx = _mm_load_ps(&data->texCoords[0][i]);
+ __m128 ty = _mm_load_ps(&data->texCoords[1][i]);
+ __m128 tz = _mm_load_ps(&data->texCoords[2][i]);
+ __m128 vv1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx,
_mm_set1_ps(v1[0])), _mm_mul_ps(ty, _mm_set1_ps(v1[1]))), _mm_mul_ps(tz,
_mm_set1_ps(v1[2])));
+ __m128 vv2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx,
_mm_set1_ps(v2[0])), _mm_mul_ps(ty, _mm_set1_ps(v2[1]))), _mm_mul_ps(tz,
_mm_set1_ps(v2[2])));
+ __m128i i1 = _mm_cvtps_epi32(vv1);
+ __m128i i2 = _mm_cvtps_epi32(vv2);
+ __m128i which = _mm_and_si128(_mm_add_epi32(i1, i2),
_mm_set1_epi32(1));
+ __m128i mask = _mm_cmpeq_epi32(which, _mm_setzero_si128());
+ __m128 valuer = _mm_or_ps(_mm_and_ps((__m128)mask,
_mm_set1_ps(values[0][0])),
+ _mm_andnot_ps((__m128)mask,
_mm_set1_ps(values[1][0])));
+ _mm_store_ps(&results.colordata[0][i], valuer);
+ __m128 valueg = _mm_or_ps(_mm_and_ps((__m128)mask,
_mm_set1_ps(values[0][1])),
+ _mm_andnot_ps((__m128)mask,
_mm_set1_ps(values[1][1])));
+ _mm_store_ps(&results.colordata[1][i], valueg);
+ __m128 valueb = _mm_or_ps(_mm_and_ps((__m128)mask,
_mm_set1_ps(values[0][2])),
+ _mm_andnot_ps((__m128)mask,
_mm_set1_ps(values[1][2])));
+ _mm_store_ps(&results.colordata[2][i], valueb);
+ }
+ _mm_setcsr(old_csr);
+ for(;i<rays.rayEnd;i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ }
+#else
+ for(int i = rays.begin(); i < rays.end(); i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+#endif
+}
+
+template<>
+void CheckerTexture<float>::mapValues(Packet<float>& results,
+ const RenderContext& context,
+ RayPacket& rays) const
+{
+ if(need_w)
+ rays.computeTextureCoordinates3(context);
+ else
+ rays.computeTextureCoordinates2(context);
+#ifdef MANTA_SSE
+ int b = (rays.rayBegin + 3) & (~3);
+ int e = rays.rayEnd & (~3);
+ if(b == e){
+ for(int i = rays.begin(); i < rays.end(); i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ } else {
+ int i = rays.rayBegin;
+ for(;i<b;i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ RayPacketData* data = rays.data;
+ // Set rounding modes to round toward -inf
+ int old_csr = _mm_getcsr();
+ _mm_setcsr((old_csr & ~_MM_ROUND_MASK) | _MM_ROUND_DOWN);
+ for(;i<e;i+=4){
+ __m128 tx = _mm_load_ps(&data->texCoords[0][i]);
+ __m128 ty = _mm_load_ps(&data->texCoords[1][i]);
+ __m128 tz = _mm_load_ps(&data->texCoords[2][i]);
+ __m128 vv1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx,
_mm_set1_ps(v1[0])), _mm_mul_ps(ty, _mm_set1_ps(v1[1]))), _mm_mul_ps(tz,
_mm_set1_ps(v1[2])));
+ __m128 vv2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx,
_mm_set1_ps(v2[0])), _mm_mul_ps(ty, _mm_set1_ps(v2[1]))), _mm_mul_ps(tz,
_mm_set1_ps(v2[2])));
+ __m128i i1 = _mm_cvtps_epi32(vv1);
+ __m128i i2 = _mm_cvtps_epi32(vv2);
+ __m128i which = _mm_and_si128(_mm_add_epi32(i1, i2),
_mm_set1_epi32(1));
+ __m128i mask = _mm_cmpeq_epi32(which, _mm_setzero_si128());
+ __m128 value = _mm_or_ps(_mm_and_ps((__m128)mask,
_mm_set1_ps(values[0])),
+ _mm_andnot_ps((__m128)mask,
_mm_set1_ps(values[1])));
+ _mm_store_ps(&results.data[i], value);
+ }
+ _mm_setcsr(old_csr);
+ for(;i<rays.rayEnd;i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+ }
+#else
+ for(int i = rays.begin(); i < rays.end(); i++){
+ Real vv1 = Dot(rays.getTexCoords(i), v1);
+ Real vv2 = Dot(rays.getTexCoords(i), v2);
+ if(vv1<0)
+ vv1=-vv1+1;
+ if(vv2<0)
+ vv2=-vv2+1;
+ int i1 = (int)vv1;
+ int i2 = (int)vv2;
+ int which = (i1+i2)&1;
+ results.set(i, values[which]);
+ }
+#endif
+}
Modified: trunk/Model/Textures/CheckerTexture.h
==============================================================================
--- trunk/Model/Textures/CheckerTexture.h (original)
+++ trunk/Model/Textures/CheckerTexture.h Tue May 9 15:13:46 2006
@@ -3,8 +3,10 @@
#define Manta_Model_CheckerTexture_h
#include <Interface/Texture.h>
+#include <Core/Color/Color.h>
#include <Core/Geometry/Vector.h>
#include <Interface/RayPacket.h>
+#include <MantaSSE.h>
namespace Manta {
class RayPacket;
@@ -49,7 +51,8 @@
}
template<class ValueType>
- void CheckerTexture<ValueType>::mapValues(Packet<ValueType>& results,
const RenderContext& context,
+ void CheckerTexture<ValueType>::mapValues(Packet<ValueType>& results,
+ const RenderContext& context,
RayPacket& rays) const
{
if(need_w)
@@ -70,6 +73,16 @@
}
}
+#ifdef MANTA_SSE
+ template<>
+ void CheckerTexture<Color>::mapValues(Packet<Color>& results,
+ const RenderContext& context,
+ RayPacket& rays) const;
+ template<>
+ void CheckerTexture<float>::mapValues(Packet<float>& results,
+ const RenderContext& context,
+ RayPacket& rays) const;
+#endif
}
- [MANTA] r1051 - in trunk: Engine/Shadows Interface Model/Lights Model/Materials Model/Primitives Model/Textures, sparker, 05/09/2006
Archive powered by MHonArc 2.6.16.