Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1051 - in trunk: Engine/Shadows Interface Model/Lights Model/Materials Model/Primitives Model/Textures


Chronological Thread 
  • From: sparker@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1051 - in trunk: Engine/Shadows Interface Model/Lights Model/Materials Model/Primitives Model/Textures
  • Date: Tue, 9 May 2006 15:13:57 -0600 (MDT)

Author: sparker
Date: Tue May  9 15:13:46 2006
New Revision: 1051

Modified:
   trunk/Engine/Shadows/HardShadows.cc
   trunk/Engine/Shadows/NoShadows.cc
   trunk/Interface/Light.h
   trunk/Interface/Packet.h
   trunk/Interface/RayPacket.cc
   trunk/Interface/RayPacket.h
   trunk/Model/Lights/PointLight.cc
   trunk/Model/Lights/PointLight.h
   trunk/Model/Materials/Phong.cc
   trunk/Model/Primitives/Parallelogram.cc
   trunk/Model/Primitives/Sphere.cc
   trunk/Model/Textures/CheckerTexture.cc
   trunk/Model/Textures/CheckerTexture.h
Log:
Verticalize and simplify light interface.  Still needs a little work for area lights
SSEify a bunch of other code


Modified: trunk/Engine/Shadows/HardShadows.cc
==============================================================================
--- trunk/Engine/Shadows/HardShadows.cc (original)
+++ trunk/Engine/Shadows/HardShadows.cc Tue May  9 15:13:46 2006
@@ -6,6 +6,12 @@
 #include <Interface/Object.h>
 #include <Interface/RayPacket.h>
 #include <Interface/Scene.h>
+#include <MantaSSE.h>
+
+// TODO
+// 0 copy in light stuff
+// eliminate cleanup loops???
+// 2-sided lighting
 
 using namespace Manta;
 
@@ -45,13 +51,104 @@
   // Compute the contribution for this light.
   int last = -1;
   do {
-    Color lightColors[RayPacket::MaxSize];
-    Vector lightDirections[RayPacket::MaxSize];
-    lights->getLight(j)->computeLight( lightColors, lightDirections, 
context, sourceRays);
+    lights->getLight(j)->computeLight(shadowRays, context, sourceRays);
 
+#ifdef MANTA_SSE
+    int b = (sourceRays.rayBegin + 3) & (~3);
+    int e = sourceRays.rayEnd & (~3);
+    if(b == e){
+      for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+        Vector dir = shadowRays.getDirection(i);
+        if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+          // If so normalize and compute length.
+          Real length = dir.normalize();
+       
+          // Populate the shadow ray.
+          shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+          shadowRays.resetHit(i, length );
+          last = i;
+        } else {
+          shadowRays.maskRay(i);
+        }
+      }
+    } else {
+      int i = shadowRays.rayBegin;
+      for(;i<b;i++){
+        Vector dir = shadowRays.getDirection(i);
+        if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+        
+          // If so normalize and compute length.
+          Real length = dir.normalize();
+       
+          // Populate the shadow ray.
+          shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+          shadowRays.resetHit(i, length );
+          last = i;
+        } else {
+          shadowRays.maskRay(i);
+        }
+      }
+      RayPacketData* sourceData = sourceRays.data;
+      RayPacketData* shadowData = shadowRays.data;
+      for(;i<e;i+=4){
+        __m128 normalx = _mm_load_ps(&sourceData->normal[0][i]);
+        __m128 normaly = _mm_load_ps(&sourceData->normal[1][i]);
+        __m128 normalz = _mm_load_ps(&sourceData->normal[2][i]);
+        __m128 dx = _mm_load_ps(&shadowData->direction[0][i]);
+        __m128 dy = _mm_load_ps(&shadowData->direction[1][i]);
+        __m128 dz = _mm_load_ps(&shadowData->direction[2][i]);
+        __m128 dir = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx), 
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
+
+        __m128 mask = _mm_cmple_ps(dir, _mm_setzero_ps());
+        _mm_store_ps((float*)&shadowData->hitMatl[i], mask);
+        if(_mm_movemask_ps(mask) != 0xf){
+          // Mask is inverted to make our life easier.  1 means do not compute shadow ray
+
+          // Normalize and compute length.
+
+          __m128 length2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, dx), 
_mm_mul_ps(dy, dy)), _mm_mul_ps(dz, dz));
+          __m128 inv_length = _mm_rsqrt_ps(length2);
+          inv_length = _mm_mul_ps(_mm_mul_ps(inv_length, 
_mm_sub_ps(_mm_set1_ps(3.f), _mm_mul_ps(length2, _mm_mul_ps(inv_length, 
inv_length)))), _mm_set1_ps(0.5f));
+          __m128 length = _mm_rcp_ps(inv_length);
+          length = _mm_mul_ps(length, _mm_sub_ps(_mm_set1_ps(2.f), 
_mm_mul_ps(inv_length, length)));
+       
+          // Populate the shadow ray.
+          _mm_store_ps(&shadowData->direction[0][i], _mm_mul_ps(dx, 
inv_length));
+          _mm_store_ps(&shadowData->direction[1][i], _mm_mul_ps(dy, 
inv_length));
+          _mm_store_ps(&shadowData->direction[2][i], _mm_mul_ps(dz, 
inv_length));
+          _mm_store_ps(&shadowData->origin[0][i], 
_mm_load_ps(&sourceData->hitPosition[0][i]));
+          _mm_store_ps(&shadowData->origin[1][i], 
_mm_load_ps(&sourceData->hitPosition[1][i]));
+          _mm_store_ps(&shadowData->origin[2][i], 
_mm_load_ps(&sourceData->hitPosition[2][i]));
+
+          // Reset the hits
+          // We want length if mask is false, or -maxt if mask is true
+          __m128 combo = _mm_or_ps(_mm_andnot_ps(mask, length), 
_mm_and_ps(_mm_set1_ps(-MAXT), mask));
+          _mm_store_ps(&shadowData->minT[i], combo);
+          last = i+3;
+        } else {
+          _mm_store_ps(&shadowData->minT[i], _mm_set1_ps(-MAXT));
+        }
+      }
+      for(;i<sourceRays.rayEnd;i++){
+        Vector dir = shadowRays.getDirection(i);
+        if(Dot(dir, sourceRays.getNormal(i)) > 0) {
+        
+          // If so normalize and compute length.
+          Real length = dir.normalize();
+       
+          // Populate the shadow ray.
+          shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
+          shadowRays.resetHit(i, length );
+          last = i;
+        } else {
+          shadowRays.maskRay(i);
+        }
+      }
+    }
+#else
     for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
       // Check to see if the light is on the front face.
-      Vector dir = lightDirections[i];
+      Vector dir = shadowRays.getDirection(i);
       if(Dot(dir, sourceRays.getNormal(i)) > 0) {
         
         // If so normalize and compute length.
@@ -59,13 +156,13 @@
        
         // Populate the shadow ray.
         shadowRays.setRay(i, sourceRays.getHitPosition(i), dir );
-        shadowRays.setColor(i, lightColors[i]);
         shadowRays.resetHit(i, length );
         last = i;
       } else {
         shadowRays.maskRay(i);
       }
     }
+#endif
     j++;
   } while(last == -1 && j < nlights);
       

Modified: trunk/Engine/Shadows/NoShadows.cc
==============================================================================
--- trunk/Engine/Shadows/NoShadows.cc   (original)
+++ trunk/Engine/Shadows/NoShadows.cc   Tue May  9 15:13:46 2006
@@ -38,13 +38,11 @@
   // Compute the contribution for this light.
   int last = -1;
   do {
-    Color lightColors[RayPacket::MaxSize];
-    Vector lightDirections[RayPacket::MaxSize];
-    lights->getLight(j)->computeLight( lightColors, lightDirections, 
context, sourceRays);
+    lights->getLight(j)->computeLight(shadowRays, context, sourceRays);
 
     for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
       // Check to see if the light is on the front face.
-      Vector dir = lightDirections[i];
+      Vector dir = shadowRays.getDirection(i);
       if(Dot(dir, sourceRays.getNormal(i)) > 0) {
         
         // If so normalize and compute length.
@@ -52,7 +50,6 @@
        
         // Populate the direction and color only
         shadowRays.setDirection(i, dir );
-        shadowRays.setColor(i, lightColors[i]);
         shadowRays.resetHit(i);
         last = i;
       } else {

Modified: trunk/Interface/Light.h
==============================================================================
--- trunk/Interface/Light.h     (original)
+++ trunk/Interface/Light.h     Tue May  9 15:13:46 2006
@@ -16,14 +16,13 @@
     Light();
     virtual ~Light();
                
-    virtual void preprocess( const PreprocessContext& context ) = 0;
+    virtual void preprocess(const PreprocessContext& context ) = 0;
 
     // This method is called on the light by the shadow algorithm. The color and direction 
     // produced by the light may change for each ray in the packet, and may change based 
     // on the render context.
-    virtual void computeLight( Color  resultColor[RayPacket::MaxSize], 
-                               Vector lightDirection[RayPacket::MaxSize], 
-                               const RenderContext &context, RayPacket 
&rays) const = 0;
+    virtual void computeLight(RayPacket& destRays, const RenderContext 
&context,
+                              RayPacket& sourceRays) const = 0;
 
   private:
     // Lights may not be copied.

Modified: trunk/Interface/Packet.h
==============================================================================
--- trunk/Interface/Packet.h    (original)
+++ trunk/Interface/Packet.h    Tue May  9 15:13:46 2006
@@ -4,6 +4,7 @@
 
 #include <RayPacketParameters.h>
 #include <Core/Color/Color.h>
+#include <Core/Geometry/Vector.h>
 #include <Core/Util/Align.h>
 
 namespace Manta {
@@ -38,6 +39,22 @@
       colordata[0][idx] = value[0];
       colordata[1][idx] = value[1];
       colordata[2][idx] = value[2];
+    }
+  };
+  template<>
+    class MANTA_ALIGN(16) Packet<Vector> {
+  public:
+    enum {
+      MaxSize = RAYPACKET_MAXSIZE
+    };
+    MANTA_ALIGN(16) Real vectordata[3][MaxSize];
+    Vector get(int idx) const {
+      return Vector(vectordata[0][idx], vectordata[1][idx], 
vectordata[2][idx]);
+    }
+    void set(int idx, const Vector& value) {
+      vectordata[0][idx] = value[0];
+      vectordata[1][idx] = value[1];
+      vectordata[2][idx] = value[2];
     }
   };
 }

Modified: trunk/Interface/RayPacket.cc
==============================================================================
--- trunk/Interface/RayPacket.cc        (original)
+++ trunk/Interface/RayPacket.cc        Tue May  9 15:13:46 2006
@@ -10,6 +10,61 @@
     return;
 
   if(flags & HaveHitRecords){
+#ifdef MANTA_SSE
+    int b = (rayBegin + 3) & (~3);
+    int e = rayEnd & (~3);
+    if(b == e){
+      for(int i=rayBegin;i<rayEnd;i++){
+        Real sum = 0;
+        for(int j=0;j<3;j++)
+          sum += data->direction[j][i] * data->direction[j][i];
+        Real length = SCIRun::Sqrt(sum);
+        if(data->hitMatl[i] != 0)
+          data->minT[i] *= length;
+        Real scale = 1/length;
+        for(int j=0;j<3;j++)
+          data->direction[j][i] *= scale;
+      }
+    } else {
+      int i = rayBegin;
+      for(;i<b;i++){
+        Real sum = 0;
+        for(int j=0;j<3;j++)
+          sum += data->direction[j][i] * data->direction[j][i];
+        Real length = SCIRun::Sqrt(sum);
+        if(data->hitMatl[i] != 0)
+          data->minT[i] *= length;
+        Real scale = 1/length;
+        for(int j=0;j<3;j++)
+          data->direction[j][i] *= scale;
+      }
+      for(;i<e;i+=4){
+        __m128 xd = _mm_load_ps(&data->direction[0][i]);
+        __m128 yd = _mm_load_ps(&data->direction[1][i]);
+        __m128 zd = _mm_load_ps(&data->direction[2][i]);
+        __m128 sum = _mm_add_ps(_mm_add_ps(_mm_mul_ps(xd, xd), 
_mm_mul_ps(yd, yd)), _mm_mul_ps(zd, zd));
+        __m128 scale =  _mm_rsqrt_ps(sum);
+        // Do one newton-raphson iteration to get the accuracy we need
+        scale = _mm_mul_ps(_mm_mul_ps(scale, _mm_sub_ps(_mm_set1_ps(3.f), 
_mm_mul_ps(sum, _mm_mul_ps(scale, scale)))), _mm_set1_ps(0.5f));
+        _mm_store_ps(&data->direction[0][i], _mm_mul_ps(xd, scale));
+        _mm_store_ps(&data->direction[1][i], _mm_mul_ps(yd, scale));
+        _mm_store_ps(&data->direction[2][i], _mm_mul_ps(zd, scale));
+
+        _mm_store_ps(&data->minT[i], _mm_div_ps(_mm_load_ps(&data->minT[i]), 
scale));
+      }
+      for(;i<rayEnd;i++){
+        Real sum = 0;
+        for(int j=0;j<3;j++)
+          sum += data->direction[j][i] * data->direction[j][i];
+        Real length = SCIRun::Sqrt(sum);
+        if(data->hitMatl[i] != 0)
+          data->minT[i] *= length;
+        Real scale = 1/length;
+        for(int j=0;j<3;j++)
+          data->direction[j][i] *= scale;
+      }
+    }
+#else
     for(int i=rayBegin;i<rayEnd;i++){
       Real sum = 0;
       for(int j=0;j<3;j++)
@@ -21,6 +76,7 @@
       for(int j=0;j<3;j++)
         data->direction[j][i] *= scale;
     }
+#endif
   } else {
 #ifdef MANTA_SSE
     int b = (rayBegin + 3) & (~3);
@@ -79,3 +135,41 @@
   flags |= NormalizedDirections;
   flags &= ~HaveInverseDirections;
 }
+
+
+void RayPacket::actualComputeHitPositions()
+{
+#ifdef MANTA_SSE
+    int b = (rayBegin + 3) & (~3);
+    int e = rayEnd & (~3);
+    if(b == e){
+      for(int i = begin(); i < end(); i++){
+        for(int j=0;j<3;j++)
+          data->hitPosition[j][i] = data->origin[j][i] + 
data->direction[j][i] * data->minT[i];
+      }
+    } else {
+      int i = rayBegin;
+      for(;i<b;i++){
+        for(int j=0;j<3;j++)
+          data->hitPosition[j][i] = data->origin[j][i] + 
data->direction[j][i] * data->minT[i];
+      }
+      for(;i<e;i+=4){
+        __m128 minT = _mm_load_ps(&data->minT[i]);
+        _mm_store_ps(&data->hitPosition[0][i], 
_mm_add_ps(_mm_load_ps(&data->origin[0][i]), 
_mm_mul_ps(_mm_load_ps(&data->direction[0][i]), minT)));
+        _mm_store_ps(&data->hitPosition[1][i], 
_mm_add_ps(_mm_load_ps(&data->origin[1][i]), 
_mm_mul_ps(_mm_load_ps(&data->direction[1][i]), minT)));
+        _mm_store_ps(&data->hitPosition[2][i], 
_mm_add_ps(_mm_load_ps(&data->origin[2][i]), 
_mm_mul_ps(_mm_load_ps(&data->direction[2][i]), minT)));
+      }
+      for(;i<rayEnd;i++){
+        for(int j=0;j<3;j++)
+          data->hitPosition[j][i] = data->origin[j][i] + 
data->direction[j][i] * data->minT[i];
+      }
+    }
+#else
+    for(int i = rayBegin; i < rayEnd; i++){
+      for(int j=0;j<3;j++)
+        data->hitPosition[j][i] = data->origin[j][i] + data->direction[j][i] 
* data->minT[i];
+    }
+#endif
+  flags |= HaveHitPositions;
+}
+

Modified: trunk/Interface/RayPacket.h
==============================================================================
--- trunk/Interface/RayPacket.h (original)
+++ trunk/Interface/RayPacket.h Tue May  9 15:13:46 2006
@@ -521,14 +521,10 @@
     {
       if(flags & HaveHitPositions)
         return;
-      for(int i=rayBegin;i<rayEnd;i++) {
-        for(int j=0;j<3;j++)
-          data->hitPosition[j][i] = data->origin[j][i] + 
data->direction[j][i] * data->minT[i];
-      }
-      flags |= HaveHitPositions;
+      
+      actualComputeHitPositions();
     }
 
-
     // Scratchpad isn't quite "vertical" yet...
     template<class T> T& scratchpad(int which) {
 
@@ -556,6 +552,7 @@
 
   private:
     void actualNormalizeDirections();
+    void actualComputeHitPositions();
 
     // Prevent accidental copying of RayPackets
     RayPacket(const RayPacket&);

Modified: trunk/Model/Lights/PointLight.cc
==============================================================================
--- trunk/Model/Lights/PointLight.cc    (original)
+++ trunk/Model/Lights/PointLight.cc    Tue May  9 15:13:46 2006
@@ -1,5 +1,6 @@
 
 #include <Model/Lights/PointLight.h>
+#include <MantaSSE.h>
 
 using namespace Manta;
 
@@ -16,14 +17,43 @@
 {
 }
 
-void PointLight::computeLight( Color  resultColor[RayPacket::MaxSize],
-                               Vector lightDirection[RayPacket::MaxSize],
-                               const RenderContext &context,
-                               RayPacket &rays) const
+void PointLight::computeLight(RayPacket& destRays, const RenderContext 
&context,
+                              RayPacket& sourceRays) const
 {
-  rays.computeHitPositions();
-  for(int i = rays.begin(); i < rays.end(); i++){
-    resultColor[i] = color;
-    lightDirection[i] = position - rays.getHitPosition(i);
-  }
+  sourceRays.computeHitPositions();
+#ifdef MANTA_SSE
+    int b = (sourceRays.rayBegin + 3) & (~3);
+    int e = sourceRays.rayEnd & (~3);
+    if(b == e){
+      for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+        destRays.setColor(i, color);
+        destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+      }
+    } else {
+      int i = sourceRays.rayBegin;
+      for(;i<b;i++){
+        destRays.setColor(i, color);
+        destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+      }
+      RayPacketData* sourceData = sourceRays.data;
+      RayPacketData* destData = destRays.data;
+      for(;i<e;i+=4){
+        _mm_store_ps(&destData->color[0][i], _mm_set1_ps(color[0]));
+        _mm_store_ps(&destData->color[1][i], _mm_set1_ps(color[1]));
+        _mm_store_ps(&destData->color[2][i], _mm_set1_ps(color[2]));
+        _mm_store_ps(&destData->direction[0][i], 
_mm_sub_ps(_mm_set1_ps(position[0]), 
_mm_load_ps(&sourceData->hitPosition[0][i])));
+        _mm_store_ps(&destData->direction[1][i], 
_mm_sub_ps(_mm_set1_ps(position[1]), 
_mm_load_ps(&sourceData->hitPosition[1][i])));
+        _mm_store_ps(&destData->direction[2][i], 
_mm_sub_ps(_mm_set1_ps(position[2]), 
_mm_load_ps(&sourceData->hitPosition[2][i])));
+      }
+      for(;i<sourceRays.rayEnd;i++){
+        destRays.setColor(i, color);
+        destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+      }
+    }
+#else
+    for(int i = sourceRays.begin(); i < sourceRays.end(); i++){
+      destRays.setColor(i, color);
+      destRays.setDirection(i, position - sourceRays.getHitPosition(i));
+    }
+#endif
 }

Modified: trunk/Model/Lights/PointLight.h
==============================================================================
--- trunk/Model/Lights/PointLight.h     (original)
+++ trunk/Model/Lights/PointLight.h     Tue May  9 15:13:46 2006
@@ -14,10 +14,8 @@
 
     virtual void preprocess(const PreprocessContext&);
 
-    virtual void computeLight( Color  resultColor[RayPacket::MaxSize],
-                               Vector lightDirection[RayPacket::MaxSize],
-                               const RenderContext &context,
-                               RayPacket &rays) const;
+    virtual void computeLight(RayPacket& rays, const RenderContext &context,
+                              RayPacket& source) const;
   private:
     Vector position;
     Color color;

Modified: trunk/Model/Materials/Phong.cc
==============================================================================
--- trunk/Model/Materials/Phong.cc      (original)
+++ trunk/Model/Materials/Phong.cc      Tue May  9 15:13:46 2006
@@ -51,20 +51,20 @@
 
 Phong::Phong(const Color& diffuse, const Color& specular,
              int specpow, ColorComponent refl)
-  : specpow(specpow/2)
+  : specpow(specpow)
 {
   diffusetex = new Constant<Color>(diffuse);
   speculartex = new Constant<Color>(specular);
   refltex = new Constant<ColorComponent>(refl);
   do_refl = (refl != 0);
-  highlight_threshold = pow(COLOR_EPSILON, 1./(2*specpow));
+  highlight_threshold = pow(COLOR_EPSILON, 1./specpow);
 }
 
 Phong::Phong(const Texture<Color>* diffusetex,
              const Texture<Color>* speculartex,
              int specpow, const Texture<ColorComponent>* refltex)
   : diffusetex(diffusetex), speculartex(speculartex), refltex(refltex),
-    specpow(specpow/2)
+    specpow(specpow)
 {
   do_refl=true;
   if (refltex) {
@@ -75,6 +75,7 @@
   } else {
     do_refl = false;
   }
+  highlight_threshold = pow(COLOR_EPSILON, 1./specpow);
 }
 
 Phong::~Phong()
@@ -147,8 +148,10 @@
           Vector H = shadowdir-dir;
           ColorComponent cos_alpha = Dot(H, normal);
           if(cos_alpha > highlight_threshold){
-            Color::ComponentType length = H.length2();
-            Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length, 
specpow);
+            Color::ComponentType length2 = H.length2();
+            Color::ComponentType inv_length;
+            _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+            Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
             for(int k=0;k<Color::NumComponents;k++)
               specularLight[k][i] += light[k] * scale;
           }
@@ -169,8 +172,10 @@
           Vector H = shadowdir-dir;
           ColorComponent cos_alpha = Dot(H, normal);
           if(cos_alpha > highlight_threshold){
-            Color::ComponentType length = H.length2();
-            Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length, 
specpow);
+            Color::ComponentType length2 = H.length2();
+            Color::ComponentType inv_length;
+            _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+            Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
             for(int k=0;k<Color::NumComponents;k++)
               specularLight[k][i] += light[k] * scale;
           }
@@ -213,9 +218,10 @@
         if(_mm_movemask_ps(mask) == 0)
           continue;
 
-        __m128 length = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Hx, Hx), 
_mm_mul_ps(Hy, Hy)), _mm_mul_ps(Hz, Hz));
+        __m128 length2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Hx, Hx), 
_mm_mul_ps(Hy, Hy)), _mm_mul_ps(Hz, Hz));
+        __m128 inv_length = _mm_rsqrt_ps(length2);
         __m128 scale = _mm_set1_ps(1.0f);
-        __m128 x = _mm_div_ps(_mm_mul_ps(cos_alpha, cos_alpha), length);
+        __m128 x = _mm_mul_ps(cos_alpha, inv_length);
         int p = specpow;
         while(p){
           if(p&1){
@@ -248,8 +254,10 @@
           Vector H = shadowdir-dir;
           ColorComponent cos_alpha = Dot(H, normal);
           if(cos_alpha > highlight_threshold){
-            Color::ComponentType length = H.length2();
-            Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length, 
specpow);
+            Color::ComponentType length2 = H.length2();
+            Color::ComponentType inv_length;
+            _mm_store_ss(&inv_length, _mm_rsqrt_ss(_mm_set_ss(length2)));
+            Color::ComponentType scale = ipow(cos_alpha*inv_length, specpow);
             for(int k=0;k<Color::NumComponents;k++)
               specularLight[k][i] += light[k] * scale;
           }
@@ -270,8 +278,8 @@
         Vector H = shadowdir-dir;
         ColorComponent cos_alpha = Dot(H, normal);
         if(cos_alpha > phong_threshold){
-          Color::ComponentType length = H.length2();
-          Color::ComponentType scale = ipow(cos_alpha*cos_alpha/length, 
specpow);
+          Color::ComponentType length = H.length();
+          Color::ComponentType scale = ipow(cos_alpha/length, specpow);
           for(int k=0;k<Color::NumComponents;k++)
             specularLight[k][i] += light[k] * scale;
         }

Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc     (original)
+++ trunk/Model/Primitives/Parallelogram.cc     Tue May  9 15:13:46 2006
@@ -109,7 +109,7 @@
         __m128 dy = _mm_load_ps(&data->direction[1][i]);
         __m128 dz = _mm_load_ps(&data->direction[2][i]);
         __m128 dt = _mm_add_ps(_mm_add_ps(_mm_mul_ps(dx, normalx), 
_mm_mul_ps(dy, normaly)), _mm_mul_ps(dz, normalz));
-        
+
         //if(Abs(dt) < (Real)1.e-6)
         //continue;
         __m128 t = _mm_div_ps(vec_num, dt);

Modified: trunk/Model/Primitives/Sphere.cc
==============================================================================
--- trunk/Model/Primitives/Sphere.cc    (original)
+++ trunk/Model/Primitives/Sphere.cc    Tue May  9 15:13:46 2006
@@ -5,6 +5,7 @@
 #include <Core/Math/MiscMath.h>
 #include <Core/Math/Trig.h>
 #include <Core/Math/Expon.h>
+#include <MantaSSE.h>
 
 using namespace Manta;
 using namespace SCIRun;
@@ -105,6 +106,118 @@
     break;
   case RayPacket::NormalizedDirections:
     {
+#ifdef MANTA_SSE
+      int b = (rays.rayBegin + 3) & (~3);
+      int e = rays.rayEnd & (~3);
+      if(b == e){
+        for(int i = rays.begin(); i < rays.end(); i++){
+          Vector O(rays.getOrigin(i)-center);
+          Vector D(rays.getDirection(i));
+          Real B = Dot(O, D);
+          Real C = Dot(O, O) - radius*radius;
+          Real disc = B*B-C;
+          if(disc >= 0){
+            Real r = Sqrt(disc);
+            Real t0 = -(r+B);
+            if(t0 > T_EPSILON){
+              rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+            } else {
+              Real t1 = r-B;
+              rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+            }
+          }
+        }
+      } else {
+        int i = rays.rayBegin;
+        for(;i<b;i++){
+          Vector O(rays.getOrigin(i)-center);
+          Vector D(rays.getDirection(i));
+          Real B = Dot(O, D);
+          Real C = Dot(O, O) - radius*radius;
+          Real disc = B*B-C;
+          if(disc >= 0){
+            Real r = Sqrt(disc);
+            Real t0 = -(r+B);
+            if(t0 > T_EPSILON){
+              rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+            } else {
+              Real t1 = r-B;
+              rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+            }
+          }
+        }
+        RayPacketData* data = rays.data;
+        for(;i<e;i+=4){
+          __m128 Ox = _mm_sub_ps(_mm_load_ps(&data->origin[0][i]), 
_mm_set1_ps(center[0]));
+          __m128 Oy = _mm_sub_ps(_mm_load_ps(&data->origin[1][i]), 
_mm_set1_ps(center[1]));
+          __m128 Oz = _mm_sub_ps(_mm_load_ps(&data->origin[2][i]), 
_mm_set1_ps(center[2]));
+          __m128 Dx = _mm_load_ps(&data->direction[0][i]);
+          __m128 Dy = _mm_load_ps(&data->direction[1][i]);
+          __m128 Dz = _mm_load_ps(&data->direction[2][i]);
+          __m128 B = _mm_add_ps(_mm_add_ps(_mm_mul_ps(Ox, Dx), 
_mm_mul_ps(Oy, Dy)), _mm_mul_ps(Oz, Dz));
+          __m128 C = _mm_sub_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(Ox, Ox), 
_mm_mul_ps(Oy, Oy)), _mm_mul_ps(Oz, Oz)), _mm_set1_ps(radius*radius));
+          __m128 disc = _mm_sub_ps(_mm_mul_ps(B, B), C);
+          __m128 hit = _mm_cmpge_ps(disc, _mm_setzero_ps());
+          if(_mm_movemask_ps(hit) == 0)
+            continue;
+
+          __m128 r = _mm_sqrt_ps(disc);
+          // -(r+B)   The xor negates the value
+          __m128 t0 = _mm_xor_ps(_mm_add_ps(r, B), 
(__m128)_mm_set1_epi32(0x80000000));
+          __m128 hit0 = _mm_and_ps(hit, _mm_cmpgt_ps(t0, 
_mm_set1_ps(T_EPSILON)));
+          if(_mm_movemask_ps(hit0) != 0){
+            hit0 = _mm_and_ps(hit, _mm_cmplt_ps(t0, 
_mm_load_ps(&data->minT[i])));
+            if(_mm_movemask_ps(hit0) == 15){
+              _mm_store_ps(&data->minT[i], t0);
+              _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+              _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+              _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+            } else if(_mm_movemask_ps(hit0) != 0) {
+              _mm_maskmoveu_si128((__m128i)t0, (__m128i)hit0, 
(char*)&data->minT[i]);
+              _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit0, (char*)&data->hitMatl[i]);
+              _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit0, 
(char*)&data->hitPrim[i]);
+              _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit0, (char*)&data->hitTex[i]);
+            }
+            // Mask off rays that successfully hit at t0
+            hit = _mm_andnot_ps(hit, hit0);
+            if(_mm_movemask_ps(hit) == 0)
+              continue;
+          }
+
+          __m128 t1 = _mm_sub_ps(r, B);
+          __m128 hit1 = _mm_and_ps(hit, _mm_cmpgt_ps(t1, 
_mm_set1_ps(T_EPSILON)));
+          hit1 = _mm_and_ps(hit1, _mm_cmplt_ps(t1, 
_mm_load_ps(&data->minT[i])));
+          if(_mm_movemask_ps(hit1) == 15){
+            _mm_store_ps(&data->minT[i], t1);
+            _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
+            _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
+            _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
+          } else if(_mm_movemask_ps(hit1) != 0){
+            _mm_maskmoveu_si128((__m128i)t1, (__m128i)hit1, 
(char*)&data->minT[i]);
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)hit1, (char*)&data->hitMatl[i]);
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)this), (__m128i)hit1, 
(char*)&data->hitPrim[i]);
+            _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)hit1, (char*)&data->hitTex[i]);
+          }
+        }
+        for(;i<rays.rayEnd;i++){
+          Vector O(rays.getOrigin(i)-center);
+          Vector D(rays.getDirection(i));
+          Real B = Dot(O, D);
+          Real C = Dot(O, O) - radius*radius;
+          Real disc = B*B-C;
+          if(disc >= 0){
+            Real r = Sqrt(disc);
+            Real t0 = -(r+B);
+            if(t0 > T_EPSILON){
+              rays.hit(i, t0, getMaterial(), this, getTexCoordMapper());
+            } else {
+              Real t1 = r-B;
+              rays.hit(i, t1, getMaterial(), this, getTexCoordMapper());
+            }
+          }
+        }
+      }
+#else
       // Rays of non-constant origin and normalized directions
       for(int i = rays.begin();i<rays.end();i++){
         Vector O(rays.getOrigin(i)-center);
@@ -123,6 +236,7 @@
           }
         }
       }
+#endif
     }
     break;
   case 0:

Modified: trunk/Model/Textures/CheckerTexture.cc
==============================================================================
--- trunk/Model/Textures/CheckerTexture.cc      (original)
+++ trunk/Model/Textures/CheckerTexture.cc      Tue May  9 15:13:46 2006
@@ -1,4 +1,185 @@
 
 #include <Model/Textures/CheckerTexture.h>
+using namespace Manta;
+
+// Specialization of CheckerTexture::mapValues for Color values.
+//
+// Computes the packet's texture coordinates (3D when need_w is set, 2D
+// otherwise), projects each ray's coordinates onto v1 and v2, and stores
+// values[0] or values[1] in results according to the parity of the sum
+// of the two integer cell indices.
+//
+// With MANTA_SSE defined, the 4-aligned interior [b, e) of the ray range
+// is shaded four rays at a time; rays before b and from e onward use the
+// scalar loop.  The vector path is taken only when b < e.  Testing
+// b == e instead is wrong: for a range such as [1, 3), b is 4 and e is
+// 0, and a head loop running up to b would shade ray 3, which lies
+// outside the active range.
+//
+// Note: the scalar loop mirrors negative coordinates and truncates,
+// while the SSE loop rounds toward -inf.  The two agree on parity except
+// when a projected coordinate is exactly a negative integer.
+template<>
+void CheckerTexture<Color>::mapValues(Packet<Color>& results,
+                                      const RenderContext& context,
+                                      RayPacket& rays) const
+{
+    if(need_w)
+      rays.computeTextureCoordinates3(context);
+    else
+      rays.computeTextureCoordinates2(context);
+
+    int i = rays.begin();
+#ifdef MANTA_SSE
+    const int b = (rays.rayBegin + 3) & (~3);
+    const int e = rays.rayEnd & (~3);
+    if(b < e){
+      // Scalar head: rays in front of the first 4-aligned index.
+      for(;i<b;i++){
+        Real vv1 = Dot(rays.getTexCoords(i), v1);
+        Real vv2 = Dot(rays.getTexCoords(i), v2);
+        if(vv1<0)
+          vv1=-vv1+1;
+        if(vv2<0)
+          vv2=-vv2+1;
+        int i1 = (int)vv1;
+        int i2 = (int)vv2;
+        int which = (i1+i2)&1;
+        results.set(i, values[which]);
+      }
+      RayPacketData* data = rays.data;
+      // Set rounding mode to round toward -inf so that the float->int
+      // conversion below behaves like floor().
+      int old_csr = _mm_getcsr();
+      _mm_setcsr((old_csr & ~_MM_ROUND_MASK) | _MM_ROUND_DOWN);
+      // Loop-invariant broadcasts of the two axes and the two colors.
+      const __m128 v1x = _mm_set1_ps(v1[0]);
+      const __m128 v1y = _mm_set1_ps(v1[1]);
+      const __m128 v1z = _mm_set1_ps(v1[2]);
+      const __m128 v2x = _mm_set1_ps(v2[0]);
+      const __m128 v2y = _mm_set1_ps(v2[1]);
+      const __m128 v2z = _mm_set1_ps(v2[2]);
+      const __m128 c0r = _mm_set1_ps(values[0][0]);
+      const __m128 c0g = _mm_set1_ps(values[0][1]);
+      const __m128 c0b = _mm_set1_ps(values[0][2]);
+      const __m128 c1r = _mm_set1_ps(values[1][0]);
+      const __m128 c1g = _mm_set1_ps(values[1][1]);
+      const __m128 c1b = _mm_set1_ps(values[1][2]);
+      for(;i<e;i+=4){
+        __m128 tx = _mm_load_ps(&data->texCoords[0][i]);
+        __m128 ty = _mm_load_ps(&data->texCoords[1][i]);
+        __m128 tz = _mm_load_ps(&data->texCoords[2][i]);
+        __m128 vv1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, v1x),
+                                           _mm_mul_ps(ty, v1y)),
+                                _mm_mul_ps(tz, v1z));
+        __m128 vv2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, v2x),
+                                           _mm_mul_ps(ty, v2y)),
+                                _mm_mul_ps(tz, v2z));
+        __m128i i1 = _mm_cvtps_epi32(vv1);
+        __m128i i2 = _mm_cvtps_epi32(vv2);
+        __m128i which = _mm_and_si128(_mm_add_epi32(i1, i2),
+                                      _mm_set1_epi32(1));
+        // Lanes are all-ones where the parity is even (select values[0]).
+        __m128i mask = _mm_cmpeq_epi32(which, _mm_setzero_si128());
+        __m128 valuer = _mm_or_ps(_mm_and_ps((__m128)mask, c0r),
+                                  _mm_andnot_ps((__m128)mask, c1r));
+        _mm_store_ps(&results.colordata[0][i], valuer);
+        __m128 valueg = _mm_or_ps(_mm_and_ps((__m128)mask, c0g),
+                                  _mm_andnot_ps((__m128)mask, c1g));
+        _mm_store_ps(&results.colordata[1][i], valueg);
+        __m128 valueb = _mm_or_ps(_mm_and_ps((__m128)mask, c0b),
+                                  _mm_andnot_ps((__m128)mask, c1b));
+        _mm_store_ps(&results.colordata[2][i], valueb);
+      }
+      // Restore the caller's rounding mode before any scalar work.
+      _mm_setcsr(old_csr);
+    }
+#endif
+    // Scalar loop: the tail after the vector loop, or the whole range
+    // when SSE is disabled or there is no complete aligned group.
+    for(;i<rays.end();i++){
+      Real vv1 = Dot(rays.getTexCoords(i), v1);
+      Real vv2 = Dot(rays.getTexCoords(i), v2);
+      if(vv1<0)
+        vv1=-vv1+1;
+      if(vv2<0)
+        vv2=-vv2+1;
+      int i1 = (int)vv1;
+      int i2 = (int)vv2;
+      int which = (i1+i2)&1;
+      results.set(i, values[which]);
+    }
+}
+
+// Specialization of CheckerTexture::mapValues for scalar float values.
+//
+// Computes the packet's texture coordinates (3D when need_w is set, 2D
+// otherwise), projects each ray's coordinates onto v1 and v2, and stores
+// values[0] or values[1] in results according to the parity of the sum
+// of the two integer cell indices.
+//
+// With MANTA_SSE defined, the 4-aligned interior [b, e) of the ray range
+// is shaded four rays at a time; rays before b and from e onward use the
+// scalar loop.  The vector path is taken only when b < e.  Testing
+// b == e instead is wrong: for a range such as [1, 3), b is 4 and e is
+// 0, and a head loop running up to b would shade ray 3, which lies
+// outside the active range.
+//
+// Note: the scalar loop mirrors negative coordinates and truncates,
+// while the SSE loop rounds toward -inf.  The two agree on parity except
+// when a projected coordinate is exactly a negative integer.
+template<>
+void CheckerTexture<float>::mapValues(Packet<float>& results,
+                                      const RenderContext& context,
+                                      RayPacket& rays) const
+{
+    if(need_w)
+      rays.computeTextureCoordinates3(context);
+    else
+      rays.computeTextureCoordinates2(context);
+
+    int i = rays.begin();
+#ifdef MANTA_SSE
+    const int b = (rays.rayBegin + 3) & (~3);
+    const int e = rays.rayEnd & (~3);
+    if(b < e){
+      // Scalar head: rays in front of the first 4-aligned index.
+      for(;i<b;i++){
+        Real vv1 = Dot(rays.getTexCoords(i), v1);
+        Real vv2 = Dot(rays.getTexCoords(i), v2);
+        if(vv1<0)
+          vv1=-vv1+1;
+        if(vv2<0)
+          vv2=-vv2+1;
+        int i1 = (int)vv1;
+        int i2 = (int)vv2;
+        int which = (i1+i2)&1;
+        results.set(i, values[which]);
+      }
+      RayPacketData* data = rays.data;
+      // Set rounding mode to round toward -inf so that the float->int
+      // conversion below behaves like floor().
+      int old_csr = _mm_getcsr();
+      _mm_setcsr((old_csr & ~_MM_ROUND_MASK) | _MM_ROUND_DOWN);
+      // Loop-invariant broadcasts of the two axes and the two values.
+      const __m128 v1x = _mm_set1_ps(v1[0]);
+      const __m128 v1y = _mm_set1_ps(v1[1]);
+      const __m128 v1z = _mm_set1_ps(v1[2]);
+      const __m128 v2x = _mm_set1_ps(v2[0]);
+      const __m128 v2y = _mm_set1_ps(v2[1]);
+      const __m128 v2z = _mm_set1_ps(v2[2]);
+      const __m128 even_value = _mm_set1_ps(values[0]);
+      const __m128 odd_value = _mm_set1_ps(values[1]);
+      for(;i<e;i+=4){
+        __m128 tx = _mm_load_ps(&data->texCoords[0][i]);
+        __m128 ty = _mm_load_ps(&data->texCoords[1][i]);
+        __m128 tz = _mm_load_ps(&data->texCoords[2][i]);
+        __m128 vv1 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, v1x),
+                                           _mm_mul_ps(ty, v1y)),
+                                _mm_mul_ps(tz, v1z));
+        __m128 vv2 = _mm_add_ps(_mm_add_ps(_mm_mul_ps(tx, v2x),
+                                           _mm_mul_ps(ty, v2y)),
+                                _mm_mul_ps(tz, v2z));
+        __m128i i1 = _mm_cvtps_epi32(vv1);
+        __m128i i2 = _mm_cvtps_epi32(vv2);
+        __m128i which = _mm_and_si128(_mm_add_epi32(i1, i2),
+                                      _mm_set1_epi32(1));
+        // Lanes are all-ones where the parity is even (select values[0]).
+        __m128i mask = _mm_cmpeq_epi32(which, _mm_setzero_si128());
+        __m128 value = _mm_or_ps(_mm_and_ps((__m128)mask, even_value),
+                                 _mm_andnot_ps((__m128)mask, odd_value));
+        _mm_store_ps(&results.data[i], value);
+      }
+      // Restore the caller's rounding mode before any scalar work.
+      _mm_setcsr(old_csr);
+    }
+#endif
+    // Scalar loop: the tail after the vector loop, or the whole range
+    // when SSE is disabled or there is no complete aligned group.
+    for(;i<rays.end();i++){
+      Real vv1 = Dot(rays.getTexCoords(i), v1);
+      Real vv2 = Dot(rays.getTexCoords(i), v2);
+      if(vv1<0)
+        vv1=-vv1+1;
+      if(vv2<0)
+        vv2=-vv2+1;
+      int i1 = (int)vv1;
+      int i2 = (int)vv2;
+      int which = (i1+i2)&1;
+      results.set(i, values[which]);
+    }
+}
 
 

Modified: trunk/Model/Textures/CheckerTexture.h
==============================================================================
--- trunk/Model/Textures/CheckerTexture.h       (original)
+++ trunk/Model/Textures/CheckerTexture.h       Tue May  9 15:13:46 2006
@@ -3,8 +3,10 @@
 #define Manta_Model_CheckerTexture_h
 
 #include <Interface/Texture.h>
+#include <Core/Color/Color.h>
 #include <Core/Geometry/Vector.h>
 #include <Interface/RayPacket.h>
+#include <MantaSSE.h>
 
 namespace Manta {
   class RayPacket;
@@ -49,7 +51,8 @@
   }
   
   template<class ValueType>
-    void CheckerTexture<ValueType>::mapValues(Packet<ValueType>& results, 
const RenderContext& context,
+    void CheckerTexture<ValueType>::mapValues(Packet<ValueType>& results,
+                                              const RenderContext& context,
                                               RayPacket& rays) const
   {
     if(need_w)
@@ -70,6 +73,16 @@
     }
   }
 
+  // The specializations below are defined in CheckerTexture.cc whether or
+  // not MANTA_SSE is set, so they are declared unconditionally here.
+  template<>
+    void CheckerTexture<Color>::mapValues(Packet<Color>& results,
+                                          const RenderContext& context,
+                                          RayPacket& rays) const;
+  template<>
+    void CheckerTexture<float>::mapValues(Packet<float>& results,
+                                          const RenderContext& context,
+                                          RayPacket& rays) const;
 
 }
 




  • [MANTA] r1051 - in trunk: Engine/Shadows Interface Model/Lights Model/Materials Model/Primitives Model/Textures, sparker, 05/09/2006

Archive powered by MHonArc 2.6.16.

Top of page