Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1029 - in trunk: Engine/PixelSamplers Model/Cameras


Chronological Thread 
  • From: sparker@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1029 - in trunk: Engine/PixelSamplers Model/Cameras
  • Date: Sat, 29 Apr 2006 21:48:44 -0600 (MDT)

Author: sparker
Date: Sat Apr 29 21:48:36 2006
New Revision: 1029

Modified:
   trunk/Engine/PixelSamplers/SingleSampler.cc
   trunk/Engine/PixelSamplers/SingleSampler.h
   trunk/Model/Cameras/PinholeCamera.cc
Log:
Sissify PinholeCamera and SingleSampler
On a MacBook Pro with -renderer raygen we now get 114 fps on 2 CPUs with no
image display, and 58 fps with image display.


Modified: trunk/Engine/PixelSamplers/SingleSampler.cc
==============================================================================
--- trunk/Engine/PixelSamplers/SingleSampler.cc (original)
+++ trunk/Engine/PixelSamplers/SingleSampler.cc Sat Apr 29 21:48:36 2006
@@ -4,6 +4,7 @@
 #include <Interface/Fragment.h>
 #include <Interface/RayPacket.h>
 #include <Interface/Renderer.h>
+#include <MantaSSE.h>
 using namespace Manta;
 
 PixelSampler* SingleSampler::create(const vector<string>& args)
@@ -37,6 +38,14 @@
   ci.yscale = ci.xscale;
   ci.xoffset = (-xres/(Real)2+(Real)0.5)*ci.xscale; // Offset to pixel center
   ci.yoffset = (-yres/(Real)2+(Real)0.5)*ci.yscale;
+#if MANTA_SSE
+  ci.vec_xscale = _mm_set1_ps(ci.xscale);
+  ci.vec_xscale4 = _mm_set1_ps(ci.xscale*4);
+  ci.vec_xscale_cascade = _mm_set_ps(ci.xscale*3, ci.xscale*2, ci.xscale, 0);
+  ci.vec_yscale = _mm_set1_ps(ci.yscale);
+  ci.vec_xoffset = _mm_set1_ps(ci.xoffset);
+  ci.vec_yoffset = _mm_set1_ps(ci.yoffset);
+#endif
   context.renderer->setupDisplayChannel(context);
 }
 
@@ -71,34 +80,70 @@
 
       // If so place each pixel in the ray packet relative to the first
       // fragment.
-      int b = fragment.begin();
+      int b = fragment.begin()+f;
       Real px = fragment.getX(b)*ci.xscale+ci.xoffset;
       Real py = fragment.getY(b)*ci.yscale+ci.yoffset;
       int eye = fragment.getWhichEye(b);
       
-      for(int i=0;i<size;i++){
+#if MANTA_SSE
+      __m128 vec_px = _mm_add_ps(_mm_set1_ps(px), ci.vec_xscale_cascade);
+      __m128 vec_py = _mm_set1_ps(py);
+      __m128i vec_eye = _mm_set1_epi32(eye);
+      RayPacketData* data = rays.data;
+      int e = size&(~3);
+      for(int i=0;i<e;i+=4){
+        _mm_store_si128((__m128i*)&data->whichEye[f+i], vec_eye);
+        _mm_store_ps(&data->image[0][f+i], vec_px);
+        _mm_store_ps(&data->image[1][f+i], vec_py);
+        vec_px = _mm_add_ps(vec_px, ci.vec_xscale4);
+      }
+#else
+      int e = 0;
+#endif
+      for(int i=e;i<size;i++){
         rays.setPixel(i, eye, px, py);
         px += ci.xscale;
       }
-
     }
 
     // Otherwise, set each pixel individually.
     else {
-      for(int i=0;i<size;i++){
-        Real px = fragment.getX(i)*ci.xscale+ci.xoffset;
-        Real py = fragment.getY(i)*ci.yscale+ci.yoffset;
-        rays.setPixel(i, fragment.getWhichEye(i), px, py);
+#if MANTA_SSE
+      RayPacketData* data = rays.data;
+      int e = size&(~3);
+      for(int i=0;i<e;i+=4) {
+        _mm_store_si128((__m128i*)&data->whichEye[f+i], _mm_load_si128((__m128i*)&fragment.whichEye[f+i]));
+        __m128 fx = _mm_cvtepi32_ps(_mm_load_si128((__m128i*)&fragment.pixel[0][f+i]));
+        _mm_store_ps(&data->image[0][f+i], _mm_add_ps(_mm_mul_ps(fx, ci.vec_xscale), ci.vec_xoffset));
+        __m128 fy = _mm_cvtepi32_ps(_mm_load_si128((__m128i*)&fragment.pixel[1][f+i]));
+        _mm_store_ps(&data->image[1][f+i], _mm_add_ps(_mm_mul_ps(fy, ci.vec_yscale), ci.vec_yoffset));
+      }
+#else
+      int e = 0;
+#endif
+      for(int i=e;i<size;i++){
+        Real px = fragment.getX(f+i)*ci.xscale+ci.xoffset;
+        Real py = fragment.getY(f+i)*ci.yscale+ci.yoffset;
+        rays.setPixel(i, fragment.getWhichEye(f+i), px, py);
       }
     }
     
     // Trace the rays.  The results will automatically go into the fragment
     context.renderer->traceEyeRays(context, rays);
 
-    for(int i=0;i<size;i++)
-    {
-        for ( int c = 0; c < Color::NumComponents; c++ )
-            fragment.color[c][i] = raydata.color[c][i];
+#if MANTA_SSE
+    int e = size&(~3);
+    for(int i=0;i<e;i+=4){
+      _mm_store_ps(&fragment.color[0][f+i], _mm_load_ps(&raydata.color[0][i]));
+      _mm_store_ps(&fragment.color[1][f+i], _mm_load_ps(&raydata.color[1][i]));
+      _mm_store_ps(&fragment.color[2][f+i], _mm_load_ps(&raydata.color[2][i]));
+    }
+#else
+    int e = 0;
+#endif
+    for(int i=e;i<size;i++) {
+      for ( int c = 0; c < Color::NumComponents; c++ )
+        fragment.color[c][f+i] = raydata.color[c][i];
     }
   }
 }

Modified: trunk/Engine/PixelSamplers/SingleSampler.h
==============================================================================
--- trunk/Engine/PixelSamplers/SingleSampler.h  (original)
+++ trunk/Engine/PixelSamplers/SingleSampler.h  Sat Apr 29 21:48:36 2006
@@ -4,6 +4,8 @@
 
 #include <MantaTypes.h>
 #include <Interface/PixelSampler.h>
+#include <Core/Util/Align.h>
+#include <MantaSSE.h>
 #include <sgi_stl_warnings_off.h>
 #include <string>
 #include <vector>
@@ -26,7 +28,15 @@
     SingleSampler(const SingleSampler&);
     SingleSampler& operator=(const SingleSampler&);
 
-    struct ChannelInfo {
+    struct MANTA_ALIGN(16) ChannelInfo {
+#ifdef MANTA_SSE
+      __m128 vec_xscale;
+      __m128 vec_xscale4;
+      __m128 vec_xscale_cascade;
+      __m128 vec_xoffset;
+      __m128 vec_yscale;
+      __m128 vec_yoffset;
+#endif
       Real xscale;
       Real xoffset;
       Real yscale;

Modified: trunk/Model/Cameras/PinholeCamera.cc
==============================================================================
--- trunk/Model/Cameras/PinholeCamera.cc        (original)
+++ trunk/Model/Cameras/PinholeCamera.cc        Sat Apr 29 21:48:36 2006
@@ -9,6 +9,7 @@
 #include <Core/Math/MiscMath.h>
 #include <Core/Math/Trig.h>
 #include <Core/Util/Assert.h>
+#include <MantaSSE.h>
 #include <iostream>
 
 using namespace Manta;
@@ -140,23 +141,153 @@
     }
     rays.setFlag(RayPacket::NormalizedDirections);
   } else {
-    Real u_vec[3] = { u[0], u[1], u[2] };
-    Real v_vec[3] = { v[0], v[1], v[2] };
-    Real d_vec[3] = { direction[0], direction[1], direction[2] };
-    
+#if MANTA_SSE
     RayPacketData* data = rays.data;
-    for(int i=rays.begin();i<rays.end();i++)
+    // Only enabled if the packet starts and ends on a multiple of 4
+    if(((rays.rayBegin | rays.rayEnd) & 0x3) == 0){
+#define VERSION2
+#ifdef VERSION0
+      RayPacketData* data = rays.data;
+      Real u_vec[3] = { u[0], u[1], u[2] };
+      Real v_vec[3] = { v[0], v[1], v[2] };
+      Real d_vec[3] = { direction[0], direction[1], direction[2] };
+    
+      for(int i=rays.begin();i<rays.end();i++)
       {
         data->origin[0][i] = eye.data[0];
         data->origin[1][i] = eye.data[1];
         data->origin[2][i] = eye.data[2];
-
+          
+        const Real u_coord = data->image[1][i];
+        const Real v_coord = data->image[0][i];
+        
+        data->direction[0][i] = d_vec[0] + u_coord * u_vec[0] + v_coord * v_vec[0];
+        data->direction[1][i] = d_vec[1] + u_coord * u_vec[1] + v_coord * v_vec[1];
+        data->direction[2][i] = d_vec[2] + u_coord * u_vec[2] + v_coord * v_vec[2];
+      }
+#endif
+#ifdef VERSION1
+      __m128 eyex = _mm_set1_ps(eye.data[0]);
+      __m128 eyey = _mm_set1_ps(eye.data[1]);
+      __m128 eyez = _mm_set1_ps(eye.data[2]);
+      __m128 dirx = _mm_set1_ps(direction[0]);
+      __m128 diry = _mm_set1_ps(direction[1]);
+      __m128 dirz = _mm_set1_ps(direction[2]);
+      __m128 ux = _mm_set1_ps(u[0]);
+      __m128 uy = _mm_set1_ps(u[1]);
+      __m128 uz = _mm_set1_ps(u[2]);
+      __m128 vx = _mm_set1_ps(v[0]);
+      __m128 vy = _mm_set1_ps(v[1]);
+      __m128 vz = _mm_set1_ps(v[2]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        _mm_store_ps(&data->origin[0][i], eyex);
+        _mm_store_ps(&data->origin[1][i], eyey);
+        _mm_store_ps(&data->origin[2][i], eyez);
+
+        __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+        __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+        __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux), _mm_mul_ps(v_coord, vx)));
+        _mm_store_ps(&data->direction[0][i], dx);
+        __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy), _mm_mul_ps(v_coord, vy)));
+        _mm_store_ps(&data->direction[1][i], dy);
+        __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz), _mm_mul_ps(v_coord, vz)));
+        _mm_store_ps(&data->direction[2][i], dz);
+      }
+#endif
+#ifdef VERSION2
+      __m128 eyex = _mm_set1_ps(eye.data[0]);
+      __m128 eyey = _mm_set1_ps(eye.data[1]);
+      __m128 eyez = _mm_set1_ps(eye.data[2]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        _mm_store_ps(&data->origin[0][i], eyex);
+        _mm_store_ps(&data->origin[1][i], eyey);
+        _mm_store_ps(&data->origin[2][i], eyez);
+      }
+      __m128 dirx = _mm_set1_ps(direction[0]);
+      __m128 diry = _mm_set1_ps(direction[1]);
+      __m128 dirz = _mm_set1_ps(direction[2]);
+      __m128 ux = _mm_set1_ps(u[0]);
+      __m128 uy = _mm_set1_ps(u[1]);
+      __m128 uz = _mm_set1_ps(u[2]);
+      __m128 vx = _mm_set1_ps(v[0]);
+      __m128 vy = _mm_set1_ps(v[1]);
+      __m128 vz = _mm_set1_ps(v[2]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+        __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+        __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux), _mm_mul_ps(v_coord, vx)));
+        _mm_store_ps(&data->direction[0][i], dx);
+        __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy), _mm_mul_ps(v_coord, vy)));
+        _mm_store_ps(&data->direction[1][i], dy);
+        __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz), _mm_mul_ps(v_coord, vz)));
+        _mm_store_ps(&data->direction[2][i], dz);
+      }
+#endif
+#ifdef VERSION3
+      __m128 eyex = _mm_set1_ps(eye.data[0]);
+      __m128 eyey = _mm_set1_ps(eye.data[1]);
+      __m128 eyez = _mm_set1_ps(eye.data[2]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        _mm_store_ps(&data->origin[0][i], eyex);
+        _mm_store_ps(&data->origin[1][i], eyey);
+        _mm_store_ps(&data->origin[2][i], eyez);
+      }
+      __m128 dirx = _mm_set1_ps(direction[0]);
+      __m128 ux = _mm_set1_ps(u[0]);
+      __m128 vx = _mm_set1_ps(v[0]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+        __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+        __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux), _mm_mul_ps(v_coord, vx)));
+        _mm_store_ps(&data->direction[0][i], dx);
+      }
+
+      __m128 diry = _mm_set1_ps(direction[1]);
+      __m128 uy = _mm_set1_ps(u[1]);
+      __m128 vy = _mm_set1_ps(v[1]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+        __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+        __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy), _mm_mul_ps(v_coord, vy)));
+        _mm_store_ps(&data->direction[1][i], dy);
+      }
+
+      __m128 dirz = _mm_set1_ps(direction[2]);
+      __m128 uz = _mm_set1_ps(u[2]);
+      __m128 vz = _mm_set1_ps(v[2]);
+      for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+        __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+        __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+        __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz), _mm_mul_ps(v_coord, vz)));
+        _mm_store_ps(&data->direction[2][i], dz);
+      }
+#endif
+    } else
+#endif
+    {
+      RayPacketData* data = rays.data;
+      Real u_vec[3] = { u[0], u[1], u[2] };
+      Real v_vec[3] = { v[0], v[1], v[2] };
+      Real d_vec[3] = { direction[0], direction[1], direction[2] };
+    
+      for(int i=rays.begin();i<rays.end();i++)
+      {
+        data->origin[0][i] = eye.data[0];
+        data->origin[1][i] = eye.data[1];
+        data->origin[2][i] = eye.data[2];
+          
         const Real u_coord = data->image[1][i];
         const Real v_coord = data->image[0][i];
         
         data->direction[0][i] = d_vec[0] + u_coord * u_vec[0] + v_coord * v_vec[0];
         data->direction[1][i] = d_vec[1] + u_coord * u_vec[1] + v_coord * v_vec[1];
         data->direction[2][i] = d_vec[2] + u_coord * u_vec[2] + v_coord * v_vec[2];
+      }
     }
   }
 }




  • [MANTA] r1029 - in trunk: Engine/PixelSamplers Model/Cameras, sparker, 04/29/2006

Archive powered by MHonArc 2.6.16.

Top of page