Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1029 - in trunk: Engine/PixelSamplers Model/Cameras
- Date: Sat, 29 Apr 2006 21:48:44 -0600 (MDT)
Author: sparker
Date: Sat Apr 29 21:48:36 2006
New Revision: 1029
Modified:
trunk/Engine/PixelSamplers/SingleSampler.cc
trunk/Engine/PixelSamplers/SingleSampler.h
trunk/Model/Cameras/PinholeCamera.cc
Log:
SSE-ify ("Sissify") PinholeCamera and SingleSampler: vectorize them with SSE intrinsics
On a MacBook Pro with -renderer raygen we now get 114 fps on 2 CPUs with no
image display, and 58 fps with image display.
Modified: trunk/Engine/PixelSamplers/SingleSampler.cc
==============================================================================
--- trunk/Engine/PixelSamplers/SingleSampler.cc (original)
+++ trunk/Engine/PixelSamplers/SingleSampler.cc Sat Apr 29 21:48:36 2006
@@ -4,6 +4,7 @@
#include <Interface/Fragment.h>
#include <Interface/RayPacket.h>
#include <Interface/Renderer.h>
+#include <MantaSSE.h>
using namespace Manta;
PixelSampler* SingleSampler::create(const vector<string>& args)
@@ -37,6 +38,14 @@
ci.yscale = ci.xscale;
ci.xoffset = (-xres/(Real)2+(Real)0.5)*ci.xscale; // Offset to pixel center
ci.yoffset = (-yres/(Real)2+(Real)0.5)*ci.yscale;
+#if MANTA_SSE
+ ci.vec_xscale = _mm_set1_ps(ci.xscale);
+ ci.vec_xscale4 = _mm_set1_ps(ci.xscale*4);
+ ci.vec_xscale_cascade = _mm_set_ps(ci.xscale*3, ci.xscale*2, ci.xscale, 0);
+ ci.vec_yscale = _mm_set1_ps(ci.yscale);
+ ci.vec_xoffset = _mm_set1_ps(ci.xoffset);
+ ci.vec_yoffset = _mm_set1_ps(ci.yoffset);
+#endif
context.renderer->setupDisplayChannel(context);
}
@@ -71,34 +80,70 @@
// If so place each pixel in the ray packet relative to the first
// fragment.
- int b = fragment.begin();
+ int b = fragment.begin()+f;
Real px = fragment.getX(b)*ci.xscale+ci.xoffset;
Real py = fragment.getY(b)*ci.yscale+ci.yoffset;
int eye = fragment.getWhichEye(b);
- for(int i=0;i<size;i++){
+#if MANTA_SSE
+ __m128 vec_px = _mm_add_ps(_mm_set1_ps(px), ci.vec_xscale_cascade);
+ __m128 vec_py = _mm_set1_ps(py);
+ __m128i vec_eye = _mm_set1_epi32(eye);
+ RayPacketData* data = rays.data;
+ int e = size&(~3);
+ for(int i=0;i<e;i+=4){
+ _mm_store_si128((__m128i*)&data->whichEye[f+i], vec_eye);
+ _mm_store_ps(&data->image[0][f+i], vec_px);
+ _mm_store_ps(&data->image[1][f+i], vec_py);
+ vec_px = _mm_add_ps(vec_px, ci.vec_xscale4);
+ }
+#else
+ int e = 0;
+#endif
+ for(int i=e;i<size;i++){
rays.setPixel(i, eye, px, py);
px += ci.xscale;
}
-
}
// Otherwise, set each pixel individually.
else {
- for(int i=0;i<size;i++){
- Real px = fragment.getX(i)*ci.xscale+ci.xoffset;
- Real py = fragment.getY(i)*ci.yscale+ci.yoffset;
- rays.setPixel(i, fragment.getWhichEye(i), px, py);
+#if MANTA_SSE
+ RayPacketData* data = rays.data;
+ int e = size&(~3);
+ for(int i=0;i<e;i+=4) {
+ _mm_store_si128((__m128i*)&data->whichEye[f+i],
_mm_load_si128((__m128i*)&fragment.whichEye[f+i]));
+ __m128 fx =
_mm_cvtepi32_ps(_mm_load_si128((__m128i*)&fragment.pixel[0][f+i]));
+ _mm_store_ps(&data->image[0][f+i], _mm_add_ps(_mm_mul_ps(fx,
ci.vec_xscale), ci.vec_xoffset));
+ __m128 fy =
_mm_cvtepi32_ps(_mm_load_si128((__m128i*)&fragment.pixel[1][f+i]));
+ _mm_store_ps(&data->image[1][f+i], _mm_add_ps(_mm_mul_ps(fy,
ci.vec_yscale), ci.vec_yoffset));
+ }
+#else
+ int e = 0;
+#endif
+ for(int i=e;i<size;i++){
+ Real px = fragment.getX(f+i)*ci.xscale+ci.xoffset;
+ Real py = fragment.getY(f+i)*ci.yscale+ci.yoffset;
+ rays.setPixel(i, fragment.getWhichEye(f+i), px, py);
}
}
// Trace the rays. The results will automatically go into the fragment
context.renderer->traceEyeRays(context, rays);
- for(int i=0;i<size;i++)
- {
- for ( int c = 0; c < Color::NumComponents; c++ )
- fragment.color[c][i] = raydata.color[c][i];
+#if MANTA_SSE
+ int e = size&(~3);
+ for(int i=0;i<e;i+=4){
+ _mm_store_ps(&fragment.color[0][f+i],
_mm_load_ps(&raydata.color[0][i]));
+ _mm_store_ps(&fragment.color[1][f+i],
_mm_load_ps(&raydata.color[1][i]));
+ _mm_store_ps(&fragment.color[2][f+i],
_mm_load_ps(&raydata.color[2][i]));
+ }
+#else
+ int e = 0;
+#endif
+ for(int i=e;i<size;i++) {
+ for ( int c = 0; c < Color::NumComponents; c++ )
+ fragment.color[c][f+i] = raydata.color[c][i];
}
}
}
Modified: trunk/Engine/PixelSamplers/SingleSampler.h
==============================================================================
--- trunk/Engine/PixelSamplers/SingleSampler.h (original)
+++ trunk/Engine/PixelSamplers/SingleSampler.h Sat Apr 29 21:48:36 2006
@@ -4,6 +4,8 @@
#include <MantaTypes.h>
#include <Interface/PixelSampler.h>
+#include <Core/Util/Align.h>
+#include <MantaSSE.h>
#include <sgi_stl_warnings_off.h>
#include <string>
#include <vector>
@@ -26,7 +28,15 @@
SingleSampler(const SingleSampler&);
SingleSampler& operator=(const SingleSampler&);
- struct ChannelInfo {
+ struct MANTA_ALIGN(16) ChannelInfo {
+#ifdef MANTA_SSE
+ __m128 vec_xscale;
+ __m128 vec_xscale4;
+ __m128 vec_xscale_cascade;
+ __m128 vec_xoffset;
+ __m128 vec_yscale;
+ __m128 vec_yoffset;
+#endif
Real xscale;
Real xoffset;
Real yscale;
Modified: trunk/Model/Cameras/PinholeCamera.cc
==============================================================================
--- trunk/Model/Cameras/PinholeCamera.cc (original)
+++ trunk/Model/Cameras/PinholeCamera.cc Sat Apr 29 21:48:36 2006
@@ -9,6 +9,7 @@
#include <Core/Math/MiscMath.h>
#include <Core/Math/Trig.h>
#include <Core/Util/Assert.h>
+#include <MantaSSE.h>
#include <iostream>
using namespace Manta;
@@ -140,23 +141,153 @@
}
rays.setFlag(RayPacket::NormalizedDirections);
} else {
- Real u_vec[3] = { u[0], u[1], u[2] };
- Real v_vec[3] = { v[0], v[1], v[2] };
- Real d_vec[3] = { direction[0], direction[1], direction[2] };
-
+#if MANTA_SSE
RayPacketData* data = rays.data;
- for(int i=rays.begin();i<rays.end();i++)
+ // Only enabled if the packet starts and ends on a multiple of 4
+ if(((rays.rayBegin | rays.rayEnd) & 0x3) == 0){
+#define VERSION2
+#ifdef VERSION0
+ RayPacketData* data = rays.data;
+ Real u_vec[3] = { u[0], u[1], u[2] };
+ Real v_vec[3] = { v[0], v[1], v[2] };
+ Real d_vec[3] = { direction[0], direction[1], direction[2] };
+
+ for(int i=rays.begin();i<rays.end();i++)
{
data->origin[0][i] = eye.data[0];
data->origin[1][i] = eye.data[1];
data->origin[2][i] = eye.data[2];
-
+
+ const Real u_coord = data->image[1][i];
+ const Real v_coord = data->image[0][i];
+
+ data->direction[0][i] = d_vec[0] + u_coord * u_vec[0] + v_coord *
v_vec[0];
+ data->direction[1][i] = d_vec[1] + u_coord * u_vec[1] + v_coord *
v_vec[1];
+ data->direction[2][i] = d_vec[2] + u_coord * u_vec[2] + v_coord *
v_vec[2];
+ }
+#endif
+#ifdef VERSION1
+ __m128 eyex = _mm_set1_ps(eye.data[0]);
+ __m128 eyey = _mm_set1_ps(eye.data[1]);
+ __m128 eyez = _mm_set1_ps(eye.data[2]);
+ __m128 dirx = _mm_set1_ps(direction[0]);
+ __m128 diry = _mm_set1_ps(direction[1]);
+ __m128 dirz = _mm_set1_ps(direction[2]);
+ __m128 ux = _mm_set1_ps(u[0]);
+ __m128 uy = _mm_set1_ps(u[1]);
+ __m128 uz = _mm_set1_ps(u[2]);
+ __m128 vx = _mm_set1_ps(v[0]);
+ __m128 vy = _mm_set1_ps(v[1]);
+ __m128 vz = _mm_set1_ps(v[2]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ _mm_store_ps(&data->origin[0][i], eyex);
+ _mm_store_ps(&data->origin[1][i], eyey);
+ _mm_store_ps(&data->origin[2][i], eyez);
+
+ __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+ __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+ __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux),
_mm_mul_ps(v_coord, vx)));
+ _mm_store_ps(&data->direction[0][i], dx);
+ __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy),
_mm_mul_ps(v_coord, vy)));
+ _mm_store_ps(&data->direction[1][i], dy);
+ __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz),
_mm_mul_ps(v_coord, vz)));
+ _mm_store_ps(&data->direction[2][i], dz);
+ }
+#endif
+#ifdef VERSION2
+ __m128 eyex = _mm_set1_ps(eye.data[0]);
+ __m128 eyey = _mm_set1_ps(eye.data[1]);
+ __m128 eyez = _mm_set1_ps(eye.data[2]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ _mm_store_ps(&data->origin[0][i], eyex);
+ _mm_store_ps(&data->origin[1][i], eyey);
+ _mm_store_ps(&data->origin[2][i], eyez);
+ }
+ __m128 dirx = _mm_set1_ps(direction[0]);
+ __m128 diry = _mm_set1_ps(direction[1]);
+ __m128 dirz = _mm_set1_ps(direction[2]);
+ __m128 ux = _mm_set1_ps(u[0]);
+ __m128 uy = _mm_set1_ps(u[1]);
+ __m128 uz = _mm_set1_ps(u[2]);
+ __m128 vx = _mm_set1_ps(v[0]);
+ __m128 vy = _mm_set1_ps(v[1]);
+ __m128 vz = _mm_set1_ps(v[2]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+ __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+ __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux),
_mm_mul_ps(v_coord, vx)));
+ _mm_store_ps(&data->direction[0][i], dx);
+ __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy),
_mm_mul_ps(v_coord, vy)));
+ _mm_store_ps(&data->direction[1][i], dy);
+ __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz),
_mm_mul_ps(v_coord, vz)));
+ _mm_store_ps(&data->direction[2][i], dz);
+ }
+#endif
+#ifdef VERSION3
+ __m128 eyex = _mm_set1_ps(eye.data[0]);
+ __m128 eyey = _mm_set1_ps(eye.data[1]);
+ __m128 eyez = _mm_set1_ps(eye.data[2]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ _mm_store_ps(&data->origin[0][i], eyex);
+ _mm_store_ps(&data->origin[1][i], eyey);
+ _mm_store_ps(&data->origin[2][i], eyez);
+ }
+ __m128 dirx = _mm_set1_ps(direction[0]);
+ __m128 ux = _mm_set1_ps(u[0]);
+ __m128 vx = _mm_set1_ps(v[0]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+ __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+ __m128 dx = _mm_add_ps(dirx, _mm_add_ps(_mm_mul_ps(u_coord, ux),
_mm_mul_ps(v_coord, vx)));
+ _mm_store_ps(&data->direction[0][i], dx);
+ }
+
+ __m128 diry = _mm_set1_ps(direction[1]);
+ __m128 uy = _mm_set1_ps(u[1]);
+ __m128 vy = _mm_set1_ps(v[1]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+ __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+ __m128 dy = _mm_add_ps(diry, _mm_add_ps(_mm_mul_ps(u_coord, uy),
_mm_mul_ps(v_coord, vy)));
+ _mm_store_ps(&data->direction[1][i], dy);
+ }
+
+ __m128 dirz = _mm_set1_ps(direction[2]);
+ __m128 uz = _mm_set1_ps(u[2]);
+ __m128 vz = _mm_set1_ps(v[2]);
+ for(int i=rays.rayBegin; i < rays.rayEnd; i+=4){
+ __m128 u_coord = _mm_load_ps(&data->image[1][i]);
+ __m128 v_coord = _mm_load_ps(&data->image[0][i]);
+
+ __m128 dz = _mm_add_ps(dirz, _mm_add_ps(_mm_mul_ps(u_coord, uz),
_mm_mul_ps(v_coord, vz)));
+ _mm_store_ps(&data->direction[2][i], dz);
+ }
+#endif
+ } else
+#endif
+ {
+ RayPacketData* data = rays.data;
+ Real u_vec[3] = { u[0], u[1], u[2] };
+ Real v_vec[3] = { v[0], v[1], v[2] };
+ Real d_vec[3] = { direction[0], direction[1], direction[2] };
+
+ for(int i=rays.begin();i<rays.end();i++)
+ {
+ data->origin[0][i] = eye.data[0];
+ data->origin[1][i] = eye.data[1];
+ data->origin[2][i] = eye.data[2];
+
const Real u_coord = data->image[1][i];
const Real v_coord = data->image[0][i];
data->direction[0][i] = d_vec[0] + u_coord * u_vec[0] + v_coord *
v_vec[0];
data->direction[1][i] = d_vec[1] + u_coord * u_vec[1] + v_coord *
v_vec[1];
data->direction[2][i] = d_vec[2] + u_coord * u_vec[2] + v_coord *
v_vec[2];
+ }
}
}
}
- [MANTA] r1029 - in trunk: Engine/PixelSamplers Model/Cameras, sparker, 04/29/2006
Archive powered by MHonArc 2.6.16.