Text archives Help
- From: sparker@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1035 - in trunk: Engine/ImageTraversers Interface
- Date: Tue, 2 May 2006 14:57:08 -0600 (MDT)
Author: sparker
Date: Tue May 2 14:57:08 2006
New Revision: 1035
Modified:
trunk/Engine/ImageTraversers/HardTile.cc
trunk/Engine/ImageTraversers/HardTile.h
trunk/Engine/ImageTraversers/TiledImageTraverser.cc
trunk/Engine/ImageTraversers/TiledImageTraverser.h
trunk/Interface/Fragment.h
Log:
SSEify and optimize image traverser code
Modified: trunk/Engine/ImageTraversers/HardTile.cc
==============================================================================
--- trunk/Engine/ImageTraversers/HardTile.cc (original)
+++ trunk/Engine/ImageTraversers/HardTile.cc Tue May 2 14:57:08 2006
@@ -37,6 +37,8 @@
#include <Interface/PixelSampler.h>
#include <Core/Thread/Mutex.h>
#include <Core/Util/NotFinished.h>
+#include <Core/Math/MinMax.h>
+#include <MantaSSE.h>
using namespace Manta;
@@ -59,6 +61,10 @@
throw IllegalArgument("HardTile", i, args);
}
}
+#if MANTA_SSE
+ vec_cascade = _mm_set_epi32(3, 2, 1, 0);
+ vec_4 = _mm_set1_epi32(4);
+#endif
}
HardTile::~HardTile()
@@ -103,6 +109,7 @@
bool stereo;
int xres, yres;
image->getResolution(stereo, xres, yres);
+ int numEyes = stereo? 2:1;
int s,e;
while(context.loadBalancer->getNextAssignment(context, s, e)){
@@ -124,21 +131,54 @@
yend = yres;
- for(int y = ystart; y<yend; y++)
- {
- for(int x = xstart; x<xend; x+= Fragment::MaxSize)
+ // Create a Fragment that is consecutive in X pixels
+ Fragment frag(Fragment::ConsecutiveX|Fragment::ConstantEye);
+ int fsize = SCIRun::Min(Fragment::MaxSize, xend-xstart);
+ for(int eye = 0; eye < numEyes; eye++){
+#ifdef MANTA_SSE
+ int e = (fsize+3)&(~3);
+ __m128i vec_eye = _mm_set1_epi32(eye);
+ for(int i=0;i<e;i+=4)
+ _mm_store_si128((__m128i*)&frag.whichEye[i], vec_eye);
+#else
+ for(int i=0;i<fsize;i++)
+ frag.whichEye[i] = eye;
+#endif
+ for(int y = ystart; y<yend; y++)
{
- // This catches cases where xend-xstart is larger than
- // Fragment::MaxSize.
- int xnarf = x+Fragment::MaxSize;
- if (xnarf > xend) xnarf = xend;
- // Create a Fragment that is consecutive in X pixels
- Fragment frag(x, xnarf, y, 0);
- context.pixelSampler->renderFragment(context, frag);
- image->set(frag);
+#ifdef MANTA_SSE
+ int e = (fsize+3)&(~3);
+ __m128i vec_y = _mm_set1_epi32(y);
+ for(int i=0;i<e;i+=4)
+ _mm_store_si128((__m128i*)&frag.pixel[1][i], vec_y);
+#else
+ for(int i=0;i<fsize;i++)
+ frag.pixel[1][i] = y;
+#endif
+ for(int x = xstart; x<xend; x+= Fragment::MaxSize)
+ {
+ // This catches cases where xend-xstart is larger than
+ // Fragment::MaxSize.
+ int xnarf = x+Fragment::MaxSize;
+ if (xnarf > xend) xnarf = xend;
+ int size = xnarf-x;
+#ifdef MANTA_SSE
+ __m128i vec_x = _mm_add_epi32(_mm_set1_epi32(x), vec_cascade);
+ for(int i=0;i<size;i+=4){
+ // This will spill over by up to 3 pixels
+ _mm_store_si128((__m128i*)&frag.pixel[0][i], vec_x);
+ vec_x = _mm_add_epi32(vec_x, vec_4);
+ }
+#else
+ for(int i=0;i<size;i++)
+ frag.pixel[0][i] = i+x;
+#endif
+ frag.setSize(size);
+ context.pixelSampler->renderFragment(context, frag);
+ image->set(frag);
+ }
}
}
-
}
}
// This can potentially happen before the other processors are finished
Modified: trunk/Engine/ImageTraversers/HardTile.h
==============================================================================
--- trunk/Engine/ImageTraversers/HardTile.h (original)
+++ trunk/Engine/ImageTraversers/HardTile.h Tue May 2 14:57:08 2006
@@ -35,6 +35,7 @@
#include <string>
#include <vector>
#include <sgi_stl_warnings_on.h>
+#include <MantaSSE.h>
namespace Manta {
using namespace std;
@@ -51,6 +52,10 @@
private:
HardTile(const HardTile&);
HardTile& operator=(const HardTile&);
+#ifdef MANTA_SSE
+ __m128i vec_cascade;
+ __m128i vec_4;
+#endif
int xtilesize;
int ytilesize;
Modified: trunk/Engine/ImageTraversers/TiledImageTraverser.cc
==============================================================================
--- trunk/Engine/ImageTraversers/TiledImageTraverser.cc (original)
+++ trunk/Engine/ImageTraversers/TiledImageTraverser.cc Tue May 2 14:57:08 2006
@@ -29,14 +29,16 @@
#include <Engine/ImageTraversers/TiledImageTraverser.h>
#include <Core/Exceptions/IllegalArgument.h>
+#include <Core/Math/MinMax.h>
+#include <Core/Thread/Mutex.h>
#include <Core/Util/Args.h>
+#include <Core/Util/NotFinished.h>
#include <Interface/Context.h>
#include <Interface/Fragment.h>
#include <Interface/Image.h>
#include <Interface/LoadBalancer.h>
#include <Interface/PixelSampler.h>
-#include <Core/Thread/Mutex.h>
-#include <Core/Util/NotFinished.h>
+#include <MantaSSE.h>
using namespace Manta;
@@ -59,6 +61,10 @@
throw IllegalArgument("TiledImageTraverser", i, args);
}
}
+#if MANTA_SSE
+ vec_cascade = _mm_set_epi32(3, 2, 1, 0);
+ vec_4 = _mm_set1_epi32(4);
+#endif
}
TiledImageTraverser::~TiledImageTraverser()
@@ -103,6 +109,7 @@
bool stereo;
int xres, yres;
image->getResolution(stereo, xres, yres);
+ int numEyes = stereo?2:1;
int s,e;
while(context.loadBalancer->getNextAssignment(context, s, e)){
@@ -123,25 +130,84 @@
if(yend > yres)
yend = yres;
- for(int y = ystart; y<yend; y++){
- for(int x = xstart; x<xend; x+= Fragment::MaxSize){
- // This catches cases where xend-xstart is larger than
- // Fragment::MaxSize.
- int xnarf = x+Fragment::MaxSize;
- if (xnarf > xend) xnarf = xend;
-
- // Create a Fragment that is consecutive in X pixels
- Fragment frag(x, xnarf, y, 0);
- context.pixelSampler->renderFragment(context, frag);
- image->set(frag);
-
- // Check to see if we need to render another copy in stereo.
- if(stereo){
- Fragment fragST(x, xnarf, y, 1);
- context.pixelSampler->renderFragment(context, fragST);
- image->set(fragST);
+ // Create a Fragment that is consecutive in X pixels
+ Fragment frag(Fragment::ConsecutiveX|Fragment::ConstantEye);
+ int fsize = SCIRun::Min(Fragment::MaxSize, xend-xstart);
+ for(int eye = 0; eye < numEyes; eye++){
+#ifdef MANTA_SSE
+ int e = (fsize+3)&(~3);
+ __m128i vec_eye = _mm_set1_epi32(eye);
+ for(int i=0;i<e;i+=4)
+ _mm_store_si128((__m128i*)&frag.whichEye[i], vec_eye);
+#else
+ for(int i=0;i<fsize;i++)
+ frag.whichEye[i] = eye;
+#endif
+
+ // Two versions. If the assignment is narrower than a fragment, we
+ // can enable a few optimizations
+ if(xend-xstart <= Fragment::MaxSize){
+ // Common case - one packet in X direction
+ int size = xend-xstart;
+#ifdef MANTA_SSE
+ __m128i vec_x = _mm_add_epi32(_mm_set1_epi32(xstart), vec_cascade);
+ for(int i=0;i<size;i+=4){
+ // This will spill over by up to 3 pixels
+ _mm_store_si128((__m128i*)&frag.pixel[0][i], vec_x);
+ vec_x = _mm_add_epi32(vec_x, vec_4);
+ }
+#else
+ for(int i=0;i<size;i++)
+ frag.pixel[0][i] = i+x;
+#endif
+ frag.setSize(size);
+ for(int y = ystart; y<yend; y++){
+#ifdef MANTA_SSE
+ int e = (fsize+3)&(~3);
+ __m128i vec_y = _mm_set1_epi32(y);
+ for(int i=0;i<e;i+=4)
+ _mm_store_si128((__m128i*)&frag.pixel[1][i], vec_y);
+#else
+ for(int i=0;i<fsize;i++)
+ frag.pixel[1][i] = y;
+#endif
+ context.pixelSampler->renderFragment(context, frag);
+ image->set(frag);
+ }
+ } else {
+ // General case (multiple packets in X direction)
+ for(int y = ystart; y<yend; y++){
+#ifdef MANTA_SSE
+ int e = (fsize+3)&(~3);
+ __m128i vec_y = _mm_set1_epi32(y);
+ for(int i=0;i<e;i+=4)
+ _mm_store_si128((__m128i*)&frag.pixel[1][i], vec_y);
+#else
+ for(int i=0;i<fsize;i++)
+ frag.pixel[1][i] = y;
+#endif
+ for(int x = xstart; x<xend; x+= Fragment::MaxSize){
+ // This catches cases where xend-xstart is larger than
+ // Fragment::MaxSize.
+ int xnarf = x+Fragment::MaxSize;
+ if (xnarf > xend) xnarf = xend;
+ int size = xnarf-x;
+#ifdef MANTA_SSE
+ __m128i vec_x = _mm_add_epi32(_mm_set1_epi32(x), vec_cascade);
+ for(int i=0;i<size;i+=4){
+ // This will spill over by up to 3 pixels
+ _mm_store_si128((__m128i*)&frag.pixel[0][i], vec_x);
+ vec_x = _mm_add_epi32(vec_x, vec_4);
+ }
+#else
+ for(int i=0;i<size;i++)
+ frag.pixel[0][i] = i+x;
+#endif
+ frag.setSize(size);
+ context.pixelSampler->renderFragment(context, frag);
+ image->set(frag);
+ }
}
-
}
}
}
Modified: trunk/Engine/ImageTraversers/TiledImageTraverser.h
==============================================================================
--- trunk/Engine/ImageTraversers/TiledImageTraverser.h (original)
+++ trunk/Engine/ImageTraversers/TiledImageTraverser.h Tue May 2 14:57:08 2006
@@ -35,6 +35,7 @@
#include <string>
#include <vector>
#include <sgi_stl_warnings_on.h>
+#include <MantaSSE.h>
namespace Manta {
using namespace std;
@@ -51,6 +52,11 @@
private:
TiledImageTraverser(const TiledImageTraverser&);
TiledImageTraverser& operator=(const TiledImageTraverser&);
+
+#ifdef MANTA_SSE
+ __m128i vec_cascade;
+ __m128i vec_4;
+#endif
int xtilesize;
int ytilesize;
Modified: trunk/Interface/Fragment.h
==============================================================================
--- trunk/Interface/Fragment.h (original)
+++ trunk/Interface/Fragment.h Tue May 2 14:57:08 2006
@@ -43,6 +43,9 @@
Fragment()
: flags(0), pixelBegin(0), pixelEnd(0)
{}
+ Fragment(int flags)
+ : flags(flags), pixelBegin(0), pixelEnd(0)
+ {}
// Creates a "Scan-line" fragment.
Fragment(int xstart, int xend, int y, int eye)
@@ -83,6 +86,10 @@
void resetSize() {
pixelBegin = 0;
pixelEnd = 0;
+ }
+ void setSize(int size) {
+ pixelBegin = 0;
+ pixelEnd = size;
}
///////////////////////////////////////////////////////////////////////////
- [MANTA] r1035 - in trunk: Engine/ImageTraversers Interface, sparker, 05/02/2006
Archive powered by MHonArc 2.6.16.