Text archives Help
- From: "Solomon Boulos" <boulos@cs.utah.edu>
- To: manta@sci.utah.edu
- Subject: [Manta] r2166 - in trunk: Engine/ImageTraversers Image StandAlone
- Date: Fri, 4 Apr 2008 15:41:38 -0600 (MDT)
Author: boulos
Date: Fri Apr 4 15:41:36 2008
New Revision: 2166
Modified:
trunk/Engine/ImageTraversers/DeadlineImageTraverser.cc
trunk/Engine/ImageTraversers/TiledImageTraverser.cc
trunk/Image/SimpleImage_special.cc
trunk/StandAlone/manta.cc
Log:
Engine/ImageTraversers/DeadlineImageTraverser.cc
Engine/ImageTraversers/TiledImageTraverser.cc
Image/SimpleImage_special.cc
StandAlone/manta.cc
Implementing a fast path for SquareShape fragments hitting the
framebuffer.
Updating DeadlineImageTraverser and TiledImageTraverser to properly
label tiles that aren't actually square as unknown (they're really
Rectangles, but we don't have that).
Setting the default imagetraverser to use square tiles. This now only
represents a 0.7% slowdown for bin/manta without display and 1.3%
with display on my laptop. In contrast, for a bunny with shadows it
gives almost a 2x speedup for the KDTree and a 1.6x speedup for the
BVH (our dashboard tests manually ensure that square tiles are used).
Modified: trunk/Engine/ImageTraversers/DeadlineImageTraverser.cc
==============================================================================
--- trunk/Engine/ImageTraversers/DeadlineImageTraverser.cc (original)
+++ trunk/Engine/ImageTraversers/DeadlineImageTraverser.cc Fri Apr 4
15:41:36 2008
@@ -355,17 +355,22 @@
int ytile = assignment%ytiles;
int xstart = xtile * xcoarsetilesize;
int xend = (xtile+1) * xcoarsetilesize;
-
- if(xend > xres)
+ bool isSquare = true;
+ if(xend > xres) {
xend = xres;
+ isSquare = false;
+ }
int ystart = ytile * ycoarsetilesize;
int yend = (ytile+1) * ycoarsetilesize;
- if(yend > yres)
+ if(yend > yres) {
yend = yres;
+ isSquare = false;
+ }
- Fragment frag(Fragment::SquareShape, Fragment::ConstantEye);
+ Fragment frag((isSquare) ? Fragment::SquareShape :
Fragment::UnknownShape,
+ Fragment::ConstantEye);
frag.setPixelSize(xcoarsepixelsize, ycoarsepixelsize);
for(int eye = 0; eye < numEyes; eye++){
@@ -554,19 +559,25 @@
} else {
for(int y = tile->ystart; y < tile->yend; y += ys){
for(int x = tile->xstart; x < tile->xend; x += xs){
+ bool isSquare = true;
int xend = x + xs;
- if(xend > tile->xend)
+ if(xend > tile->xend) {
xend = tile->xend;
+ isSquare = false;
+ }
int yend = y + ys;
- if(yend > tile->yend)
+ if(yend > tile->yend) {
yend = tile->yend;
+ isSquare = false;
+ }
// NOTE(boulos): This logic is now slightly different as we
// are going to make smaller fragments. This makes it so we
// don't need to change the render fragment logic (a single
// fragment will now just have many samples) so the fragment
// size will be smaller than before
- Fragment frag(Fragment::SquareShape, Fragment::ConstantEye);
+ Fragment frag((isSquare) ? Fragment::SquareShape :
Fragment::UnknownShape,
+ Fragment::ConstantEye);
frag.setPixelSize(1, 1);
int idx = 0;
for (int j = y; j < yend; j++) {
@@ -614,15 +625,19 @@
for(int y = tile->ystart; y < tile->yend; y += ys){
for(int x = tile->xstart; x < tile->xend; x += xs){
+ bool isSquare = true;
int xend = x + xs;
- if(xend > tile->xend)
+ if(xend > tile->xend) {
xend = tile->xend;
+ isSquare = false;
+ }
int yend = y + ys;
- if(yend > tile->yend)
+ if(yend > tile->yend) {
yend = tile->yend;
+ isSquare = false;
+ }
- Fragment frag(Fragment::SquareShape, Fragment::ConstantEye);
-
+ Fragment frag((isSquare) ? Fragment::SquareShape :
Fragment::UnknownShape, Fragment::ConstantEye);
frag.setPixelSize(newxmag_int, newymag_int);
int idx = 0;
for (int j = y; j < yend; j+=newymag_int) {
Modified: trunk/Engine/ImageTraversers/TiledImageTraverser.cc
==============================================================================
--- trunk/Engine/ImageTraversers/TiledImageTraverser.cc (original)
+++ trunk/Engine/ImageTraversers/TiledImageTraverser.cc Fri Apr 4 15:41:36
2008
@@ -57,7 +57,7 @@
shape = Fragment::SquareShape;
else
shape = Fragment::LineShape;
-
+
}
TiledImageTraverser::TiledImageTraverser(const vector<string>& args)
@@ -267,6 +267,13 @@
frag.pixel[0][j*i_end + i] = x + i;
frag.pixel[1][j*i_end + i] = y + j;
}
+ }
+ // NOTE(boulos): If these get clipped, it's not
+ // actually Square (but it is a
+ // Rectangle... maybe we should add that?)
+ if (i_end != sqrt_size ||
+ j_end != sqrt_size) {
+ frag.shape = Fragment::UnknownShape;
}
frag.setSize(j_end * i_end);
context.rng->seed(x*xres+y);
Modified: trunk/Image/SimpleImage_special.cc
==============================================================================
--- trunk/Image/SimpleImage_special.cc (original)
+++ trunk/Image/SimpleImage_special.cc Fri Apr 4 15:41:36 2008
@@ -1,3 +1,4 @@
+#include <Core/Exceptions/InternalError.h>
#include <Core/Math/ExponSSE.h>
#include <Core/Math/SSEDefs.h>
#include <Image/SimpleImage.h>
@@ -75,6 +76,80 @@
{
for(int i=fragment.begin(); i< fragment.end();i++)
convertToPixel(*pix++, fragment.getColor(i).convertRGB());
+ }
+ } else if (fragment.getFlag(Fragment::ConstantEye) &&
+ fragment.shape == Fragment::SquareShape) {
+#ifdef MANTA_SSE
+ int numPixels = fragment.end() - fragment.begin();
+ // NOTE(boulos): For SSE we need at least a 4x4 fragment, if we
+ // don't have that, skip out. We also need the same alignment
+ // rules from above. Note that pixelEnd is 1 after the last value,
+ // so it needs to be SIMD width aligned as well.
+ if (numPixels >= 16 &&
+ ((fragment.pixelBegin | fragment.pixelEnd) & 0x3) == 0) {
+ // NOTE(boulos): Only power of 2 numPixels should pass
+ bool isPowerOf2 = !(numPixels & (numPixels -1));
+ if (!isPowerOf2) throw InternalError("Not power of 2..");
+ int sqrtSize;
+ switch (numPixels) {
+ case 16:
+ sqrtSize = 4;
+ break;
+ case 64:
+ sqrtSize = 8;
+ break;
+ case 144:
+ sqrtSize = 12;
+ break;
+ case 256:
+ sqrtSize = 16;
+ break;
+ default:
+ sqrtSize = static_cast<int>(Sqrt(static_cast<Real>(numPixels)));
+ if (sqrtSize * sqrtSize != numPixels) throw InternalError("Not a
perfect square");
+ break;
+ }
+ int i = fragment.begin();
+ int eye = fragment.getWhichEye(i);
+ __m128 scale = _mm_set1_ps( 255.99999f );
+ for (int y = 0; y < sqrtSize; y++) {
+ ARGB8Pixel* pix = eyeStart[eye][fragment.getY(i)]+fragment.getX(i);
+ for (int x = 0; x < sqrtSize; x+=4, i+=4) {
+ __m128 r = _mm_load_ps(&fragment.color[0][i]);
+ __m128 g = _mm_load_ps(&fragment.color[1][i]);
+ __m128 b = _mm_load_ps(&fragment.color[2][i]);
+#if MANTA_USE_SRGB
+ r = RGBtoSRGB(r);
+ g = RGBtoSRGB(g);
+ b = RGBtoSRGB(b);
+#endif
+ r = _mm_mul_ps(r, scale);
+ g = _mm_mul_ps(g, scale);
+ b = _mm_mul_ps(b, scale);
+
+ __m128i alpha = _mm_set1_epi32(255); // alpha
+ __m128i r32 = _mm_cvttps_epi32(r); // 32 bits: r0r1r2r3
+ __m128i g32 = _mm_cvttps_epi32(g); // 32 bits: g0g1g2g3
+ __m128i b32 = _mm_cvttps_epi32(b); // 32 bits: b0b1b2b3
+ __m128i ag16 = _mm_packs_epi32(alpha, g32); // 16 bits:
a0a1a2a3g0g1g2g3
+ __m128i rb16 = _mm_packs_epi32(r32, b32); // 16 bits:
r0r1r2r3b0b1b2b3
+ __m128i ar16 = _mm_unpacklo_epi16(ag16, rb16); // 16 bits:
a0r0a1r1a2r2a3r3
+ __m128i gb16 = _mm_unpackhi_epi16(ag16, rb16); // 16 bits:
g0b0g1b1g2b2g3b3
+ __m128i argb16a = _mm_unpacklo_epi32(ar16, gb16); // 16 bits
a0r0g0b0a1r1g1b1
+ __m128i argb16b = _mm_unpackhi_epi32(ar16, gb16); // 16 bits
a2r2g2b2a3r3g3b3
+ __m128i result = _mm_packus_epi16(argb16a, argb16b); // 8 bits:
a0r0g0b0...a3r3g3b3
+ _mm_stream_si128((__m128i*)pix, result);
+ pix += 4;
+ }
+ }
+ } else
+#endif
+ {
+ int eye = fragment.getWhichEye(fragment.begin());
+ for(int i=fragment.begin(); i< fragment.end();i++) {
+ ARGB8Pixel* pix = eyeStart[eye][fragment.getY(i)]+fragment.getX(i);
+ convertToPixel(*pix, fragment.getColor(i).convertRGB());
+ }
}
} else {
for(int i=fragment.begin();i<fragment.end();i++){
Modified: trunk/StandAlone/manta.cc
==============================================================================
--- trunk/StandAlone/manta.cc (original)
+++ trunk/StandAlone/manta.cc Fri Apr 4 15:41:36 2008
@@ -559,7 +559,7 @@
if(!factory->selectLoadBalancer("workqueue"))
throw InternalError("default load balancer not found");
- if(!factory->selectImageTraverser("tiled"))
+ if(!factory->selectImageTraverser("tiled(-square)"))
throw InternalError("default image traverser not found");
if(!factory->selectPixelSampler("singlesample"))
- [Manta] r2166 - in trunk: Engine/ImageTraversers Image StandAlone, Solomon Boulos, 04/04/2008
Archive powered by MHonArc 2.6.16.