Text archives Help
- From: Carson Brownlee <
>
- To:
- Subject: [Manta] Re: r2445 - in trunk: Interface Model/Materials
- Date: Tue, 29 Jun 2010 19:02:27 -0600
Contexts are passed to computeLight so we could well make this a light
instead of a material as we discussed earlier. I think this makes the
most sense and should be an easy modification.
One-bounce diffuse inter-reflection using the same AO calls would be a
worthy addition as well; I would be willing to implement that.
Carson
On Tue, 2010-06-29 at 18:35 -0600,
wrote:
>
Author: thiago
>
Date: Tue Jun 29 18:35:01 2010
>
New Revision: 2445
>
>
Modified:
>
trunk/Interface/RayPacket.h
>
trunk/Model/Materials/AmbientOcclusion.cc
>
trunk/Model/Materials/AmbientOcclusion.h
>
Log:
>
M Interface/RayPacket.h
>
- Added a resetHits that takes a maxt just like the resetHit version which
>
has this option. If the performance hit isn't much, it would probably
>
be
>
nice to just have a single resetHits with a default argument.
>
>
M Model/Materials/AmbientOcclusion.cc
>
M Model/Materials/AmbientOcclusion.h
>
- Fixed a bug where sometimes the orthonormal basis was degenerate. This
>
produced artifacts.
>
- Added option of using stratified sampling (on by default). This gives
>
much nicer looking results. Only downside is that if few samples are
>
used
>
there is visible temporal incoherence. Just change numPermutations to
>
1
>
in the header file to get the previous look.
>
- Added SSE code and some other optimizations. This gives about a 10%
>
speedup in my sample scene. Much larger speedups are possible if most
>
of
>
the compute time is in the ambient occlusion material itself (i.e. ray
>
tracing a very simple scene).
>
>
Modified: trunk/Interface/RayPacket.h
>
==============================================================================
>
--- trunk/Interface/RayPacket.h (original)
>
+++ trunk/Interface/RayPacket.h Tue Jun 29 18:35:01 2010
>
@@ -440,6 +440,43 @@
>
}
>
#endif
>
}
>
+ void resetHits(Real maxt) {
>
+#ifdef MANTA_SSE
>
+ int b = (rayBegin + 3) & (~3);
>
+ int e = rayEnd & (~3);
>
+ if(b >= e){
>
+ for(int i = rayBegin; i < rayEnd; i++){
>
+ data->hitMatl[i] = 0;
>
+ data->minT[i] = maxt;
>
+ }
>
+ } else {
>
+ int i = rayBegin;
>
+ for(;i<b;i++){
>
+ data->hitMatl[i] = 0;
>
+ data->minT[i] = maxt;
>
+ }
>
+ for(;i<e;i+=4){
>
+#ifdef __x86_64
>
+ _mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
>
+ _mm_store_ps((float*)&data->hitMatl[i+2], _mm_setzero_ps());
>
+#else
>
+ _mm_store_ps((float*)&data->hitMatl[i], _mm_setzero_ps());
>
+#endif
>
+ _mm_store_ps(&data->minT[i], _mm_set1_ps(maxt));
>
+ }
>
+ for(;i<rayEnd;i++){
>
+ data->hitMatl[i] = 0;
>
+ data->minT[i] = maxt;
>
+ }
>
+ }
>
+#else
>
+ for(int i = rayBegin; i < rayEnd; i++){
>
+ data->hitMatl[i] = 0;
>
+ data->minT[i] = maxt;
>
+ }
>
+#endif
>
+ }
>
+
>
void resetHit(int which) {
>
data->hitMatl[which] = 0;
>
data->minT[which] = MAXT;
>
>
Modified: trunk/Model/Materials/AmbientOcclusion.cc
>
==============================================================================
>
--- trunk/Model/Materials/AmbientOcclusion.cc (original)
>
+++ trunk/Model/Materials/AmbientOcclusion.cc Tue Jun 29 18:35:01 2010
>
@@ -7,12 +7,13 @@
>
#include <Model/Textures/Constant.h>
>
#include <Core/Math/MT_RNG.h>
>
#include <Core/Math/Trig.h>
>
+#include <MantaSSE.h>
>
+#include <assert.h>
>
>
-using namespace Manta;
>
+#include <iostream>
>
+using namespace std;
>
>
-// TODO: sort the rays generated in generateDirections to make them more
>
-// coherent for ray packets.
>
-// TODO: Try to pack as many occlusion rays into a single ray packet.
>
+using namespace Manta;
>
>
AmbientOcclusion::AmbientOcclusion(const Color& color, float cutoff_dist,
>
int num_dirs)
>
@@ -37,22 +38,70 @@
>
>
void AmbientOcclusion::generateDirections(int num_directions)
>
{
>
- directions.resize(num_directions);
>
MT_RNG rng;
>
- // generate cosine weighted directions
>
- for ( int i = 0; i < num_directions; i++ )
>
- {
>
- double r1 = rng.next<double>();
>
- double r2 = rng.next<double>();
>
-
>
- double phi = 2.0 * Pi * r1;
>
- double r = sqrt(r2);
>
- double x = r * Cos(phi);
>
- double y = r * Sin(phi);
>
- double z = 1.0 - x*x - y*y;
>
+ const int binsY = Sqrt(num_directions);
>
+ const int binsX = binsY;
>
+
>
+ if (binsX*binsY != num_directions) {
>
+ num_directions = binsX*binsY;
>
+ inv_num_directions = 1.0/num_directions;
>
+ cerr << "Warning, number of samples to use for Ambient Occlusion is
>
not a perfect square!\n";
>
+ cerr << "Using " << num_directions << " samples instead\n";
>
+ }
>
+
>
+ for (int k=0; k < numPermutations; ++k) {
>
+ directions[k][0].resize(num_directions);
>
+ directions[k][1].resize(num_directions);
>
+ directions[k][2].resize(num_directions);
>
+
>
+ pair<float, float> sortedSamples[num_directions];
>
+
>
+ int index = 0;
>
+
>
+ // wrap back and forth so that each sample is spatially next to its
>
+ // previous sample. Also, break the Y samples in half.
>
+ const int numYBins = binsY/2;
>
+
>
+ for (int startY=0; startY < binsY; startY += numYBins) {
>
+ for (int xBin=0; xBin < binsX; ++xBin) {
>
+ if (xBin%2==0)
>
+ for (int yBin=startY; yBin < startY+numYBins; ++yBin) {
>
+ float r1 = rng.next<float>() / binsX;
>
+ float r2 = rng.next<float>() / binsY;
>
+ r1 += static_cast<float>(xBin) / binsX;
>
+ r2 += static_cast<float>(yBin) / binsY;
>
+
>
+ sortedSamples[index++] = make_pair(r1, r2);
>
+ }
>
+ else
>
+ for (int yBin=startY+numYBins-1; yBin >= startY; --yBin) {
>
+ float r1 = rng.next<float>() / binsX;
>
+ float r2 = rng.next<float>() / binsY;
>
+ r1 += static_cast<float>(xBin) / binsX;
>
+ r2 += static_cast<float>(yBin) / binsY;
>
+
>
+ sortedSamples[index++] = make_pair(r1, r2);
>
+ }
>
+ }
>
+ }
>
+
>
+ for (int i = 0; i < num_directions; ++i) {
>
+ float r1 = sortedSamples[i].first;
>
+ float r2 = sortedSamples[i].second;
>
+
>
+ float phi = 2.0 * Pi * r1;
>
+ float r = sqrt(r2);
>
+ float s, c;
>
+ SinCos(phi, s, c);
>
+ float x = r * c;
>
+ float y = r * s;
>
+ float z = 1.0 - x*x - y*y;
>
z = (z > 0.0) ? Sqrt(z) : 0.0;
>
>
- directions[i] = Vector(x, y, z);
>
+ directions[k][0][i] = x;
>
+ directions[k][1][i] = y;
>
+ directions[k][2][i] = z;
>
+ }
>
}
>
}
>
>
@@ -63,59 +112,119 @@
>
rays.computeFFNormals<true>(context);
>
rays.computeHitPositions();
>
>
+ const int num_directions = static_cast<int>(directions[0][0].size());
>
+
>
+ Real AOopacities[RayPacket::MaxSize];
>
+
>
+
>
+ RayPacketData occlusion_data;
>
+ const int flag = RayPacket::NormalizedDirections |
>
+ RayPacket::ConstantOrigin;
>
+
>
+ RayPacket occlusion_rays(occlusion_data, RayPacket::UnknownShape,
>
+ 0, RayPacket::MaxSize, rays.getDepth(), flag);
>
+
>
// We are going to first compute the ambient values.
>
- ColorArray total;
>
for(int i = rays.begin(); i < rays.end(); ++i) {
>
+
>
+ // We use drand48 since it is thread safe.
>
+ const int whichDir = static_cast<int>(drand48()*numPermutations);
>
+
>
// for each position, compute a local coordinate frame
>
// and build a set of rays to push into a ray packet
>
- Vector W(rays.getFFNormal(i)); // surface ONB
>
- Vector U(Cross(W, Vector(1,0,0)));
>
- Real squared_length = U.length2();
>
- if ( squared_length < (Real)1e-6 )
>
- U = Cross(W, Vector(0,1,0));
>
- Vector V(Cross(W, U));
>
+ const Vector W(rays.getFFNormal(i)); // surface ONB
>
+ const Vector U = W.findPerpendicular().normal();
>
+ const Vector V(Cross(W, U));
>
+
>
+#ifdef MANTA_SSE
>
+ const sse_t W_x4 = set4(W[0]);
>
+ const sse_t W_y4 = set4(W[1]);
>
+ const sse_t W_z4 = set4(W[2]);
>
+
>
+ const sse_t U_x4 = set4(U[0]);
>
+ const sse_t U_y4 = set4(U[1]);
>
+ const sse_t U_z4 = set4(U[2]);
>
+
>
+ const sse_t V_x4 = set4(V[0]);
>
+ const sse_t V_y4 = set4(V[1]);
>
+ const sse_t V_z4 = set4(V[2]);
>
+#endif
>
+
>
+ occlusion_rays.setFFNormal(occlusion_rays.begin(),
>
rays.getFFNormal(i));
>
>
// Send out the ambient occlusion tests
>
int num_sent = 0;
>
int num_miss = 0;
>
- int num_directions = static_cast<int>(directions.size());
>
- while ( num_sent < num_directions ) {
>
+ while ( num_sent < num_directions) {
>
const int start = 0;
>
int end = start + RayPacket::MaxSize;
>
+
>
if ( (end-start + num_sent) > num_directions)
>
end = start + ((num_directions-1) % RayPacket::MaxSize) + 1;
>
>
- RayPacketData occlusion_data;
>
- // Should the normalized flag be set? The normals coming in
>
- // should already be normalized.
>
- int flag = RayPacket::NormalizedDirections |
>
RayPacket::ConstantOrigin;
>
- RayPacket occlusion_rays(occlusion_data, RayPacket::UnknownShape,
>
- start, end, rays.getDepth(), flag);
>
+ occlusion_rays.resize(start, end);
>
+ occlusion_rays.setAllFlags(flag);
>
+#ifdef MANTA_SSE
>
+ const sse_t origX = set4(rays.data->hitPosition[0][i]);
>
+ const sse_t origY = set4(rays.data->hitPosition[1][i]);
>
+ const sse_t origZ = set4(rays.data->hitPosition[2][i]);
>
+
>
+ const sse_t time = set4(rays.data->time[i]);
>
+
>
+ assert(start==0);
>
+ const int sse_end = (end) & (~3);
>
+ assert(sse_end == end); //Normally not always legit! This is a hack
>
for performance right now
>
+ for (int r=0; r < sse_end; r+=4) {
>
+ sse_t dirX = load44(&directions[whichDir][0][num_sent+r]);
>
+ sse_t dirY = load44(&directions[whichDir][1][num_sent+r]);
>
+ sse_t dirZ = load44(&directions[whichDir][2][num_sent+r]);
>
+
>
+ sse_t trans_dirX = add4(add4(mul4(dirX, U_x4),
>
+ mul4(dirY, V_x4)),
>
+ mul4(dirZ, W_x4));
>
+ sse_t trans_dirY = add4(add4(mul4(dirX, U_y4),
>
+ mul4(dirY, V_y4)),
>
+ mul4(dirZ, W_y4));
>
+ sse_t trans_dirZ = add4(add4(mul4(dirX, U_z4),
>
+ mul4(dirY, V_z4)),
>
+ mul4(dirZ, W_z4));
>
+
>
+ store44(&occlusion_rays.data->direction[0][r], trans_dirX);
>
+ store44(&occlusion_rays.data->direction[1][r], trans_dirY);
>
+ store44(&occlusion_rays.data->direction[2][r], trans_dirZ);
>
+
>
+ store44(&occlusion_rays.data->origin[0][r], origX);
>
+ store44(&occlusion_rays.data->origin[1][r], origY);
>
+ store44(&occlusion_rays.data->origin[2][r], origZ);
>
>
+ store44(&occlusion_rays.data->time[r], time);
>
+ }
>
+#else
>
for ( int r = start; r < end; r++ ) {
>
- Vector trans_dir = (directions[num_sent+r][0]*U +
>
- directions[num_sent+r][1]*V +
>
- directions[num_sent+r][2]*W);
>
+ Vector trans_dir = (directions[whichDir][0][num_sent+r]*U +
>
+ directions[whichDir][1][num_sent+r]*V +
>
+ directions[whichDir][2][num_sent+r]*W);
>
occlusion_rays.setRay(r, rays.getHitPosition(i), trans_dir);
>
occlusion_rays.setTime(r, rays.getTime(i));
>
- // set max distance
>
- occlusion_rays.resetHit(r, cutoff);
>
}
>
+#endif
>
+ // set max distance
>
+ occlusion_rays.resetHits(cutoff);
>
>
// packet is ready, test it for occlusion
>
context.scene->getObject()->intersect(context, occlusion_rays);
>
>
// count the number of occluded ones
>
for (int r = start; r < end; r++ ) {
>
- if(!occlusion_rays.wasHit(r))
>
- num_miss++;
>
+ num_miss += occlusion_rays.wasHit(r) ? 0: 1;
>
}
>
num_sent += end-start;
>
}
>
- for(int j=0;j<Color::NumComponents;j++)
>
- total[j][i] = num_miss * (inv_num_directions * (ColorComponent)0.4);
>
+ AOopacities[i] = num_miss * inv_num_directions;
>
}
>
>
+ ColorArray total = {{ColorComponent(0)}};
>
+
>
// Compute the diffuse shading
>
RayPacketData data;
>
ShadowAlgorithm::StateBuffer shadowState;
>
@@ -132,7 +241,7 @@
>
ColorComponent cos_theta = Dot(shadowdir, normal);
>
Color light = shadowRays.getColor(j);
>
for(int k = 0; k < Color::NumComponents;k++)
>
- total[k][j] += light[k]*cos_theta*(ColorComponent)0.6;
>
+ total[k][j] += light[k]*cos_theta;
>
}
>
}
>
} while(!shadowState.done());
>
@@ -144,8 +253,11 @@
>
// Sum up diffuse/specular contributions
>
for(int i = rays.begin(); i < rays.end(); i++){
>
Color result;
>
- for(int j=0;j<Color::NumComponents;j++)
>
- result[j] = diffuse.colordata[j][i] * total[j][i];
>
+ for(int j=0;j<Color::NumComponents;j++) {
>
+ // For now we just average the AO term with the other shadow terms.
>
+ // Something more clever should probably be done.
>
+ result[j] = diffuse.colordata[j][i] *
>
.5*(AOopacities[i]+total[j][i]);
>
+ }
>
rays.setColor(i, result);
>
}
>
}
>
>
Modified: trunk/Model/Materials/AmbientOcclusion.h
>
==============================================================================
>
--- trunk/Model/Materials/AmbientOcclusion.h (original)
>
+++ trunk/Model/Materials/AmbientOcclusion.h Tue Jun 29 18:35:01 2010
>
@@ -28,7 +28,11 @@
>
private:
>
const Texture<Color>* colortex;
>
float cutoff;
>
- std::vector<Vector> directions;
>
+
>
+ // increase the numPermutations past 1 to remove structural aliasing
>
at the
>
+ // expense of a performance loss and temporal aliasing.
>
+ static const int numPermutations = 13;
>
+ std::vector<Real> directions[numPermutations][3];
>
ColorComponent inv_num_directions;
>
};
>
}
- [Manta] Re: r2445 - in trunk: Interface Model/Materials, Carson Brownlee, 06/29/2010
Archive powered by MHonArc 2.6.16.