Text archives Help
- From: thiago@sci.utah.edu
- To: manta@sci.utah.edu
- Subject: [MANTA] r1626 - trunk/Model/Cameras
- Date: Sat, 11 Aug 2007 18:21:07 -0600 (MDT)
Author: thiago
Date: Sat Aug 11 18:21:07 2007
New Revision: 1626
Modified:
trunk/Model/Cameras/PinholeCamera.cc
Log:
Added the computation of corner rays to the creation of normalized
camera rays. This makes things that use SSE WaldTriangles (and square
ray packets) faster. For example, the conference scene rendered with
DynBVH becomes about 5% faster. All other primitives could also use
the corner rays to do frustum culling and get similar performance
improvements. This still needs to be added (by me or someone else) to
the non-normalized camera ray generation. If anyone thinks this code
belongs somewhere else (somewhere cleaner that does not make generating
the corner rays slower), let me know.
Modified: trunk/Model/Cameras/PinholeCamera.cc
==============================================================================
--- trunk/Model/Cameras/PinholeCamera.cc (original)
+++ trunk/Model/Cameras/PinholeCamera.cc Sat Aug 11 18:21:07 2007
@@ -14,6 +14,7 @@
#include <Core/Util/Assert.h>
#include <MantaSSE.h>
#include <iostream>
+#include <limits>
using namespace Manta;
using namespace std;
@@ -149,6 +150,16 @@
if(normalizeRays) {
#ifdef MANTA_SSE
+
+ //we need to find the max ray extents so that we can calculate the
+ //corner rays. These are used by the WaldTriangle intersector and
+ //CGT acceleration structure to do frustum culling/traversal, and
+ //possibly elsewhere.
+ float min_v = std::numeric_limits<float>::max();
+ float max_v = -std::numeric_limits<float>::max();
+ float min_u = std::numeric_limits<float>::max();
+ float max_u = -std::numeric_limits<float>::max();
+
int b = (rays.rayBegin + 3) & (~3);
int e = rays.rayEnd & (~3);
if(b >= e){
@@ -156,6 +167,12 @@
Vector raydir(v*rays.getImageCoordinates(i,
0)+u*rays.getImageCoordinates(i, 1)+direction);
raydir.normalize();
rays.setRay(i, eye, raydir);
+
+ //find max ray extents
+ min_v = SCIRun::Min(min_v, rays.getImageCoordinates(i, 0));
+ max_v = SCIRun::Max(max_v, rays.getImageCoordinates(i, 0));
+ min_u = SCIRun::Min(min_u, rays.getImageCoordinates(i, 1));
+ max_u = SCIRun::Max(max_u, rays.getImageCoordinates(i, 1));
}
} else {
int i = rays.rayBegin;
@@ -163,9 +180,32 @@
Vector raydir(v*rays.getImageCoordinates(i,
0)+u*rays.getImageCoordinates(i, 1)+direction);
raydir.normalize();
rays.setRay(i, eye, raydir);
+
+ //find max ray extents
+ min_v = SCIRun::Min(min_v, rays.getImageCoordinates(i, 0));
+ max_v = SCIRun::Max(max_v, rays.getImageCoordinates(i, 0));
+ min_u = SCIRun::Min(min_u, rays.getImageCoordinates(i, 1));
+ max_u = SCIRun::Max(max_u, rays.getImageCoordinates(i, 1));
+ }
+ for(i=e;i<rays.rayEnd;i++){
+ Vector raydir(v*rays.getImageCoordinates(i,
0)+u*rays.getImageCoordinates(i, 1)+direction);
+ raydir.normalize();
+ rays.setRay(i, eye, raydir);
+
+ //find max ray extents
+ min_v = SCIRun::Min(min_v, rays.getImageCoordinates(i, 0));
+ max_v = SCIRun::Max(max_v, rays.getImageCoordinates(i, 0));
+ min_u = SCIRun::Min(min_u, rays.getImageCoordinates(i, 1));
+ max_u = SCIRun::Max(max_u, rays.getImageCoordinates(i, 1));
}
+
+ sse_t min_vs = set4(min_v);
+ sse_t max_vs = set4(max_v);
+ sse_t min_us = set4(min_u);
+ sse_t max_us = set4(max_u);
+
RayPacketData* data = rays.data;
- for(;i<e;i+=4){
+ for(i=b;i<e;i+=4){
__m128 imagev = _mm_load_ps(&data->image[0][i]);
__m128 imageu = _mm_load_ps(&data->image[1][i]);
__m128 xd = _mm_add_ps(_mm_add_ps(_mm_mul_ps(_mm_set1_ps(v[0]),
imagev), _mm_mul_ps(_mm_set1_ps(u[0]), imageu)), _mm_set1_ps(direction[0]));
@@ -182,12 +222,30 @@
_mm_store_ps(&data->origin[0][i], _mm_set1_ps(eye[0]));
_mm_store_ps(&data->origin[1][i], _mm_set1_ps(eye[1]));
_mm_store_ps(&data->origin[2][i], _mm_set1_ps(eye[2]));
+
+ min_vs = min4(min_vs, imagev);
+ max_vs = max4(max_vs, imagev);
+ min_us = min4(min_us, imageu);
+ max_us = max4(max_us, imageu);
}
- for(;i<rays.rayEnd;i++){
- Vector raydir(v*rays.getImageCoordinates(i,
0)+u*rays.getImageCoordinates(i, 1)+direction);
- raydir.normalize();
- rays.setRay(i, eye, raydir);
- }
+
+ min_v = min4f(min_vs);
+ max_v = max4f(max_vs);
+ min_u = min4f(min_us);
+ max_u = max4f(max_us);
+ const sse_t imageu = set44(max_u, max_u, min_u, min_u);
+ const sse_t imagev = set44(max_v, min_v, max_v, min_v);
+ const sse_t xd = add4(add4(mul4(set4(v[0]), imagev), mul4(set4(u[0]),
imageu)), set4(direction[0]));
+ const sse_t yd = add4(add4(mul4(set4(v[1]), imagev), mul4(set4(u[1]),
imageu)), set4(direction[1]));
+ const sse_t zd = add4(add4(mul4(set4(v[2]), imagev), mul4(set4(u[2]),
imageu)), set4(direction[2]));
+ const sse_t length2 = add4(add4(mul4(xd, xd), mul4(yd, yd)), mul4(zd,
zd));
+ sse_t scale = _mm_rsqrt_ps(length2);
+ // Do one newton-raphson iteration to get the accuracy we need
+ scale = mul4(mul4(scale, _mm_sub_ps(set4(3.f), mul4(length2,
mul4(scale, scale)))), set4(0.5f));
+ data->corner_dir[0] = mul4(xd, scale);
+ data->corner_dir[1] = mul4(yd, scale);
+ data->corner_dir[2] = mul4(zd, scale);
+ rays.setFlag(RayPacket::HaveCornerRays);
}
#else
for(int i = rays.begin(); i < rays.end(); i++){
- [MANTA] r1626 - trunk/Model/Cameras, thiago, 08/11/2007
Archive powered by MHonArc 2.6.16.