Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r508 - in branches/AFR: Engine/Control Engine/Display Engine/ImageTraversers Engine/ImageTraversers/AFR Interface


Chronological Thread 
  • From: abe@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r508 - in branches/AFR: Engine/Control Engine/Display Engine/ImageTraversers Engine/ImageTraversers/AFR Interface
  • Date: Sat, 27 Aug 2005 00:57:33 -0600 (MDT)

Author: abe
Date: Sat Aug 27 00:57:32 2005
New Revision: 508

Modified:
   branches/AFR/Engine/Control/AFRPipeline.cc
   branches/AFR/Engine/Display/FileDisplay.cc
   branches/AFR/Engine/Display/FileDisplay.h
   branches/AFR/Engine/Display/NullDisplay.cc
   branches/AFR/Engine/Display/NullDisplay.h
   branches/AFR/Engine/Display/OpenGLDisplay.cc
   branches/AFR/Engine/Display/OpenGLDisplay.h
   branches/AFR/Engine/ImageTraversers/AFImageTraverser.cc
   branches/AFR/Engine/ImageTraversers/AFImageTraverser.h
   branches/AFR/Engine/ImageTraversers/AFR/kdtree.cc
   branches/AFR/Engine/ImageTraversers/CMakeLists.txt
   branches/AFR/Interface/ImageDisplay.h
Log:


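Added a conditional around the barrier at the top of the AFRPipeline outer 
loop, so that workers only wait on it when there are pending transactions, 
the pipeline needs setup, or it is the first frame. This increases the sample 
throughput by 2x to 5x, depending on how long the inner loop is allowed to run. 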

Scaling in terms of samples/second appears to be nearly linear on fisher. 

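Added an -inner <seconds> argument to the AFR image traverser (the default 
image traverser for the AFRPipeline). It sets how long the inner loop is 
allowed to run; the default is 0.06 seconds.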

M    Engine/Control/AFRPipeline.cc
M    Engine/ImageTraversers/AFR/kdtree.cc
M    Engine/ImageTraversers/AFImageTraverser.cc
M    Engine/ImageTraversers/CMakeLists.txt
M    Engine/ImageTraversers/AFImageTraverser.h

Added a third argument, "Real performance", to ImageDisplay::displayImage. 
This allows the pipeline to pass any performance value into the image display 
(such as samples instead of just fps). This value will be divided by the time 
since the last image display, and the result is output on the image.

M    Interface/ImageDisplay.h
M    Engine/Display/OpenGLDisplay.h
M    Engine/Display/NullDisplay.cc
M    Engine/Display/NullDisplay.h
M    Engine/Display/FileDisplay.cc
M    Engine/Display/FileDisplay.h
M    Engine/Display/OpenGLDisplay.cc

This is only the beginning of the performance analysis of the sampler.



Modified: branches/AFR/Engine/Control/AFRPipeline.cc
==============================================================================
--- branches/AFR/Engine/Control/AFRPipeline.cc  (original)
+++ branches/AFR/Engine/Control/AFRPipeline.cc  Sat Aug 27 00:57:32 2005
@@ -251,9 +251,13 @@
       name << "AFRPipeline Worker " << i;
                        
                        // Construct the thread.
-      Thread* t = new Thread(new Manta::Worker(this, i, false), 
name.str().c_str(), 0, Thread::NotActivated);
+      Thread* t = new Thread(new Manta::Worker(this, i, false), 
+                             name.str().c_str(), 0, 
+                             Thread::NotActivated);
+                             
       t->setStackSize(RENDER_THREAD_STACKSIZE);
       t->activate(false);
+      
       workers[i] = t;
     }
   }
@@ -282,6 +286,8 @@
   bool changed = true;
   bool firstFrame = true;
   
+  Real last_samples = 0.0;
+  
        //if(lateComerFlag){
   //  firstFrame = false;
   //  goto skipToRendering;
@@ -316,8 +322,14 @@
     // Start of non-rendering portion of the loop. Make callbacks
     // that could possibly change state and get everything set up to
     // render the next frame
-    outer_loop_barrier.wait(workersRendering);
-               
+    
+    // Check to see if the barrier is necessary??
+    if ( transactions.size() || pipelineNeedsSetup || firstFrame ) {
+    
+      outer_loop_barrier.wait(workersRendering);
+               }
+    firstFrame = false;
+    
     // Copy over the frame state
     if(proc == 0) {
       renderFrameState = animFrameState;
@@ -338,6 +350,8 @@
     if(proc == 0)
       postTransactions(changed);
     
+    // What is "changed" used for?
+    
                
///////////////////////////////////////////////////////////////////////////
     // Resize the image. rebuild per thread kd-trees.
                // Note all threads call this, which is different from trunk/
@@ -360,8 +374,6 @@
                                resizeImages(renderFrameState.frameNumber);
                        }
                        
-                       // Perhaps KD-tree creation goes here??
-                       
                        // Wait for all of the other processors to finish 
setup.
                        pipeline_setup_barrier.wait(numProcs);
                        
@@ -382,55 +394,66 @@
                        Image* image = channel->images[displayFrame];
                        
                        if(image && image->isValid()){
+        Real current_samples = currentImageTraverser->getSamplesDone();
+      
                                DisplayContext myContext(proc, 
workersAnimAndImage);
-                               channel->display->displayImage(myContext, 
image);
+                               channel->display->displayImage(myContext, 
image, (current_samples-last_samples) );
+        
+        last_samples = current_samples;
                        }
+      
+      // std::cout << "Total samples: " << 
currentImageTraverser->getSamplesDone() << std::endl;
                }
                
+    /////////////////////////////////////////////////////////////////////////
+    // Pre-render callbacks
+    doParallelPreRenderCallbacks(proc, workersRendering);
+    doSerialPreRenderCallbacks(proc, workersRendering);                    
+    
 // skipToRendering:
-               // for (;;) {
+    /////////////////////////////////////////////////////////////////////////
+    /////////////////////////////////////////////////////////////////////////
+    // Inner Loop
+    /////////////////////////////////////////////////////////////////////////
+    /////////////////////////////////////////////////////////////////////////
+    int inner_iterations = 0;
+               for (double begin_time=Time::currentSeconds();
+         
(Time::currentSeconds()-begin_time)<currentImageTraverser->inner_loop_time;) {
                
-                       
/////////////////////////////////////////////////////////////////////////
-                       
/////////////////////////////////////////////////////////////////////////
-                       // Inner Loop: Pipeline Stage 1. 
-                       // Update Tiling. (Master task)
-                       
/////////////////////////////////////////////////////////////////////////
-                       
/////////////////////////////////////////////////////////////////////////
-                       
-      
+      ++inner_iterations;
     
-                       
/////////////////////////////////////////////////////////////////////////
-                       // Pre-render callbacks
-                       doParallelPreRenderCallbacks(proc, workersRendering);
-                       doSerialPreRenderCallbacks(proc, workersRendering);   
          
-
-                       
/////////////////////////////////////////////////////////////////////////
-                       
/////////////////////////////////////////////////////////////////////////
-                       // Inner Loop: Pipeline Stage 2. 
-                       // Sampling / Rendering
-                       
/////////////////////////////////////////////////////////////////////////
-                       
/////////////////////////////////////////////////////////////////////////
-                       
-                       for(int index = 0;index < 
static_cast<int>(channels.size());index++){
+      for(int index = 0;index < static_cast<int>(channels.size());index++){
         
-                               Channel* channel = channels[index];
-                               long renderFrame = 
renderFrameState.frameNumber%channel->pipelineDepth;
-                               Image* image = channel->images[renderFrame];
-                               RenderContext myContext(this, index, proc, 
workersRendering, &renderFrameState,
-                                                                             
                                                  currentLoadBalancer, 
currentPixelSampler,
-                                                                             
                                                  currentRenderer, 
currentShadowAlgorithm,
-                                                                             
                                                  channel->camera, scene);
-
-
+        Channel* channel = channels[index];
+        long renderFrame = 
renderFrameState.frameNumber%channel->pipelineDepth;
+        Image* image = channel->images[renderFrame];
+        RenderContext myContext(this, index, proc, workersRendering, 
&renderFrameState,
+                                currentLoadBalancer, currentPixelSampler,
+                                currentRenderer, currentShadowAlgorithm,
+                                channel->camera, scene);
+    
+        
///////////////////////////////////////////////////////////////////////
+        
///////////////////////////////////////////////////////////////////////
+        // Inner Loop: Pipeline Stage 1. 
+        // Update Tiling. (Master task)
+        
///////////////////////////////////////////////////////////////////////
+        
///////////////////////////////////////////////////////////////////////
+        currentImageTraverser->masterTask( myContext, image );
+        
+        
///////////////////////////////////////////////////////////////////////
+        
///////////////////////////////////////////////////////////////////////
+        // Inner Loop: Pipeline Stage 2. 
+        // Sampling / Rendering
+        
///////////////////////////////////////////////////////////////////////
+        
///////////////////////////////////////////////////////////////////////
+        currentImageTraverser->clientTask( myContext, image );
         
-          currentImageTraverser->masterTask( myContext, image );
-          currentImageTraverser->clientTask( myContext, image );
-         image->setValid(true);
+        image->setValid(true);
                        }
                        
                        // Determine how to break out of inner loop. 
!!!!!!!!!!!!!!!!!!!!!!!!!!
                        
-               // } // End of inner loop.
+               } // End of inner loop.
     
   } // End of outer loop.
 }

Modified: branches/AFR/Engine/Display/FileDisplay.cc
==============================================================================
--- branches/AFR/Engine/Display/FileDisplay.cc  (original)
+++ branches/AFR/Engine/Display/FileDisplay.cc  Sat Aug 27 00:57:32 2005
@@ -42,7 +42,8 @@
 
 void FileDisplay::displayImage(
   const DisplayContext &context,
-  const Image *image )
+  const Image *image,
+  Real performance )
 {
   bool stereo;
   int xres, yres;

Modified: branches/AFR/Engine/Display/FileDisplay.h
==============================================================================
--- branches/AFR/Engine/Display/FileDisplay.h   (original)
+++ branches/AFR/Engine/Display/FileDisplay.h   Sat Aug 27 00:57:32 2005
@@ -16,7 +16,7 @@
     virtual ~FileDisplay();
     virtual void setupDisplayChannel(SetupContext&);
     virtual void displayImage(const DisplayContext& context,
-                             const Image* image);
+                             const Image* image, Real performance = 1.0);
     static ImageDisplay* create(const vector<string>& args);
   protected:
     string baseName;

Modified: branches/AFR/Engine/Display/NullDisplay.cc
==============================================================================
--- branches/AFR/Engine/Display/NullDisplay.cc  (original)
+++ branches/AFR/Engine/Display/NullDisplay.cc  Sat Aug 27 00:57:32 2005
@@ -24,6 +24,6 @@
 {
 }
 
-void NullDisplay::displayImage(const DisplayContext&, const Image*)
+void NullDisplay::displayImage(const DisplayContext&, const Image*, Real 
performance)
 {
 }

Modified: branches/AFR/Engine/Display/NullDisplay.h
==============================================================================
--- branches/AFR/Engine/Display/NullDisplay.h   (original)
+++ branches/AFR/Engine/Display/NullDisplay.h   Sat Aug 27 00:57:32 2005
@@ -16,7 +16,7 @@
     virtual ~NullDisplay();
     virtual void setupDisplayChannel(SetupContext&);
     virtual void displayImage(const DisplayContext& context,
-                             const Image* image);
+                             const Image* image, Real performance = 1.0);
     static ImageDisplay* create(const vector<string>& args);
   private:
     NullDisplay(const NullDisplay&);

Modified: branches/AFR/Engine/Display/OpenGLDisplay.cc
==============================================================================
--- branches/AFR/Engine/Display/OpenGLDisplay.cc        (original)
+++ branches/AFR/Engine/Display/OpenGLDisplay.cc        Sat Aug 27 00:57:32 
2005
@@ -215,7 +215,7 @@
 }
 
 void OpenGLDisplay::displayImage(const DisplayContext& context,
-                                const Image* image)
+                                const Image* image, Real performance )
 {
   if(context.proc != displayProc%context.numProcs)
     return;
@@ -307,7 +307,7 @@
                         __FILE__, __LINE__);
   }
 
-  display_frame_rate(1.0/(currentTime-last_frame_time));
+  display_frame_rate(performance/(currentTime-last_frame_time));
   last_frame_time = currentTime;
   
   glXSwapBuffers(dpy, win);
@@ -326,7 +326,7 @@
   // Display textual information on the screen:
   char buf[200];
   if (framerate > 1)
-    sprintf( buf, "%3.1lf fps", framerate);
+    sprintf( buf, "%3.1lf samples/second", framerate);
   else
     sprintf( buf, "%2.2lf fps - %3.1lf spf", framerate , 1.0f/framerate);
   // Figure out how wide the string is

Modified: branches/AFR/Engine/Display/OpenGLDisplay.h
==============================================================================
--- branches/AFR/Engine/Display/OpenGLDisplay.h (original)
+++ branches/AFR/Engine/Display/OpenGLDisplay.h Sat Aug 27 00:57:32 2005
@@ -18,7 +18,7 @@
     OpenGLDisplay(const vector<string>& args);
     virtual ~OpenGLDisplay();
     virtual void setupDisplayChannel(SetupContext&);
-    virtual void displayImage(const DisplayContext& context, const Image* 
image);
+    virtual void displayImage(const DisplayContext& context, const Image* 
image, Real performance = 1.0);
     static ImageDisplay* create(const vector<string>& args);
   private:
     OpenGLDisplay(const OpenGLDisplay&);

Modified: branches/AFR/Engine/ImageTraversers/AFImageTraverser.cc
==============================================================================
--- branches/AFR/Engine/ImageTraversers/AFImageTraverser.cc     (original)
+++ branches/AFR/Engine/ImageTraversers/AFImageTraverser.cc     Sat Aug 27 
00:57:32 2005
@@ -16,6 +16,7 @@
 #include <Core/Thread/Mutex.h>
 #include <Core/XUtils/XHelper.h>
 
+
 #include <SCIRun/Core/Thread/Time.h>
 #include <cmath>
 
@@ -44,9 +45,17 @@
   return new AFImageTraverser(args);
 }
 
-AFImageTraverser::AFImageTraverser(const vector<string>& args)
+AFImageTraverser::AFImageTraverser(const vector<string>& args) :
+  myRandomNumber( 0 ),
+  inner_loop_time( 0.06 )
 {
-  myRandomNumber = NULL;
+  for (int i=0;i<args.size();++i) {
+    if (args[i] == "-inner") {
+      if (!getArg( i, args, inner_loop_time )) {
+        throw IllegalArgument("-inner <Real>", i, args);
+      }
+    }
+  }
 }
 
 AFImageTraverser::~AFImageTraverser()
@@ -72,7 +81,15 @@
   int xres,yres;
   num_clients = context.numProcs;
   samplingrate = 400000; // let us for now assume something realistic
-  samples_done = new unsigned int[num_clients];
+  
+  // Samples done.
+#ifdef __ia64__
+  samples_done = (SamplesDone *)memalign( 128, 
sizeof(SamplesDone)*num_clients );
+#else
+  samples_done = (SamplesDone *)malloc( sizeof(SamplesDone)*num_clients );
+#endif
+  memset( samples_done, 0x0, sizeof(SamplesDone)*num_clients );
+  
   initpass = new bool[num_clients];
   chunkTimeStamp = new double[num_clients];
 
@@ -96,7 +113,8 @@
   kdtree = new KDTree[num_clients];
   for(i=0; i<num_clients; i++)
   {
-    samples_done[i] = 0;
+    // samples_done[i] = 0;
+    
     initpass[i] = false;
     chunkTimeStamp[i] = 0.0;
     kdtree[i].setAB(xres, yres, samplingrate);
@@ -173,6 +191,7 @@
   // Trace the rays.  The results will automatically go into the fragment  
   context.renderer->traceEyeRays(context, rays);
   rays.computeHitPositions();
+  
   // okay now copy from fragment to temporalQ
   for(int i=0;i<size;i++) {
     RayPacket::Element& re = rays.get(i);
@@ -212,7 +231,8 @@
   
   // add the sample to the corresponding kdtree
   kdtree[context.proc].updateStatsAddSampleSet(&ss, 
(float)chunkTimeStamp[context.proc], samplingrate);
-  samples_done[context.proc] += 5;
+  // samples_done[context.proc] += 5;
+  samples_done[context.proc].addFive();
 }
 
 
@@ -232,6 +252,7 @@
   int fsize = chunk_size;
   Color color[RayPacket::MaxSize];
   Sample newSample[RayPacket::MaxSize];
+  
   
/////////////////////////////////////////////////////////////////////////////
   // Create ray packets.
   for(int f=0;f<fsize;f+=RayPacket::MaxSize) {
@@ -267,7 +288,9 @@
           py = (double)(-1.0 + 2.0*(double)(cy)/(double)xres);
         }  
                                
-      samples_done[context.proc] ++;
+      // samples_done[context.proc] ++;
+      samples_done[context.proc].addOne();
+      
       // Specify the position and color pointer for the packet element.
       rays.setPixel(i, 0, px, py, &color[i]);
     }
@@ -282,6 +305,7 @@
     
///////////////////////////////////////////////////////////////////////////
     // okay now copy from fragment to temporalQ
     for(int i=0;i<size;i++) {
+    
       RayPacket::Element& re = rays.get(i);
       RGBColor tempcol = color[i].convertRGB();      
       newSample[i].c[0] = tempcol.r();
@@ -291,11 +315,14 @@
       newSample[i].worldCoord[1] = re.hitPosition.y();
       newSample[i].worldCoord[2] = re.hitPosition.z();
       // newSample[i].print();
+      
       
/////////////////////////////////////////////////////////////////////////////
       // Skip reconstruction and set the image pixel.
       image->set((int)(newSample[i].viewCoord[0]), 
(int)(newSample[i].viewCoord[1]), color[i]);
+      
       // add sample to the corresponding kdtree
       kdtree[myID].updateStatsAddSample(&newSample[i], 
(float)chunkTimeStamp[myID], samplingrate, 0.0, false);
+      
       // add this sample into the temporalQ
       temporalQ[myID].qInsert(&newSample[i]);
     }
@@ -314,11 +341,11 @@
   int xres, yres;
   image->getResolution(stereo, xres, yres);
   
-  chunkTimeStamp[context.proc] = 
(float)samples_done[context.proc]/(float)samplingrate;//Time::currentSeconds();
+  chunkTimeStamp[context.proc] = 
(float)samples_done[context.proc].value/(float)samplingrate;//Time::currentSeconds();
   // return during intialization here 
   if(!initpass[context.proc])
   {
-    if(samples_done[context.proc] >= xres*yres)  initpass[context.proc] = 
true;
+    if(samples_done[context.proc].value >= xres*yres)  
initpass[context.proc] = true;
   }
   else 
   {

Modified: branches/AFR/Engine/ImageTraversers/AFImageTraverser.h
==============================================================================
--- branches/AFR/Engine/ImageTraversers/AFImageTraverser.h      (original)
+++ branches/AFR/Engine/ImageTraversers/AFImageTraverser.h      Sat Aug 27 
00:57:32 2005
@@ -12,6 +12,11 @@
 #include <Engine/ImageTraversers/AFR/CQ.h>
 #include <Engine/ImageTraversers/AFR/sample.h>
 
+#ifdef __ia64__
+#include <malloc.h>
+#include <ia64intrin.h>
+#endif
+
 namespace Manta {
 
   using namespace std;
@@ -20,7 +25,11 @@
 
                enum FrameType { EVEN_FRAME, ODD_FRAME };
                class AFImageTraverser : public ImageTraverser {
+    
+      friend class AFRPipeline;
+    
                public:
+    
                        AFImageTraverser(const vector<string>& args);
                        virtual ~AFImageTraverser();
                        virtual void setupBegin(SetupContext&, int 
numChannels);
@@ -62,11 +71,42 @@
                        int num_clients;
                        int chunk_size;
                        int samplingrate;
-                       unsigned int *samples_done;
+      
+      
/////////////////////////////////////////////////////////////////////////
+      // Samples done.
+      struct SamplesDone {
+        unsigned int value;
+        unsigned int padding[31];
+#ifdef __ia64__
+        inline void addOne()  { __fetchadd4_acq( &value, 1 ); };
+        inline void addFive() { addOne(); __fetchadd4_acq( &value, 4 ); };
+        inline unsigned int load()    { return __ld4_acq( &value ); };
+#else
+        inline void addOne()  { value++; };
+        inline void addFive() { addOne(); value+=5; };
+        inline unsigned int load()    { return value; };
+#endif
+      };
+                       SamplesDone *samples_done;
                        
-                       // ??????????????
+      // ??????????????
                        double *chunkTimeStamp;
                        bool *initpass;
+      
+      
/////////////////////////////////////////////////////////////////////////
+      // Arguments.
+      Real inner_loop_time;
+      
+    public:
+    
+      // Sum up all of the samples done by all of the threads.
+      inline unsigned int getSamplesDone() {
+        unsigned int total = 0;
+        for (int i=0;i<num_clients;++i) {
+          total += samples_done[i].load();
+        }
+        return total;
+      };
                };
 
        };

Modified: branches/AFR/Engine/ImageTraversers/AFR/kdtree.cc
==============================================================================
--- branches/AFR/Engine/ImageTraversers/AFR/kdtree.cc   (original)
+++ branches/AFR/Engine/ImageTraversers/AFR/kdtree.cc   Sat Aug 27 00:57:32 
2005
@@ -44,15 +44,15 @@
                tsize/=2;
        }
        numLevels -= 2;
-       cout << "numLevels = " << numLevels << endl;
+       /// cout << "numLevels = " << numLevels << endl;
        int i;
        totalNodes = (int)(pow(2.0, numLevels+1));
-  cout << "totalNodes = " << totalNodes << endl;
+  /// cout << "totalNodes = " << totalNodes << endl;
        tile = new Tile[totalNodes]; // allocate memory to our array based 
tree
        // now we will assign the indices
 
        Tile::leafMapping = (int**)malloc((width/2)*sizeof(int*));
-  cout << "leafmapping ranges until: " << (width/2) << ", " << (height/2) << 
endl;
+  /// cout << "leafmapping ranges until: " << (width/2) << ", " << 
(height/2) << endl;
        for(i=0; i<width/2; i++)
        {
                Tile::leafMapping[i] = (int*)malloc((height/2)*sizeof(int));

Modified: branches/AFR/Engine/ImageTraversers/CMakeLists.txt
==============================================================================
--- branches/AFR/Engine/ImageTraversers/CMakeLists.txt  (original)
+++ branches/AFR/Engine/ImageTraversers/CMakeLists.txt  Sat Aug 27 00:57:32 
2005
@@ -6,6 +6,7 @@
      ImageTraversers/DissolveImageTraverser.cc
      ImageTraversers/DissolveTiledImageTraverser.cc
 
+     ImageTraversers/AFImageTraverser.h
      ImageTraversers/AFImageTraverser.cc
      ImageTraversers/AFR/CQ.h
      ImageTraversers/AFR/evil.h

Modified: branches/AFR/Interface/ImageDisplay.h
==============================================================================
--- branches/AFR/Interface/ImageDisplay.h       (original)
+++ branches/AFR/Interface/ImageDisplay.h       Sat Aug 27 00:57:32 2005
@@ -2,6 +2,8 @@
 #ifndef Manta_Interface_ImageDisplay_h
 #define Manta_Interface_ImageDisplay_h
 
+#include <MantaTypes.h>
+
 namespace Manta {
   class SetupContext;
   class DisplayContext;
@@ -11,7 +13,7 @@
     virtual ~ImageDisplay();
     virtual void setupDisplayChannel(SetupContext&) = 0;
     virtual void displayImage(const DisplayContext& context,
-                             const Image* image) = 0;
+                             const Image* image, Real performance = 0.0) = 0;
   protected:
     ImageDisplay();
   private:




  • [MANTA] r508 - in branches/AFR: Engine/Control Engine/Display Engine/ImageTraversers Engine/ImageTraversers/AFR Interface, abe, 08/27/2005

Archive powered by MHonArc 2.6.16.

Top of page