Manta Interactive Ray Tracer Development Mailing List

Text archives Help


[MANTA] r1238 - trunk/Model/Primitives


Chronological Thread 
  • From: bigler@sci.utah.edu
  • To: manta@sci.utah.edu
  • Subject: [MANTA] r1238 - trunk/Model/Primitives
  • Date: Mon, 27 Nov 2006 13:50:11 -0700 (MST)

Author: bigler
Date: Mon Nov 27 13:50:10 2006
New Revision: 1238

Modified:
   trunk/Model/Primitives/Parallelogram.cc
Log:

Fixes to make sure the texture coordinates are saved during SSE blocks of
code.  There are currently two versions.  The first uses the
scratchpad and the second copies the values directly to the texture
coordinates.

Also makes use of the new RayPacket::hitWithoutTminCheck function.


Modified: trunk/Model/Primitives/Parallelogram.cc
==============================================================================
--- trunk/Model/Primitives/Parallelogram.cc     (original)
+++ trunk/Model/Primitives/Parallelogram.cc     Mon Nov 27 13:50:10 2006
@@ -9,6 +9,13 @@
 using namespace Manta;
 using SCIRun::Abs;
 
+#define USE_SCRATCHPAD 0
+
+#define maskedStore_ps(mask, oldD, newD)                            \
+  _mm_store_ps(oldD,                                                \
+               _mm_or_ps(_mm_and_ps(mask, newD),                    \
+                         _mm_andnot_ps(mask, _mm_load_ps(oldD))))
+
 Parallelogram::Parallelogram(Material* material, const Vector& anchor,
                              const Vector& in_v1, const Vector& in_v2)
   : PrimitiveCommon(material, this), anchor(anchor), v1(in_v1), v2(in_v2)
@@ -76,7 +83,11 @@
           continue;
 
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
     } else {
       int i = rays.rayBegin;
@@ -96,7 +107,11 @@
           continue;
 
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
       RayPacketData* data = rays.data;
       __m128 normalx = _mm_set1_ps(normal[0]);
@@ -119,53 +134,64 @@
         if(_mm_movemask_ps(hit) == 0)
           continue;
 
+        // Real t=num/dt
+        // if(t>rays.getMinT(i))
+        //   continue;
+        // Vector vi(rays.getDirection(i)*t);
+        // Real a1 = Dot(v1, vi)+o1;
         __m128 vix = _mm_mul_ps(dx, t);
         __m128 viy = _mm_mul_ps(dy, t);
         __m128 viz = _mm_mul_ps(dz, t);
         __m128 a1 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(vix, 
_mm_set1_ps(v1[0])), _mm_mul_ps(viy, _mm_set1_ps(v1[1]))), _mm_mul_ps(viz, 
_mm_set1_ps(v1[2]))), vec_o1);
+        // if (a1 < 0 || a1 > 1)
+        //   continue;
         __m128 zero = _mm_setzero_ps();
         __m128 one = _mm_set1_ps(1.0f);
         hit = _mm_and_ps(hit, _mm_and_ps(_mm_cmpge_ps(a1, zero), 
_mm_cmple_ps(a1, one)));
         if(_mm_movemask_ps(hit) == 0)
           continue;
 
+        // Real a2 = Dot(v2, vi)+o2;
+        // if (a2 < 0 || a2 > 1)
+        //   continue;
+        
         __m128 a2 = _mm_add_ps(_mm_add_ps(_mm_add_ps(_mm_mul_ps(vix, 
_mm_set1_ps(v2[0])), _mm_mul_ps(viy, _mm_set1_ps(v2[1]))), _mm_mul_ps(viz, 
_mm_set1_ps(v2[2]))), vec_o2);
         hit = _mm_and_ps(hit, _mm_and_ps(_mm_cmpge_ps(a2, zero), 
_mm_cmple_ps(a2, one)));
         if(_mm_movemask_ps(hit) == 0)
           continue;
 
+        rays.hitWithoutTminCheck(i, hit, t, getMaterial(), this, 
getTexCoordMapper());
+
+#if USE_SCRATCHPAD
+        // Copy the barycentric coordinates to the scratch pad
+        MANTA_ALIGN(16) float ra1[4];
+        MANTA_ALIGN(16) float ra2[4];
+        _mm_store_ps(ra1, a1);
+        _mm_store_ps(ra2, a2);
+          
         if(_mm_movemask_ps(hit) == 15){
-          _mm_store_ps(&data->minT[i], t);
-#ifdef __x86_64
-          _mm_store_si128((__m128i*)&data->hitMatl[i], _mm_set1_epi64x((long 
long)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long long)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitPrim[i], _mm_set1_epi64x((long 
long)this));
-          _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long long)this));
-          _mm_store_si128((__m128i*)&data->hitTex[i], _mm_set1_epi64x((long 
long)getTexCoordMapper()));
-          _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long long)getTexCoordMapper()));
-#else
-          _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-          _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-#endif
+          rays.scratchpad<Vector>(i+0) = Vector(ra1[0], ra2[0], 0);
+          rays.scratchpad<Vector>(i+1) = Vector(ra1[1], ra2[1], 0);
+          rays.scratchpad<Vector>(i+2) = Vector(ra1[2], ra2[2], 0);
+          rays.scratchpad<Vector>(i+3) = Vector(ra1[3], ra2[3], 0);
         } else {
-          _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t), 
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
-#ifdef __x86_64
-         __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit, 
hit));
-         __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit, 
hit));
-         _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
-
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit, 
(char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit, 
(char*)&data->hitPrim[i+2]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
-#else
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
-#endif
+          int hit_mask = _mm_movemask_ps(hit);
+          for(int j = 0; j < 4; ++j) {
+            if (hit_mask & (1 << j))
+              rays.scratchpad<Vector>(i+j) = Vector(ra1[j], ra2[j], 0);
+          }
         }
+#else
+        if (_mm_movemask_ps(hit) == 15) {
+          _mm_store_ps(&data->texCoords[0][i], a1);
+          _mm_store_ps(&data->texCoords[1][i], a2);
+          _mm_store_ps(&data->texCoords[2][i], _mm_setzero_ps());
+        } else {
+          maskedStore_ps(hit, (float*)&data->texCoords[0][i], a1);
+          maskedStore_ps(hit, (float*)&data->texCoords[1][i], a2);
+          maskedStore_ps(hit, (float*)&data->texCoords[2][i], 
_mm_setzero_ps());
+        }
+#endif
       }
       for(;i<rays.rayEnd;i++){
         Real dt=Dot(rays.getDirection(i), normal);
@@ -183,7 +209,11 @@
           continue;
 
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
     }
 #else
@@ -203,7 +233,11 @@
         continue;
 
       if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
-        rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#if USE_SCRATCHPAD
+          rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
     }
 #endif
   } else {
@@ -230,7 +264,11 @@
           continue;
 
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
     } else {
       int i = rays.rayBegin;
@@ -253,7 +291,11 @@
           continue;
 
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
       RayPacketData* data = rays.data;
       __m128 normalx = _mm_set1_ps(normal[0]);
@@ -298,38 +340,38 @@
         if(_mm_movemask_ps(hit) == 0)
           continue;
 
+        rays.hitWithoutTminCheck(i, hit, t, getMaterial(), this, 
getTexCoordMapper());
+
+#if USE_SCRATCHPAD
+        // Copy the barycentric coordinates to the scratch pad
+        MANTA_ALIGN(16) float ra1[4];
+        MANTA_ALIGN(16) float ra2[4];
+        _mm_store_ps(ra1, a1);
+        _mm_store_ps(ra2, a2);
+          
         if(_mm_movemask_ps(hit) == 15){
-          _mm_store_ps(&data->minT[i], t);
-#ifdef __x86_64
-          _mm_store_si128((__m128i*)&data->hitMatl[i], _mm_set1_epi64x((long 
long)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitMatl[i+2], 
_mm_set1_epi64x((long long)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitPrim[i], _mm_set1_epi64x((long 
long)this));
-          _mm_store_si128((__m128i*)&data->hitPrim[i+2], 
_mm_set1_epi64x((long long)this));
-          _mm_store_si128((__m128i*)&data->hitTex[i], _mm_set1_epi64x((long 
long)getTexCoordMapper()));
-          _mm_store_si128((__m128i*)&data->hitTex[i+2], 
_mm_set1_epi64x((long long)getTexCoordMapper()));
-#else
-          _mm_store_si128((__m128i*)&data->hitMatl[i], 
_mm_set1_epi32((int)getMaterial()));
-          _mm_store_si128((__m128i*)&data->hitPrim[i], 
_mm_set1_epi32((int)this));
-          _mm_store_si128((__m128i*)&data->hitTex[i], 
_mm_set1_epi32((int)getTexCoordMapper()));
-#endif
+          rays.scratchpad<Vector>(i+0) = Vector(ra1[0], ra2[0], 0);
+          rays.scratchpad<Vector>(i+1) = Vector(ra1[1], ra2[1], 0);
+          rays.scratchpad<Vector>(i+2) = Vector(ra1[2], ra2[2], 0);
+          rays.scratchpad<Vector>(i+3) = Vector(ra1[3], ra2[3], 0);
         } else {
-          _mm_maskmoveu_si128((__m128i)_mm_castps_si128(t), 
(__m128i)_mm_castps_si128(hit), (char*)&data->minT[i]);
-#ifdef __x86_64
-         __m128i lohit = (__m128i)_mm_castps_si128(_mm_unpacklo_ps(hit, 
hit));
-         __m128i hihit = (__m128i)_mm_castps_si128(_mm_unpackhi_ps(hit, 
hit));
-         _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
lohit, (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)getMaterial()), 
hihit, (char*)&data->hitMatl[i+2]);
-
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), lohit, 
(char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long long)this), hihit, 
(char*)&data->hitPrim[i+2]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), lohit, (char*)&data->hitTex[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi64x((long 
long)getTexCoordMapper()), hihit, (char*)&data->hitTex[i+2]);
-#else
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getMaterial()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitMatl[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)this), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitPrim[i]);
-          _mm_maskmoveu_si128(_mm_set1_epi32((int)getTexCoordMapper()), 
(__m128i)_mm_castps_si128(hit), (char*)&data->hitTex[i]);
-#endif
+          int hit_mask = _mm_movemask_ps(hit);
+          for(int j = 0; j < 4; ++j) {
+            if (hit_mask & (1 << j))
+              rays.scratchpad<Vector>(i+j) = Vector(ra1[j], ra2[j], 0);
+          }
+        }
+#else
+        if (_mm_movemask_ps(hit) == 15) {
+          _mm_store_ps(&data->texCoords[0][i], a1);
+          _mm_store_ps(&data->texCoords[1][i], a2);
+          _mm_store_ps(&data->texCoords[2][i], _mm_setzero_ps());
+        } else {
+          maskedStore_ps(hit, (float*)&data->texCoords[0][i], a1);
+          maskedStore_ps(hit, (float*)&data->texCoords[1][i], a2);
+          maskedStore_ps(hit, (float*)&data->texCoords[2][i], 
_mm_setzero_ps());
         }
+#endif
       }
       for(;i<rays.rayEnd;i++){
         Vector dir = rays.getDirection(i);
@@ -350,7 +392,11 @@
           continue;
         
         if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
+#if USE_SCRATCHPAD
           rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
       }
     }
 #else
@@ -373,26 +419,34 @@
         continue;
 
       if(rays.hit(i, t, getMaterial(), this, getTexCoordMapper()))
-        rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#if USE_SCRATCHPAD
+          rays.scratchpad<Vector>(i) = Vector(a1, a2, 0);
+#else
+          rays.setTexCoords(i, Vector(a1, a2, 0));
+#endif
     }
 #endif
   }
 }
 
 void Parallelogram::computeTexCoords2(const RenderContext&,
-                                     RayPacket& rays) const
+                                      RayPacket& rays) const
 {
+#if USE_SCRATCHPAD
   for(int i=rays.begin();i<rays.end();i++){
     rays.setTexCoords(i, rays.scratchpad<Vector>(i));
   }
+#endif
   rays.setFlag(RayPacket::HaveTexture2|RayPacket::HaveTexture3);
 }
 
 void Parallelogram::computeTexCoords3(const RenderContext&,
-                                     RayPacket& rays) const
+                                      RayPacket& rays) const
 {
+#if USE_SCRATCHPAD
   for(int i=rays.begin();i<rays.end();i++){
     rays.setTexCoords(i, rays.scratchpad<Vector>(i));
   }
+#endif
   rays.setFlag(RayPacket::HaveTexture2|RayPacket::HaveTexture3);
 }




  • [MANTA] r1238 - trunk/Model/Primitives, bigler, 11/27/2006

Archive powered by MHonArc 2.6.16.

Top of page