Text archives Help
- From: "Austin Robison" <arobison@rayscale.com>
- To: manta@sci.utah.edu
- Subject: [Manta] r2138 - trunk/Interface
- Date: Fri, 29 Feb 2008 16:11:15 -0700 (MST)
Author: arobison
Date: Fri Feb 29 16:11:15 2008
New Revision: 2138
Modified:
trunk/Interface/Primitive.cc
Log:
Replacing the SSE normal copy with a memcpy() version.
This is slightly faster than the SSE copy, going from
~17.0fps to ~17.2fps on one core of my laptop.
Modified: trunk/Interface/Primitive.cc
==============================================================================
--- trunk/Interface/Primitive.cc (original)
+++ trunk/Interface/Primitive.cc Fri Feb 29 16:11:15 2008
@@ -25,82 +25,11 @@
RayPacket& rays) const {
rays.computeNormals(context);
-#ifdef MANTA_SSE
- RayPacketData* data = rays.data;
- if((rays.rayBegin ^ (rays.rayEnd-1)) & ~3){
- int i = rays.rayBegin & ~3;
- // Prologue
- if(i != rays.rayBegin){
- __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
- // mask is on for active rays
- __m128 mask = _mm_castsi128_ps(_mm_cmpgt_epi32(ray_idx, _mm_set1_epi32(rays.rayBegin-i-1)));
- _mm_store_ps(&data->geometricNormal[0][i],
- mask4(mask,
- _mm_load_ps(&data->normal[0][i]),
- _mm_load_ps(&data->geometricNormal[0][i])));
- _mm_store_ps(&data->geometricNormal[1][i],
- mask4(mask,
- _mm_load_ps(&data->normal[1][i]),
- _mm_load_ps(&data->geometricNormal[1][i])));
- _mm_store_ps(&data->geometricNormal[2][i],
- mask4(mask,
- _mm_load_ps(&data->normal[2][i]),
- _mm_load_ps(&data->geometricNormal[2][i])));
- }
- // Primary loop body
- int e = rays.rayEnd - 3;
- for(;i<e;i+=4){
- _mm_store_ps(&data->geometricNormal[0][i], _mm_load_ps(&data->normal[0][i]));
- _mm_store_ps(&data->geometricNormal[1][i], _mm_load_ps(&data->normal[1][i]));
- _mm_store_ps(&data->geometricNormal[2][i], _mm_load_ps(&data->normal[2][i]));
- }
- // Epilogue
- if(i != rays.rayEnd){
- __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
- // mask is on for active rays
- __m128 mask = _mm_castsi128_ps(_mm_cmplt_epi32(ray_idx, _mm_set1_epi32(rays.rayEnd-i)));
- _mm_store_ps(&data->geometricNormal[0][i],
- mask4(mask,
- _mm_load_ps(&data->normal[0][i]),
- _mm_load_ps(&data->geometricNormal[0][i])));
- _mm_store_ps(&data->geometricNormal[1][i],
- mask4(mask,
- _mm_load_ps(&data->normal[1][i]),
- _mm_load_ps(&data->geometricNormal[1][i])));
- _mm_store_ps(&data->geometricNormal[2][i],
- mask4(mask,
- _mm_load_ps(&data->normal[2][i]),
- _mm_load_ps(&data->geometricNormal[2][i])));
- }
- } else {
- // Single SSE vector
- int i = rays.rayBegin & ~3;
- __m128i ray_idx = _mm_set_epi32(3, 2, 1, 0);
- __m128 mask = _mm_castsi128_ps(_mm_and_si128(_mm_cmpgt_epi32(ray_idx, _mm_set1_epi32(rays.rayBegin-i-1)), _mm_cmplt_epi32(ray_idx, _mm_set1_epi32(rays.rayEnd-i))));
- _mm_store_ps(&data->geometricNormal[0][i],
- mask4(mask,
- _mm_load_ps(&data->normal[0][i]),
- _mm_load_ps(&data->geometricNormal[0][i])));
- _mm_store_ps(&data->geometricNormal[1][i],
- mask4(mask,
- _mm_load_ps(&data->normal[1][i]),
- _mm_load_ps(&data->geometricNormal[1][i])));
- _mm_store_ps(&data->geometricNormal[2][i],
- mask4(mask,
- _mm_load_ps(&data->normal[2][i]),
- _mm_load_ps(&data->geometricNormal[2][i])));
+ for(int i = 0; i < 3; ++i) {
+ memcpy(&rays.data->geometricNormal[i][rays.begin()],
+ &rays.data->normal[i][rays.begin()],
+ (rays.end()-rays.begin())*sizeof(Real));
}
-#elif 1
- RayPacketData* data = rays.data;
- for(int i = rays.begin(); i < rays.end(); ++i) {
- data->geometricNormal[0][i] = data->normal[0][i];
- data->geometricNormal[1][i] = data->normal[1][i];
- data->geometricNormal[2][i] = data->normal[2][i];
- }
-#else // #ifdef MANTA_SSE
- for(int i = rays.begin(); i != rays.end(); ++i)
- rays.setGeometricNormal(i, rays.getNormal(i));
-#endif
rays.setFlag( RayPacket::HaveGeometricNormals );
if( rays.getFlag( RayPacket::HaveUnitNormals ) )
- [Manta] r2138 - trunk/Interface, Austin Robison, 02/29/2008
Archive powered by MHonArc 2.6.16.