Author: boulos
Date: Mon Feb 25 19:14:04 2008
New Revision: 2123
Modified:
trunk/Core/Thread/Thread_pthreads.cc
trunk/Engine/Control/RTRT.cc
Log:
Core/Thread/Thread_pthreads.cc
Use sysctl to convert the user's requested cpu to an L2 cache group.
Engine/Control/RTRT.cc
Use Thread::migrate to allow for people to use thread affinity when
available (currently only on Apple Leopard, but we could add CMake
checks for the NPTL and then use pthread_setaffinity_np if
available).
Modified: trunk/Core/Thread/Thread_pthreads.cc
==============================================================================
--- trunk/Core/Thread/Thread_pthreads.cc (original)
+++ trunk/Core/Thread/Thread_pthreads.cc Mon Feb 25 19:14:04 2008
@@ -504,16 +504,30 @@
}
#ifdef APPLE_LEOPARD
if (cpu_ != -1) {
+ // Using the Thread Affinity API
+ // (http://developer.apple.com/releasenotes/Performance/RN-AffinityAPI/index.html)
+ // we can tell the scheduler to try to put us on a shared L2
+ // cache. To convert the user's cpu_ number to an affinity tag, we
+ // first determine how many cores share an L2 cache and how many
+ // total cores there are. This information is gathered using
+ // sysctl.
thread_affinity_policy ap;
- // NOTE(boulos): To allow people to specify processors using
- // 0-based indexing, we don't want them to end up with 0 meaning
- // no affinity. OS X also can tell you how many "logical
- // processors" share an L2 cache (which is all that the affinity
- // scheduler priority "respects" anyway) so we might be able to
- // just convert the cpu_ field here into an affinity tag. This
- // would allow a linux implementation that had NPTL to use
- // pthread_setaffinity_np directly on the actual core desired.
- ap.affinity_tag = cpu_ + 1;
+ // hw.cacheconfig[0] is how many total logical processors there are (cores)
+ // hw.cacheconfig[1] is how many share an L1 (1 for now...)
+ // hw.cacheconfig[2] is how many share an L2 (2 on Clovertown)
+ int64_t cache_config[80];
+ size_t len = sizeof(cache_config);
+ if (sysctlbyname("hw.cacheconfig", cache_config, &len, NULL, 0) != 0) {
+ throw ThreadError("Unable to read hw.cacheconfig");
+ }
+ unsigned int num_cores = static_cast<unsigned int>(cache_config[0]);
+ unsigned int num_per_L2 = static_cast<unsigned int>(cache_config[2]);
+ unsigned int num_groups = num_cores / num_per_L2;
+ unsigned int cpu_group = cpu_ / num_per_L2;
+ cpu_group %= num_groups; // In case someone asks to overcommit the machine.
+
+ // The user wants this thread to be on cpu_, let's determine which
+ ap.affinity_tag = cpu_group + 1;
thread_policy_set(pthread_mach_thread_np(priv_->threadid),
THREAD_AFFINITY_POLICY,
(integer_t*)&ap,
Modified: trunk/Engine/Control/RTRT.cc
==============================================================================
--- trunk/Engine/Control/RTRT.cc (original)
+++ trunk/Engine/Control/RTRT.cc Mon Feb 25 19:14:04 2008
@@ -513,16 +513,7 @@
Thread* t = workers[i] =
new Thread(new Worker(this, i, false), name.str().c_str(),
0, Thread::NotActivated);
- // NOTE(boulos): It seems that the Mac OS X scheduler is
- // actually much worse when you try to add priority. The docs
- // claim that you should use affinity "groups" which basically
- // means mapping thread ids to L2 caches. The i < 4 code should
- // test this on 4-core processor systems. However, running
- // bin/manta -np 8 on my 8-core mac pro, only 5 threads are
- // running at peak.
-
- //t->migrate((i < 4) ? 0 : 1);
- //t->migrate(i);
+ t->migrate(i);
t->setStackSize(RENDER_THREAD_STACKSIZE);
t->activate(false);
}
@@ -807,6 +798,7 @@
workers[i] = new Thread(new Worker(this, i, true), name.str().c_str(),
0, Thread::NotActivated);
+ workers[i]->migrate(i);
// Set the stack size.
workers[i]->setStackSize(RENDER_THREAD_STACKSIZE);
Archive powered by MHonArc 2.6.16.