aboutsummaryrefslogtreecommitdiff
path: root/libcilkrts/runtime/os-unix.c
diff options
context:
space:
mode:
Diffstat (limited to 'libcilkrts/runtime/os-unix.c')
-rw-r--r--libcilkrts/runtime/os-unix.c210
1 files changed, 136 insertions, 74 deletions
diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c
index cb582dd7591..d339daf7026 100644
--- a/libcilkrts/runtime/os-unix.c
+++ b/libcilkrts/runtime/os-unix.c
@@ -2,11 +2,9 @@
*
*************************************************************************
*
- * @copyright
- * Copyright (C) 2009-2013, Intel Corporation
+ * Copyright (C) 2009-2016, Intel Corporation
* All rights reserved.
*
- * @copyright
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -21,7 +19,6 @@
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
- * @copyright
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -34,15 +31,22 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
* WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
* POSSIBILITY OF SUCH DAMAGE.
+ *
+ * *********************************************************************
+ *
+ * PLEASE NOTE: This file is a downstream copy of a file maintained in
+ * a repository at cilkplus.org. Changes made to this file that are not
+ * submitted through the contribution process detailed at
+ * http://www.cilkplus.org/submit-cilk-contribution will be lost the next
+ * time that a new version is released. Changes only submitted to the
+ * GNU compiler collection or posted to the git repository at
+ * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
+ * not tracked.
+ *
+ * We welcome your contributions to this open source project. Thank you
+ * for your assistance in helping us improve Cilk Plus.
**************************************************************************/
-#ifdef __linux__
- // define _GNU_SOURCE before *any* #include.
- // Even <stdint.h> will break later #includes if this macro is not
- // already defined when it is #included.
-# define _GNU_SOURCE
-#endif
-
#include "os.h"
#include "bug.h"
#include "cilk_malloc.h"
@@ -51,22 +55,27 @@
#if defined __linux__
# include <sys/sysinfo.h>
# include <sys/syscall.h>
+
#elif defined __APPLE__
# include <sys/sysctl.h>
// Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
-#elif defined __DragonFly__
-// No additional include files
-#elif defined __FreeBSD__
-// No additional include files
-#elif defined __CYGWIN__
-// Cygwin on Windows - no additional include files
+
#elif defined __VXWORKS__
# include <vxWorks.h>
# include <vxCpuLib.h>
-# include <taskLib.h>
+# include <taskLib.h>
+
// Solaris
#elif defined __sun__ && defined __svr4__
# include <sched.h>
+
+// OSes we know about which don't require any additional files
+#elif defined __CYGWIN__ || \
+ defined __DragonFly__ || \
+ defined __FreeBSD__ || \
+ defined __GNU__
+// No additional include files
+
#else
# error "Unsupported OS"
#endif
@@ -311,39 +320,67 @@ static pid_t linux_gettid(void)
* mask is set by the offload library to force the offload code away from
* cores that have offload support threads running on them.
*/
-static int linux_get_affinity_count (int tid)
+static int linux_get_affinity_count ()
{
-#if !defined HAVE_PTHREAD_AFFINITY_NP
- return 0;
+ long system_cores = sysconf(_SC_NPROCESSORS_ONLN);
+ int affinity_cores = 0;
+
+#if defined HAVE_PTHREAD_AFFINITY_NP
+
+#if defined (CPU_ALLOC_SIZE) && ! defined(DONT_USE_CPU_ALLOC_SIZE)
+ // Statically allocated cpu_set_t's max out at 1024 cores. If
+ // CPU_ALLOC_SIZE is available, use it to support large numbers of cores
+ size_t cpusetsize = CPU_ALLOC_SIZE(system_cores);
+ cpu_set_t *process_mask = (cpu_set_t *)__cilkrts_malloc(cpusetsize);
+
+ // Get the affinity mask for this thread
+ int err = pthread_getaffinity_np(pthread_self(),
+ cpusetsize,
+ process_mask);
+
+ // Count the available cores.
+ if (0 == err)
+ affinity_cores = CPU_COUNT_S(cpusetsize, process_mask);
+
+ __cilkrts_free(process_mask);
+
#else
+ // CPU_ALLOC_SIZE isn't available, or this is the Intel compiler build
+ // and we have to support RHEL5. Use a statically allocated cpu_set_t
cpu_set_t process_mask;
// Extract the thread affinity mask
- int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask);
+ int err = pthread_getaffinity_np(pthread_self(),
+ sizeof(process_mask),
+ &process_mask);
- if (0 != err)
+ if (0 == err)
{
- return 0;
- }
-
- // We have extracted the mask OK, so now we can count the number of threads
- // in it. This is linear in the maximum number of CPUs available, We
- // could do a logarithmic version, if we assume the format of the mask,
- // but it's not really worth it. We only call this at thread startup
- // anyway.
- int available_procs = 0;
- int i;
- for (i = 0; i < CPU_SETSIZE; i++)
- {
- if (CPU_ISSET(i, &process_mask))
+ // We have extracted the mask OK, so now we can count the number of
+ // threads in it. This is linear in the maximum number of CPUs
+ // available. We could do a logarithmic version, if we assume the
+ // format of the mask, but it's not really worth it. We only call
+ // this at thread startup anyway.
+ int i;
+ for (i = 0; i < CPU_SETSIZE; i++)
{
- available_procs++;
+ if (CPU_ISSET(i, &process_mask))
+ {
+ affinity_cores++;
+ }
}
}
-
- return available_procs;
-#endif
+#endif // CPU_ALLOC_SIZE
+#endif // defined HAVE_PTHREAD_AFFINITY_NP
+
+ // If we've got a count of cores this thread is supposed to use, that's
+ // the number of cores we'll use. Otherwise, default to the number of
+ // cores on the system.
+ if (0 == affinity_cores)
+ return system_cores;
+ else
+ return affinity_cores;
}
#endif // defined (__linux__) && ! defined(__ANDROID__)
@@ -356,43 +393,56 @@ static int linux_get_affinity_count (int tid)
COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
-#if defined __ANDROID__ || (defined(__sun__) && defined(__svr4__))
- return sysconf (_SC_NPROCESSORS_ONLN);
+#if defined __ANDROID__ || \
+ defined __CYGWIN__ || \
+ defined __DragonFly__ || \
+ defined __FreeBSD__ || \
+ (defined(__sun__) && defined(__svr4__))
+ return (int)sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __MIC__
/// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
/// on KNC. Also, ignore the last core.
- int P = sysconf (_SC_NPROCESSORS_ONLN);
- return P/2 - 2;
+ int count = (int)sysconf (_SC_NPROCESSORS_ONLN);
+ return count/2 - 2;
#elif defined __linux__
- int affinity_count = linux_get_affinity_count(linux_gettid());
-
- return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
+ return linux_get_affinity_count();
#elif defined __APPLE__
- int count = 0;
- int cmd[2] = { CTL_HW, HW_NCPU };
+ int count;
size_t len = sizeof count;
- int status = sysctl(cmd, 2, &count, &len, 0, 0);
- assert(status >= 0);
- assert((unsigned)count == count);
+ int status = sysctlbyname("hw.logicalcpu", &count, &len, 0, 0);
+ assert(0 == status);
return count;
-#elif defined __FreeBSD__ || defined __CYGWIN__ || defined __DragonFly__
- int ncores = sysconf(_SC_NPROCESSORS_ONLN);
-
- return ncores;
- // Just get the number of processors
-// return sysconf(_SC_NPROCESSORS_ONLN);
#elif defined __VXWORKS__
- return __builtin_popcount( vxCpuEnabledGet() );
+ return __builtin_popcount(vxCpuEnabledGet());
#else
-#error "Unknown architecture"
+#error "Unsupported architecture"
+#endif
+}
+
+COMMON_SYSDEP void __cilkrts_idle(void)
+{
+ // This is another version of __cilkrts_yield() to be used when
+ // silencing workers that are not stealing work.
+#if defined(__ANDROID__) || \
+ defined(__FreeBSD__) || \
+ defined(__VXWORKS__) || \
+ (defined(__sun__) && defined(__svr4__))
+ sched_yield();
+#elif defined(__MIC__)
+ _mm_delay_32(1024);
+#elif defined(__linux__) || \
+ defined(__APPLE__)
+ usleep(10000);
+#else
+# error "Unsupported architecture"
#endif
}
COMMON_SYSDEP void __cilkrts_sleep(void)
{
#ifdef __VXWORKS__
- taskDelay(1);
+ taskDelay(1);
#else
usleep(1);
#endif
@@ -400,13 +450,14 @@ COMMON_SYSDEP void __cilkrts_sleep(void)
COMMON_SYSDEP void __cilkrts_yield(void)
{
-#if __APPLE__ || __FreeBSD__ || __VXWORKS__
- // On MacOS, call sched_yield to yield quantum. I'm not sure why we
+#if defined(__ANDROID__) || \
+ defined(__APPLE__) || \
+ defined(__FreeBSD__) || \
+ defined(__VXWORKS__) || \
+ (defined(__sun__) && defined(__svr4__))
+ // Call sched_yield to yield quantum. I'm not sure why we
// don't do this on Linux also.
sched_yield();
-#elif defined(__DragonFly__)
- // On DragonFly BSD, call sched_yield to yield quantum.
- sched_yield();
#elif defined(__MIC__)
// On MIC, pthread_yield() really trashes things. Arch's measurements
// showed that calling _mm_delay_32() (or doing nothing) was a better
@@ -414,14 +465,12 @@ COMMON_SYSDEP void __cilkrts_yield(void)
// giving up the processor and latency starting up when work becomes
// available
_mm_delay_32(1024);
-#elif defined(__ANDROID__) || (defined(__sun__) && defined(__svr4__))
- // On Android and Solaris, call sched_yield to yield quantum. I'm not
- // sure why we don't do this on Linux also.
- sched_yield();
-#else
+#elif defined(__linux__)
// On Linux, call pthread_yield (which in turn will call sched_yield)
// to yield quantum.
pthread_yield();
+#else
+# error "Unsupported architecture"
#endif
}
@@ -434,11 +483,10 @@ COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
const char* envstr = getenv(varname);
if (envstr)
{
- size_t len = strlen(envstr);
+ size_t len = cilk_strlen(envstr);
if (len > vallen - 1)
return len + 1;
-
- strcpy(value, envstr);
+ cilk_strcpy_s(value, vallen, envstr);
return len;
}
else
@@ -479,11 +527,25 @@ COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
fflush(stderr);
}
+#ifdef __VXWORKS__
+#ifdef _WRS_KERNEL
+void cilkStart()
+{
+ __cilkrts_init_tls_variables();
+}
+#else
+_WRS_CONSTRUCTOR(cilkInit, 100)
+{
+ __cilkrts_init_tls_variables();
+}
+#endif
+#else
static void __attribute__((constructor)) init_once()
{
/*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
__cilkrts_init_tls_variables();
}
+#endif
#define PAGE 4096