diff options
Diffstat (limited to 'libcilkrts/runtime/os-unix.c')
-rw-r--r-- | libcilkrts/runtime/os-unix.c | 210 |
1 files changed, 136 insertions, 74 deletions
diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c index cb582dd7591..d339daf7026 100644 --- a/libcilkrts/runtime/os-unix.c +++ b/libcilkrts/runtime/os-unix.c @@ -2,11 +2,9 @@ * ************************************************************************* * - * @copyright - * Copyright (C) 2009-2013, Intel Corporation + * Copyright (C) 2009-2016, Intel Corporation * All rights reserved. * - * @copyright * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -21,7 +19,6 @@ * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * - * @copyright * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR @@ -34,15 +31,22 @@ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY * WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE * POSSIBILITY OF SUCH DAMAGE. + * + * ********************************************************************* + * + * PLEASE NOTE: This file is a downstream copy of a file maintained in + * a repository at cilkplus.org. Changes made to this file that are not + * submitted through the contribution process detailed at + * http://www.cilkplus.org/submit-cilk-contribution will be lost the next + * time that a new version is released. Changes only submitted to the + * GNU compiler collection or posted to the git repository at + * https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are + * not tracked. + * + * We welcome your contributions to this open source project. Thank you + * for your assistance in helping us improve Cilk Plus. **************************************************************************/ -#ifdef __linux__ - // define _GNU_SOURCE before *any* #include. 
- // Even <stdint.h> will break later #includes if this macro is not - // already defined when it is #included. -# define _GNU_SOURCE -#endif - #include "os.h" #include "bug.h" #include "cilk_malloc.h" @@ -51,22 +55,27 @@ #if defined __linux__ # include <sys/sysinfo.h> # include <sys/syscall.h> + #elif defined __APPLE__ # include <sys/sysctl.h> // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output -#elif defined __DragonFly__ -// No additional include files -#elif defined __FreeBSD__ -// No additional include files -#elif defined __CYGWIN__ -// Cygwin on Windows - no additional include files + #elif defined __VXWORKS__ # include <vxWorks.h> # include <vxCpuLib.h> -# include <taskLib.h> +# include <taskLib.h> + // Solaris #elif defined __sun__ && defined __svr4__ # include <sched.h> + +// OSes we know about which don't require any additional files +#elif defined __CYGWIN__ || \ + defined __DragonFly__ || \ + defined __FreeBSD__ || \ + defined __GNU__ +// No additional include files + #else # error "Unsupported OS" #endif @@ -311,39 +320,67 @@ static pid_t linux_gettid(void) * mask is set by the offload library to force the offload code away from * cores that have offload support threads running on them. */ -static int linux_get_affinity_count (int tid) +static int linux_get_affinity_count () { -#if !defined HAVE_PTHREAD_AFFINITY_NP - return 0; + long system_cores = sysconf(_SC_NPROCESSORS_ONLN); + int affinity_cores = 0; + +#if defined HAVE_PTHREAD_AFFINITY_NP + +#if defined (CPU_ALLOC_SIZE) && ! defined(DONT_USE_CPU_ALLOC_SIZE) + // Statically allocated cpu_set_t's max out at 1024 cores. If + // CPU_ALLOC_SIZE is available, use it to support large numbers of cores + size_t cpusetsize = CPU_ALLOC_SIZE(system_cores); + cpu_set_t *process_mask = (cpu_set_t *)__cilkrts_malloc(cpusetsize); + + // Get the affinity mask for this thread + int err = pthread_getaffinity_np(pthread_self(), + cpusetsize, + process_mask); + + // Count the available cores. 
+ if (0 == err) + affinity_cores = CPU_COUNT_S(cpusetsize, process_mask); + + __cilkrts_free(process_mask); + #else + // CPU_ALLOC_SIZE isn't available, or this is the Intel compiler build + // and we have to support RHEL5. Use a statically allocated cpu_set_t cpu_set_t process_mask; // Extract the thread affinity mask - int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask); + int err = pthread_getaffinity_np(pthread_self(), + sizeof(process_mask), + &process_mask); - if (0 != err) + if (0 == err) { - return 0; - } - - // We have extracted the mask OK, so now we can count the number of threads - // in it. This is linear in the maximum number of CPUs available, We - // could do a logarithmic version, if we assume the format of the mask, - // but it's not really worth it. We only call this at thread startup - // anyway. - int available_procs = 0; - int i; - for (i = 0; i < CPU_SETSIZE; i++) - { - if (CPU_ISSET(i, &process_mask)) + // We have extracted the mask OK, so now we can count the number of + // threads in it. This is linear in the maximum number of CPUs + // available, We could do a logarithmic version, if we assume the + // format of the mask, but it's not really worth it. We only call + // this at thread startup anyway. + int i; + for (i = 0; i < CPU_SETSIZE; i++) { - available_procs++; + if (CPU_ISSET(i, &process_mask)) + { + affinity_cores++; + } } } - - return available_procs; -#endif +#endif // CPU_ALLOC_SIZE +#endif // ! defined HAVE_PTHREAD_AFFINITY_NP + + // If we've got a count of cores this thread is supposed to use, that's + // the number of cores we'll use. Otherwise, default to the number of + // cores on the system. + if (0 == affinity_cores) + return system_cores; + else + return affinity_cores; } #endif // defined (__linux__) && ! 
defined(__ANDROID__) @@ -356,43 +393,56 @@ static int linux_get_affinity_count (int tid) COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void) { -#if defined __ANDROID__ || (defined(__sun__) && defined(__svr4__)) - return sysconf (_SC_NPROCESSORS_ONLN); +#if defined __ANDROID__ || \ + defined __CYGWIN__ || \ + defined __DragonFly__ || \ + defined __FreeBSD__ || \ + (defined(__sun__) && defined(__svr4__)) + return (int)sysconf(_SC_NPROCESSORS_ONLN); #elif defined __MIC__ /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial /// on KNC. Also, ignore the last core. - int P = sysconf (_SC_NPROCESSORS_ONLN); - return P/2 - 2; + int count = (int)sysconf (_SC_NPROCESSORS_ONLN); + return count/2 - 2; #elif defined __linux__ - int affinity_count = linux_get_affinity_count(linux_gettid()); - - return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN); + return linux_get_affinity_count(); #elif defined __APPLE__ - int count = 0; - int cmd[2] = { CTL_HW, HW_NCPU }; + int count; size_t len = sizeof count; - int status = sysctl(cmd, 2, &count, &len, 0, 0); - assert(status >= 0); - assert((unsigned)count == count); + int status = sysctlbyname("hw.logicalcpu", &count, &len, 0, 0); + assert(0 == status); return count; -#elif defined __FreeBSD__ || defined __CYGWIN__ || defined __DragonFly__ - int ncores = sysconf(_SC_NPROCESSORS_ONLN); - - return ncores; - // Just get the number of processors -// return sysconf(_SC_NPROCESSORS_ONLN); #elif defined __VXWORKS__ - return __builtin_popcount( vxCpuEnabledGet() ); + return __builtin_popcount(vxCpuEnabledGet()); #else -#error "Unknown architecture" +#error "Unsupported architecture" +#endif +} + +COMMON_SYSDEP void __cilkrts_idle(void) +{ + // This is another version of __cilkrts_yield() to be used when + // silencing workers that are not stealing work. 
+#if defined(__ANDROID__) || \ + defined(__FreeBSD__) || \ + defined(__VXWORKS__) || \ + (defined(__sun__) && defined(__svr4__)) + sched_yield(); +#elif defined(__MIC__) + _mm_delay_32(1024); +#elif defined(__linux__) || \ + defined(__APPLE__) + usleep(10000); +#else +# error "Unsupported architecture" #endif } COMMON_SYSDEP void __cilkrts_sleep(void) { #ifdef __VXWORKS__ - taskDelay(1); + taskDelay(1); #else usleep(1); #endif @@ -400,13 +450,14 @@ COMMON_SYSDEP void __cilkrts_sleep(void) COMMON_SYSDEP void __cilkrts_yield(void) { -#if __APPLE__ || __FreeBSD__ || __VXWORKS__ - // On MacOS, call sched_yield to yield quantum. I'm not sure why we +#if defined(__ANDROID__) || \ + defined(__APPLE__) || \ + defined(__FreeBSD__) || \ + defined(__VXWORKS__) || \ + (defined(__sun__) && defined(__svr4__)) + // Call sched_yield to yield quantum. I'm not sure why we // don't do this on Linux also. sched_yield(); -#elif defined(__DragonFly__) - // On DragonFly BSD, call sched_yield to yield quantum. - sched_yield(); #elif defined(__MIC__) // On MIC, pthread_yield() really trashes things. Arch's measurements // showed that calling _mm_delay_32() (or doing nothing) was a better @@ -414,14 +465,12 @@ COMMON_SYSDEP void __cilkrts_yield(void) // giving up the processor and latency starting up when work becomes // available _mm_delay_32(1024); -#elif defined(__ANDROID__) || (defined(__sun__) && defined(__svr4__)) - // On Android and Solaris, call sched_yield to yield quantum. I'm not - // sure why we don't do this on Linux also. - sched_yield(); -#else +#elif defined(__linux__) // On Linux, call pthread_yield (which in turn will call sched_yield) // to yield quantum. 
pthread_yield(); +#else +# error "Unsupported architecture" #endif } @@ -434,11 +483,10 @@ COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen, const char* envstr = getenv(varname); if (envstr) { - size_t len = strlen(envstr); + size_t len = cilk_strlen(envstr); if (len > vallen - 1) return len + 1; - - strcpy(value, envstr); + cilk_strcpy_s(value, vallen, envstr); return len; } else @@ -479,11 +527,25 @@ COMMON_SYSDEP void cilkos_warning(const char *fmt, ...) fflush(stderr); } +#ifdef __VXWORKS__ +#ifdef _WRS_KERNEL +void cilkStart() +{ + __cilkrts_init_tls_variables(); +} +#else +_WRS_CONSTRUCTOR(cilkInit, 100) +{ + __cilkrts_init_tls_variables(); +} +#endif +#else static void __attribute__((constructor)) init_once() { /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/ __cilkrts_init_tls_variables(); } +#endif #define PAGE 4096 |