1 files changed, 136 insertions, 74 deletions
diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c
index cb582dd7591..d339daf7026 100644
--- a/libcilkrts/runtime/os-unix.c
+++ b/libcilkrts/runtime/os-unix.c
@@ -2,11 +2,9 @@
  *
  *************************************************************************
  *
- *  @copyright
- *  Copyright (C) 2009-2013, Intel Corporation
+ *  Copyright (C) 2009-2016, Intel Corporation
  *  All rights reserved.
  *  
- *  @copyright
  *  Redistribution and use in source and binary forms, with or without
  *  modification, are permitted provided that the following conditions
  *  are met:
@@ -21,7 +19,6 @@
  *      contributors may be used to endorse or promote products derived
  *      from this software without specific prior written permission.
  *  
- *  @copyright
  *  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
  *  "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
  *  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
@@ -34,15 +31,22 @@
  *  LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY
  *  WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  *  POSSIBILITY OF SUCH DAMAGE.
+ *  
+ *  *********************************************************************
+ *  
+ *  PLEASE NOTE: This file is a downstream copy of a file mainitained in
+ *  a repository at cilkplus.org. Changes made to this file that are not
+ *  submitted through the contribution process detailed at
+ *  http://www.cilkplus.org/submit-cilk-contribution will be lost the next
+ *  time that a new version is released. Changes only submitted to the
+ *  GNU compiler collection or posted to the git repository at
+ *  https://bitbucket.org/intelcilkruntime/intel-cilk-runtime.git are
+ *  not tracked.
+ *  
+ *  We welcome your contributions to this open source project. Thank you
+ *  for your assistance in helping us improve Cilk Plus.
  **************************************************************************/
 
-#ifdef __linux__
-    // define _GNU_SOURCE before *any* #include.
-    // Even <stdint.h> will break later #includes if this macro is not
-    // already defined when it is #included.
-#   define _GNU_SOURCE
-#endif
-
 #include "os.h"
 #include "bug.h"
 #include "cilk_malloc.h"
@@ -51,22 +55,27 @@
 #if defined __linux__
 #   include <sys/sysinfo.h>
 #   include <sys/syscall.h>
+
 #elif defined __APPLE__
 #   include <sys/sysctl.h>
     // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
-#elif defined  __DragonFly__
-// No additional include files
-#elif defined  __FreeBSD__
-// No additional include files
-#elif defined __CYGWIN__
-// Cygwin on Windows - no additional include files
+
 #elif defined  __VXWORKS__
 #   include <vxWorks.h>   
 #   include <vxCpuLib.h>   
-#   include <taskLib.h>   
+#   include <taskLib.h>
+   
 // Solaris
 #elif defined __sun__ && defined __svr4__
 #   include <sched.h>
+
+// OSes we know about which don't require any additional files
+#elif defined __CYGWIN__ || \
+      defined __DragonFly__ || \
+      defined __FreeBSD__ || \
+      defined __GNU__
+// No additional include files
+
 #else
 #   error "Unsupported OS"
 #endif
@@ -311,39 +320,67 @@ static pid_t linux_gettid(void)
  * mask is set by the offload library to force the offload code away from
  * cores that have offload support threads running on them.
  */
-static int linux_get_affinity_count (int tid) 
+static int linux_get_affinity_count ()
 {
-#if !defined HAVE_PTHREAD_AFFINITY_NP
-  return 0;
+    long system_cores = sysconf(_SC_NPROCESSORS_ONLN);
+    int affinity_cores = 0;
+
+#if defined HAVE_PTHREAD_AFFINITY_NP
+
+#if defined (CPU_ALLOC_SIZE) && ! defined(DONT_USE_CPU_ALLOC_SIZE)
+    // Statically allocated cpu_set_t's max out at 1024 cores.  If
+    // CPU_ALLOC_SIZE is available, use it to support large numbers of cores
+    size_t cpusetsize = CPU_ALLOC_SIZE(system_cores);
+    cpu_set_t *process_mask = (cpu_set_t *)__cilkrts_malloc(cpusetsize);
+
+    // Get the affinity mask for this thread
+    int err = pthread_getaffinity_np(pthread_self(),
+                                     cpusetsize,
+                                     process_mask);
+
+    // Count the available cores.
+    if (0 == err)
+        affinity_cores = CPU_COUNT_S(cpusetsize, process_mask);
+
+    __cilkrts_free(process_mask);
+
 #else
+    // CPU_ALLOC_SIZE isn't available, or this is the Intel compiler build
+    // and we have to support RHEL5.  Use a statically allocated cpu_set_t
 
     cpu_set_t process_mask;
 
     // Extract the thread affinity mask
-    int err = sched_getaffinity (tid, sizeof(process_mask),&process_mask);
+    int err = pthread_getaffinity_np(pthread_self(),
+                                     sizeof(process_mask),
+                                     &process_mask);
 
-    if (0 != err)
+    if (0 == err)
     {
-        return 0;
-    }
-
-    // We have extracted the mask OK, so now we can count the number of threads
-    // in it.  This is linear in the maximum number of CPUs available, We
-    // could do a logarithmic version, if we assume the format of the mask,
-    // but it's not really worth it. We only call this at thread startup
-    // anyway.
-    int available_procs = 0;
-    int i;
-    for (i = 0; i < CPU_SETSIZE; i++)
-    {
-        if (CPU_ISSET(i, &process_mask))
+        // We have extracted the mask OK, so now we can count the number of
+        // threads in it.  This is linear in the maximum number of CPUs
+        // available, We could do a logarithmic version, if we assume the
+        // format of the mask, but it's not really worth it. We only call
+        // this at thread startup anyway.
+        int i;
+        for (i = 0; i < CPU_SETSIZE; i++)
         {
-            available_procs++;
+            if (CPU_ISSET(i, &process_mask))
+            {
+                affinity_cores++;
+            }
         }
     }
-
-    return available_procs;
-#endif
+#endif  // CPU_ALLOC_SIZE
+#endif  //  ! defined HAVE_PTHREAD_AFFINITY_NP
+
+    // If we've got a count of cores this thread is supposed to use, that's
+    // the number or cores we'll use.  Otherwise, default to the number of
+    // cores on the system.
+    if (0 == affinity_cores)
+        return system_cores;
+    else
+        return affinity_cores;
 }
 #endif  //  defined (__linux__) && ! defined(__ANDROID__)
 
@@ -356,43 +393,56 @@ static int linux_get_affinity_count (int tid)
 
 COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
 {
-#if defined __ANDROID__ || (defined(__sun__) && defined(__svr4__))
-    return sysconf (_SC_NPROCESSORS_ONLN);
+#if defined __ANDROID__  || \
+    defined __CYGWIN__   || \
+    defined __DragonFly__  || \
+    defined __FreeBSD__  || \
+    (defined(__sun__) && defined(__svr4__))
+    return (int)sysconf(_SC_NPROCESSORS_ONLN);
 #elif defined __MIC__
     /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
     /// on KNC.  Also, ignore the last core.
-    int P = sysconf (_SC_NPROCESSORS_ONLN);
-    return P/2 - 2;
+    int count = (int)sysconf (_SC_NPROCESSORS_ONLN);
+    return count/2 - 2;
 #elif defined __linux__
-    int affinity_count = linux_get_affinity_count(linux_gettid());
-
-    return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
+    return linux_get_affinity_count();
 #elif defined __APPLE__
-    int count = 0;
-    int cmd[2] = { CTL_HW, HW_NCPU };
+    int count;
     size_t len = sizeof count;
-    int status = sysctl(cmd, 2, &count, &len, 0, 0);
-    assert(status >= 0);
-    assert((unsigned)count == count);
+    int status = sysctlbyname("hw.logicalcpu", &count, &len, 0, 0);
+    assert(0 == status);
 
     return count;
-#elif defined  __FreeBSD__ || defined __CYGWIN__ || defined __DragonFly__
-    int ncores = sysconf(_SC_NPROCESSORS_ONLN);
-
-    return ncores;
-    // Just get the number of processors
-//    return sysconf(_SC_NPROCESSORS_ONLN);
 #elif defined  __VXWORKS__
-    return __builtin_popcount( vxCpuEnabledGet() );
+    return __builtin_popcount(vxCpuEnabledGet());
 #else
-#error "Unknown architecture"
+#error "Unsupported architecture"
+#endif
+}
+
+COMMON_SYSDEP void __cilkrts_idle(void)
+{
+    // This is another version of __cilkrts_yield() to be used when
+    // silencing workers that are not stealing work.
+#if defined(__ANDROID__)  || \
+    defined(__FreeBSD__)  || \
+    defined(__VXWORKS__)  || \
+    (defined(__sun__) && defined(__svr4__))
+    sched_yield();
+#elif defined(__MIC__)
+    _mm_delay_32(1024);
+#elif defined(__linux__) || \
+      defined(__APPLE__)
+    usleep(10000);
+#else
+# error "Unsupported architecture"
 #endif
 }
 
 COMMON_SYSDEP void __cilkrts_sleep(void)
 {
 #ifdef __VXWORKS__
-	taskDelay(1);
+    taskDelay(1);
 #else			
     usleep(1);
 #endif	
@@ -400,13 +450,14 @@ COMMON_SYSDEP void __cilkrts_sleep(void)
 
 COMMON_SYSDEP void __cilkrts_yield(void)
 {
-#if __APPLE__ || __FreeBSD__ || __VXWORKS__
-    // On MacOS, call sched_yield to yield quantum.  I'm not sure why we
+#if defined(__ANDROID__)  || \
+    defined(__APPLE__)    || \
+    defined(__FreeBSD__)  || \
+    defined(__VXWORKS__)  || \
+    (defined(__sun__) && defined(__svr4__))
+    // Call sched_yield to yield quantum.  I'm not sure why we
     // don't do this on Linux also.
     sched_yield();
-#elif defined(__DragonFly__)
-    // On DragonFly BSD, call sched_yield to yield quantum.
-    sched_yield();
 #elif defined(__MIC__)
     // On MIC, pthread_yield() really trashes things.  Arch's measurements
     // showed that calling _mm_delay_32() (or doing nothing) was a better
@@ -414,14 +465,12 @@ COMMON_SYSDEP void __cilkrts_yield(void)
     // giving up the processor and latency starting up when work becomes
     // available
     _mm_delay_32(1024);
-#elif defined(__ANDROID__) || (defined(__sun__) && defined(__svr4__))
-    // On Android and Solaris, call sched_yield to yield quantum.  I'm not
-    // sure why we don't do this on Linux also.
-    sched_yield();
-#else
+#elif defined(__linux__)
     // On Linux, call pthread_yield (which in turn will call sched_yield)
     // to yield quantum.
     pthread_yield();
+#else
+# error "Unsupported architecture"
 #endif
 }
 
@@ -434,11 +483,10 @@ COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
     const char* envstr = getenv(varname);
     if (envstr)
     {
-        size_t len = strlen(envstr);
+        size_t len = cilk_strlen(envstr);
         if (len > vallen - 1)
             return len + 1;
-
-        strcpy(value, envstr);
+        cilk_strcpy_s(value, vallen, envstr);
         return len;
     }
     else
@@ -479,11 +527,25 @@ COMMON_SYSDEP void cilkos_warning(const char *fmt, ...)
     fflush(stderr);
 }
 
+#ifdef __VXWORKS__
+#ifdef _WRS_KERNEL
+void cilkStart()
+{
+    __cilkrts_init_tls_variables();
+}
+#else
+_WRS_CONSTRUCTOR(cilkInit, 100)
+{
+    __cilkrts_init_tls_variables();
+}
+#endif
+#else
 static void __attribute__((constructor)) init_once()
 {
     /*__cilkrts_debugger_notification_internal(CILK_DB_RUNTIME_LOADED);*/
     __cilkrts_init_tls_variables();
 }
+#endif
 
 
 #define PAGE 4096