about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Balaji V. Iyer <balaji.v.iyer@intel.com> 2013-08-05 18:52:16 +0000
committer Balaji V. Iyer <balaji.v.iyer@intel.com> 2013-08-05 18:52:16 +0000
commit 5f967f13d141fc35ca1747e21a62fcd2804d0bbd (patch)
tree 736fe5c630f50a57815fb3f14a2c85dd20c2e576
parent 8ec6a963082e92fd6c843553764906ee0f162bb5 (diff)
Updated libcilkrts to revision 3520.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/cilkplus@201502 138bc75d-0d04-0410-961f-82ee72b054a4
-rw-r--r-- libcilkrts/ChangeLog.cilkplus 96
-rw-r--r-- libcilkrts/Makefile.am 42
-rw-r--r-- libcilkrts/Makefile.in 187
-rw-r--r-- libcilkrts/include/cilk/cilk.h 88
-rw-r--r-- libcilkrts/include/cilk/cilk_api.h 396
-rw-r--r-- libcilkrts/include/cilk/cilk_api_linux.h 49
-rw-r--r-- libcilkrts/include/cilk/cilk_stub.h 49
-rw-r--r-- libcilkrts/include/cilk/cilk_undocumented.h 59
-rw-r--r-- libcilkrts/include/cilk/common.h 197
-rw-r--r-- libcilkrts/include/cilk/holder.h 49
-rw-r--r-- libcilkrts/include/cilk/hyperobject_base.h 109
-rw-r--r-- libcilkrts/include/cilk/metaprogramming.h 523
-rw-r--r-- libcilkrts/include/cilk/reducer.h 2179
-rw-r--r-- libcilkrts/include/cilk/reducer_file.h 49
-rw-r--r-- libcilkrts/include/cilk/reducer_list.h 1443
-rw-r--r-- libcilkrts/include/cilk/reducer_max.h 1057
-rw-r--r-- libcilkrts/include/cilk/reducer_min.h 1056
-rw-r--r-- libcilkrts/include/cilk/reducer_min_max.h 3601
-rw-r--r-- libcilkrts/include/cilk/reducer_opadd.h 1058
-rw-r--r-- libcilkrts/include/cilk/reducer_opand.h 880
-rw-r--r-- libcilkrts/include/cilk/reducer_opmul.h 437
-rw-r--r-- libcilkrts/include/cilk/reducer_opor.h 862
-rw-r--r-- libcilkrts/include/cilk/reducer_opxor.h 857
-rw-r--r-- libcilkrts/include/cilk/reducer_ostream.h 49
-rw-r--r-- libcilkrts/include/cilk/reducer_string.h 1214
-rw-r--r-- libcilkrts/include/cilktools/cilkscreen.h 9
-rw-r--r-- libcilkrts/include/cilktools/cilkview.h 24
-rw-r--r-- libcilkrts/include/cilktools/fake_mutex.h 47
-rw-r--r-- libcilkrts/include/cilktools/lock_guard.h 7
-rw-r--r-- libcilkrts/include/internal/abi.h 128
-rw-r--r-- libcilkrts/include/internal/cilk_fake.h 441
-rw-r--r-- libcilkrts/include/internal/cilk_version.h 59
-rw-r--r-- libcilkrts/include/internal/metacall.h 49
-rw-r--r-- libcilkrts/include/internal/rev.mk 55
-rw-r--r-- libcilkrts/runtime/acknowledgements.dox 46
-rw-r--r-- libcilkrts/runtime/bug.cpp 49
-rw-r--r-- libcilkrts/runtime/bug.h 89
-rw-r--r-- libcilkrts/runtime/c_reducers.c 207
-rw-r--r-- libcilkrts/runtime/cilk-abi-cilk-for.cpp 63
-rw-r--r-- libcilkrts/runtime/cilk-abi-vla-internal.c 78
-rw-r--r-- libcilkrts/runtime/cilk-abi-vla-internal.h 85
-rw-r--r-- libcilkrts/runtime/cilk-abi-vla.c 417
-rw-r--r-- libcilkrts/runtime/cilk-abi.c 259
-rw-r--r-- libcilkrts/runtime/cilk-ittnotify.h 54
-rw-r--r-- libcilkrts/runtime/cilk-tbb-interop.h 49
-rw-r--r-- libcilkrts/runtime/cilk_api.c 49
-rw-r--r-- libcilkrts/runtime/cilk_fiber-unix.cpp 240
-rw-r--r-- libcilkrts/runtime/cilk_fiber-unix.h 144
-rw-r--r-- libcilkrts/runtime/cilk_fiber.cpp 1073
-rw-r--r-- libcilkrts/runtime/cilk_fiber.h 877
-rw-r--r-- libcilkrts/runtime/cilk_malloc.c 53
-rw-r--r-- libcilkrts/runtime/cilk_malloc.h 49
-rw-r--r-- libcilkrts/runtime/component.h 49
-rw-r--r-- libcilkrts/runtime/doxygen-layout.xml 49
-rw-r--r-- libcilkrts/runtime/doxygen.cfg 60
-rw-r--r-- libcilkrts/runtime/except-gcc.cpp 73
-rw-r--r-- libcilkrts/runtime/except-gcc.h 49
-rw-r--r-- libcilkrts/runtime/except.h 49
-rw-r--r-- libcilkrts/runtime/frame_malloc.c 51
-rw-r--r-- libcilkrts/runtime/frame_malloc.h 49
-rw-r--r-- libcilkrts/runtime/full_frame.c 75
-rw-r--r-- libcilkrts/runtime/full_frame.h 151
-rw-r--r-- libcilkrts/runtime/global_state.cpp 142
-rw-r--r-- libcilkrts/runtime/global_state.h 204
-rw-r--r-- libcilkrts/runtime/jmpbuf.c 49
-rw-r--r-- libcilkrts/runtime/jmpbuf.h 107
-rw-r--r-- libcilkrts/runtime/local_state.c 61
-rw-r--r-- libcilkrts/runtime/local_state.h 225
-rw-r--r-- libcilkrts/runtime/metacall_impl.c 51
-rw-r--r-- libcilkrts/runtime/metacall_impl.h 53
-rw-r--r-- libcilkrts/runtime/os-unix.c 149
-rw-r--r-- libcilkrts/runtime/os.h 178
-rw-r--r-- libcilkrts/runtime/os_mutex-unix.c 54
-rw-r--r-- libcilkrts/runtime/os_mutex.h 60
-rw-r--r-- libcilkrts/runtime/pedigrees.c 49
-rw-r--r-- libcilkrts/runtime/pedigrees.h 49
-rw-r--r-- libcilkrts/runtime/record-replay.cpp 765
-rw-r--r-- libcilkrts/runtime/record-replay.h 427
-rw-r--r-- libcilkrts/runtime/reducer_impl.cpp 268
-rw-r--r-- libcilkrts/runtime/reducer_impl.h 51
-rw-r--r-- libcilkrts/runtime/rts-common.h 75
-rw-r--r-- libcilkrts/runtime/scheduler.c 1564
-rw-r--r-- libcilkrts/runtime/scheduler.h 258
-rw-r--r-- libcilkrts/runtime/signal_node.c 53
-rw-r--r-- libcilkrts/runtime/signal_node.h 51
-rw-r--r-- libcilkrts/runtime/spin_mutex.c 104
-rw-r--r-- libcilkrts/runtime/spin_mutex.h 124
-rw-r--r-- libcilkrts/runtime/stacks.c 192
-rw-r--r-- libcilkrts/runtime/stats.c 69
-rw-r--r-- libcilkrts/runtime/stats.h 109
-rw-r--r-- libcilkrts/runtime/symbol_test.c 49
-rw-r--r-- libcilkrts/runtime/sysdep-unix.c 804
-rw-r--r-- libcilkrts/runtime/sysdep.h 325
-rw-r--r-- libcilkrts/runtime/unix_symbols.t 55
-rw-r--r-- libcilkrts/runtime/worker_mutex.c 84
-rw-r--r-- libcilkrts/runtime/worker_mutex.h 75
96 files changed, 20107 insertions, 8767 deletions
diff --git a/libcilkrts/ChangeLog.cilkplus b/libcilkrts/ChangeLog.cilkplus
index 5b6898f1e81..db17ce0e333 100644
--- a/libcilkrts/ChangeLog.cilkplus
+++ b/libcilkrts/ChangeLog.cilkplus
@@ -1,3 +1,99 @@
+2013-08-05 Balaji V. Iyer <balaji.v.iyer@intel.com>
+
+ * include/cilk/reducer_opmul.h: Updated to revision 3520.
+ * include/cilk/cilk_api_linux.h: Likewise.
+ * include/cilk/reducer_min.h: Likewise.
+ * include/cilk/cilk_undocumented.h: Likewise.
+ * include/cilk/reducer_max.h: Likewise.
+ * include/cilk/reducer_opadd.h: Likewise.
+ * include/cilk/hyperobject_base.h: Likewise.
+ * include/cilk/reducer_opand.h: Likewise.
+ * include/cilk/cilk_api.h: Likewise.
+ * include/cilk/cilk_api.h: Likewise.
+ * include/cilk/cilk_stub.h: Likewise.
+ * include/cilk/reducer_string.h: Likewise.
+ * include/cilk/reducer_opxor.h: Likewise.
+ * include/cilk/cilk.h: Likewise.
+ * include/cilk/reducer_ostream.h: Likewise.
+ * include/cilk/reducer_list.h: Likewise.
+ * include/cilk/reducer_opor.h: Likewise.
+ * include/cilk/reducer_file.h: Likewise.
+ * include/cilk/common.h: Likewise.
+ * include/cilktools/lock_guard.h: Likewise.
+ * include/cilktools/cilkview.h: Likewise.
+ * include/cilktools/cilkscreen.h: Likewise.
+ * include/cilktools/fake_mutex.h: Likewise.
+ * include/internal/abi.h: Likewise.
+ * include/internal/rev.mk: Likewise.
+ * include/internal/metacall.h: Likewise.
+ * include/internal/cilk_version.h: Likewise.
+ * runtime/stats.c: Likewise.
+ * runtime/local_state.h: Likewise.
+ * runtime/os-unix.c: Likewise.
+ * runtime/stats.h: Likewise.
+ * runtime/symbol_test.c: Likewise.
+ * runtime/global_state.h: Likewise.
+ * runtime/doxygen-layout.xml: Likewise.
+ * runtime/component.h: Likewise.
+ * runtime/os_mutex.h: Likewise.
+ * runtime/cilk_malloc.c: Likewise.
+ * runtime/cilk-abi.c: Likewise.
+ * runtime/cilk_api.c: Likewise.
+ * runtime/sysdep.h: Likewise.
+ * runtime/unix_symbols.t: Likewise.
+ * runtime/bug.cpp: Likewise.
+ * runtime/cilk-ittnotify.h: Likewise.
+ * runtime/cilk_malloc.h: Likewise.
+ * runtime/scheduler.c: Likewise.
+ * runtime/cilk-abi-cilk-for.cpp: Likewise.
+ * runtime/reducer_impl.h: Likewise.
+ * runtime/except-gcc.cpp: Likewise.
+ * runtime/scheduler.h: Likewise.
+ * runtime/os.h: Likewise.
+ * runtime/worker_mutex.c: Likewise.
+ * runtime/except.h: Likewise.
+ * runtime/global_state.cpp: Likewise.
+ * runtime/worker_mutex.h: Likewise.
+ * runtime/metacall_impl.c: Likewise.
+ * runtime/rts-common.h: Likewise.
+ * runtime/pedigrees.c: Likewise.
+ * runtime/metacall_impl.h: Likewise.
+ * runtime/cilk-tbb-interop.h: Likewise.
+ * runtime/reducer_impl.cpp: Likewise.
+ * runtime/full_frame.c: Likewise.
+ * runtime/pedigrees.h: Likewise.
+ * runtime/c_reducers.c: Likewise.
+ * runtime/full_frame.h: Likewise.
+ * runtime/frame_malloc.c: Likewise.
+ * runtime/bug.h: Likewise.
+ * runtime/signal_node.c: Likewise.
+ * runtime/jmpbuf.c: Likewise.
+ * runtime/os_mutex-unix.c: Likewise.
+ * runtime/frame_malloc.h: Likewise.
+ * runtime/except-gcc.h: Likewise.
+ * runtime/signal_node.h: Likewise.
+ * runtime/jmpbuf.h: Likewise.
+ * runtime/jmpbuf.h: Likewise.
+ * runtime/local_state.c: Likewise.
+ * runtime/doxygen.cfg: Likewise.
+ * Makefile.am: Likewise.
+ * include/cilk/metaprogramming.h: New file.
+ * include/cilk/metaprogramming.h: Likewise.
+ * include/cilk/reducer_opmul.h: Likewise.
+ * include/internal/cilk_fake.h: Likewise.
+ * runtime/cilk-abi-vla-internal.c: Likewise.
+ * runtime/cilk_fiber-unix.cpp: Likewise.
+ * runtime/cilk-abi-vla-internal.h: Likewise.
+ * runtime/spin_mutex.c: Likewise.
+ * runtime/spin_mutex.c: Likewise.
+ * runtime/record-replay.cpp: Likewise.
+ * runtime/cilk_fiber-unix.h: Likewise.
+ * runtime/cilk-abi-vla.c: Likewise.
+ * runtime/cilk_fiber.h: Likewise.
+ * runtime/acknowledgements.dox: Likewise.
+ * runtime/record-replay.h: Likewise.
+ * runtime/cilk_fiber.cpp: Likewise.
+
2013-03-13 Balaji V. Iyer <balaji.v.iyer@intel.com>
* runtime/sysdep-unix.c (__cilkrts_stop_workers): Inserted inline
diff --git a/libcilkrts/Makefile.am b/libcilkrts/Makefile.am
index e5d48b1ab59..cb6c563b291 100644
--- a/libcilkrts/Makefile.am
+++ b/libcilkrts/Makefile.am
@@ -36,15 +36,36 @@ AM_LDFLAGS = -lpthread -ldl
# Target list.
toolexeclib_LTLIBRARIES = libcilkrts.la
-libcilkrts_la_SOURCES = runtime/cilk-abi.c runtime/cilk_api.c \
- runtime/cilk_malloc.c runtime/c_reducers.c \
- runtime/frame_malloc.c runtime/full_frame.c runtime/jmpbuf.c \
- runtime/local_state.c runtime/metacall_impl.c runtime/os_mutex-unix.c \
- runtime/os-unix.c runtime/scheduler.c runtime/signal_node.c \
- runtime/stacks.c runtime/stats.c \
- runtime/sysdep-unix.c runtime/worker_mutex.c runtime/bug.cpp \
- runtime/cilk-abi-cilk-for.cpp runtime/except-gcc.cpp \
- runtime/global_state.cpp runtime/reducer_impl.cpp runtime/pedigrees.c
+libcilkrts_la_SOURCES = \
+ runtime/bug.cpp \
+ runtime/cilk-abi.c \
+ runtime/cilk-abi-cilk-for.cpp \
+ runtime/cilk-abi-vla.c \
+ runtime/cilk-abi-vla-internal.c \
+ runtime/cilk_api.c \
+ runtime/cilk_fiber.cpp \
+ runtime/cilk_fiber-unix.cpp \
+ runtime/cilk_malloc.c \
+ runtime/c_reducers.c \
+ runtime/except-gcc.cpp \
+ runtime/frame_malloc.c \
+ runtime/full_frame.c \
+ runtime/global_state.cpp \
+ runtime/jmpbuf.c \
+ runtime/local_state.c \
+ runtime/metacall_impl.c \
+ runtime/os_mutex-unix.c \
+ runtime/os-unix.c \
+ runtime/pedigrees.c \
+ runtime/record-replay.cpp \
+ runtime/reducer_impl.cpp \
+ runtime/scheduler.c \
+ runtime/signal_node.c \
+ runtime/spin_mutex.c \
+ runtime/stats.c \
+ runtime/symbol_test.c \
+ runtime/sysdep-unix.c \
+ runtime/worker_mutex.c
# Load the $(REVISION) value.
include include/internal/rev.mk
@@ -64,13 +85,16 @@ cilkinclude_HEADERS = \
include/cilk/common.h \
include/cilk/holder.h \
include/cilk/hyperobject_base.h \
+ include/cilk/metaprogramming.h \
include/cilk/reducer_file.h \
include/cilk/reducer.h \
include/cilk/reducer_list.h \
include/cilk/reducer_max.h \
include/cilk/reducer_min.h \
+ include/cilk/reducer_min_max.h \
include/cilk/reducer_opadd.h \
include/cilk/reducer_opand.h \
+ include/cilk/reducer_opmul.h \
include/cilk/reducer_opor.h \
include/cilk/reducer_opxor.h \
include/cilk/reducer_ostream.h \
diff --git a/libcilkrts/Makefile.in b/libcilkrts/Makefile.in
index 2c25a48a1a1..61156b422ac 100644
--- a/libcilkrts/Makefile.in
+++ b/libcilkrts/Makefile.in
@@ -40,30 +40,39 @@
#########################################################################
#
-# Copyright (C) 2011-2012
-# Intel Corporation
-#
-# This file is part of the Intel Cilk Plus Library. This library is free
-# software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Under Section 7 of GPL version 3, you are granted additional
-# permissions described in the GCC Runtime Library Exception, version
-# 3.1, as published by the Free Software Foundation.
-#
-# You should have received a copy of the GNU General Public License and
-# a copy of the GCC Runtime Library Exception along with this program;
-# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-# <http://www.gnu.org/licenses/>.
+# @copyright
+# Copyright (C) 2011-2013
+# Intel Corporation
+#
+# @copyright
+# This file is part of the Intel Cilk Plus Library. This library is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# @copyright
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# @copyright
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# @copyright
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
###########################################################################
+# DO NOT EDIT THIS FILE!
+#
+# It was automatically generated by cilkrts/include/internal/Makefile
+
VPATH = @srcdir@
pkgdatadir = $(datadir)/@PACKAGE@
@@ -130,12 +139,14 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" \
"$(DESTDIR)$(cilkincludedir)"
LTLIBRARIES = $(toolexeclib_LTLIBRARIES)
libcilkrts_la_LIBADD =
-am_libcilkrts_la_OBJECTS = cilk-abi.lo cilk_api.lo cilk_malloc.lo \
- c_reducers.lo frame_malloc.lo full_frame.lo jmpbuf.lo \
- local_state.lo metacall_impl.lo os_mutex-unix.lo os-unix.lo \
- scheduler.lo signal_node.lo stacks.lo stats.lo sysdep-unix.lo \
- worker_mutex.lo bug.lo cilk-abi-cilk-for.lo except-gcc.lo \
- global_state.lo reducer_impl.lo pedigrees.lo
+am_libcilkrts_la_OBJECTS = bug.lo cilk-abi.lo cilk-abi-cilk-for.lo \
+ cilk-abi-vla.lo cilk-abi-vla-internal.lo cilk_api.lo \
+ cilk_fiber.lo cilk_fiber-unix.lo cilk_malloc.lo c_reducers.lo \
+ except-gcc.lo frame_malloc.lo full_frame.lo global_state.lo \
+ jmpbuf.lo local_state.lo metacall_impl.lo os_mutex-unix.lo \
+ os-unix.lo pedigrees.lo record-replay.lo reducer_impl.lo \
+ scheduler.lo signal_node.lo spin_mutex.lo stats.lo \
+ symbol_test.lo sysdep-unix.lo worker_mutex.lo
libcilkrts_la_OBJECTS = $(am_libcilkrts_la_OBJECTS)
libcilkrts_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \
$(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \
@@ -304,17 +315,38 @@ AM_LDFLAGS = -lpthread -ldl
# Target list.
toolexeclib_LTLIBRARIES = libcilkrts.la
-libcilkrts_la_SOURCES = runtime/cilk-abi.c runtime/cilk_api.c \
- runtime/cilk_malloc.c runtime/c_reducers.c \
- runtime/frame_malloc.c runtime/full_frame.c runtime/jmpbuf.c \
- runtime/local_state.c runtime/metacall_impl.c runtime/os_mutex-unix.c \
- runtime/os-unix.c runtime/scheduler.c runtime/signal_node.c \
- runtime/stacks.c runtime/stats.c \
- runtime/sysdep-unix.c runtime/worker_mutex.c runtime/bug.cpp \
- runtime/cilk-abi-cilk-for.cpp runtime/except-gcc.cpp \
- runtime/global_state.cpp runtime/reducer_impl.cpp runtime/pedigrees.c
-
-CILK_REVISION = 2856
+libcilkrts_la_SOURCES = \
+ runtime/bug.cpp \
+ runtime/cilk-abi.c \
+ runtime/cilk-abi-cilk-for.cpp \
+ runtime/cilk-abi-vla.c \
+ runtime/cilk-abi-vla-internal.c \
+ runtime/cilk_api.c \
+ runtime/cilk_fiber.cpp \
+ runtime/cilk_fiber-unix.cpp \
+ runtime/cilk_malloc.c \
+ runtime/c_reducers.c \
+ runtime/except-gcc.cpp \
+ runtime/frame_malloc.c \
+ runtime/full_frame.c \
+ runtime/global_state.cpp \
+ runtime/jmpbuf.c \
+ runtime/local_state.c \
+ runtime/metacall_impl.c \
+ runtime/os_mutex-unix.c \
+ runtime/os-unix.c \
+ runtime/pedigrees.c \
+ runtime/record-replay.cpp \
+ runtime/reducer_impl.cpp \
+ runtime/scheduler.c \
+ runtime/signal_node.c \
+ runtime/spin_mutex.c \
+ runtime/stats.c \
+ runtime/symbol_test.c \
+ runtime/sysdep-unix.c \
+ runtime/worker_mutex.c
+
+CILK_REVISION = 3520
# Load the $(REVISION) value.
@@ -333,13 +365,16 @@ cilkinclude_HEADERS = \
include/cilk/common.h \
include/cilk/holder.h \
include/cilk/hyperobject_base.h \
+ include/cilk/metaprogramming.h \
include/cilk/reducer_file.h \
include/cilk/reducer.h \
include/cilk/reducer_list.h \
include/cilk/reducer_max.h \
include/cilk/reducer_min.h \
+ include/cilk/reducer_min_max.h \
include/cilk/reducer_opadd.h \
include/cilk/reducer_opand.h \
+ include/cilk/reducer_opmul.h \
include/cilk/reducer_opor.h \
include/cilk/reducer_opxor.h \
include/cilk/reducer_ostream.h \
@@ -466,8 +501,12 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/c_reducers.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-cilk-for.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-vla-internal.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-vla.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_api.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_fiber-unix.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_fiber.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_malloc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/except-gcc.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/frame_malloc.Plo@am__quote@
@@ -479,11 +518,13 @@ distclean-compile:
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os-unix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os_mutex-unix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pedigrees.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/record-replay.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reducer_impl.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scheduler.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal_node.Plo@am__quote@
-@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stacks.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spin_mutex.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stats.Plo@am__quote@
+@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol_test.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sysdep-unix.Plo@am__quote@
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker_mutex.Plo@am__quote@
@@ -515,6 +556,20 @@ cilk-abi.lo: runtime/cilk-abi.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi.lo `test -f 'runtime/cilk-abi.c' || echo '$(srcdir)/'`runtime/cilk-abi.c
+cilk-abi-vla.lo: runtime/cilk-abi-vla.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk-abi-vla.lo -MD -MP -MF $(DEPDIR)/cilk-abi-vla.Tpo -c -o cilk-abi-vla.lo `test -f 'runtime/cilk-abi-vla.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk-abi-vla.Tpo $(DEPDIR)/cilk-abi-vla.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/cilk-abi-vla.c' object='cilk-abi-vla.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi-vla.lo `test -f 'runtime/cilk-abi-vla.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla.c
+
+cilk-abi-vla-internal.lo: runtime/cilk-abi-vla-internal.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk-abi-vla-internal.lo -MD -MP -MF $(DEPDIR)/cilk-abi-vla-internal.Tpo -c -o cilk-abi-vla-internal.lo `test -f 'runtime/cilk-abi-vla-internal.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla-internal.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk-abi-vla-internal.Tpo $(DEPDIR)/cilk-abi-vla-internal.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/cilk-abi-vla-internal.c' object='cilk-abi-vla-internal.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi-vla-internal.lo `test -f 'runtime/cilk-abi-vla-internal.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla-internal.c
+
cilk_api.lo: runtime/cilk_api.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk_api.lo -MD -MP -MF $(DEPDIR)/cilk_api.Tpo -c -o cilk_api.lo `test -f 'runtime/cilk_api.c' || echo '$(srcdir)/'`runtime/cilk_api.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk_api.Tpo $(DEPDIR)/cilk_api.Plo
@@ -585,6 +640,13 @@ os-unix.lo: runtime/os-unix.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o os-unix.lo `test -f 'runtime/os-unix.c' || echo '$(srcdir)/'`runtime/os-unix.c
+pedigrees.lo: runtime/pedigrees.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pedigrees.lo -MD -MP -MF $(DEPDIR)/pedigrees.Tpo -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/pedigrees.Tpo $(DEPDIR)/pedigrees.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/pedigrees.c' object='pedigrees.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c
+
scheduler.lo: runtime/scheduler.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scheduler.lo -MD -MP -MF $(DEPDIR)/scheduler.Tpo -c -o scheduler.lo `test -f 'runtime/scheduler.c' || echo '$(srcdir)/'`runtime/scheduler.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/scheduler.Tpo $(DEPDIR)/scheduler.Plo
@@ -599,12 +661,12 @@ signal_node.lo: runtime/signal_node.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o signal_node.lo `test -f 'runtime/signal_node.c' || echo '$(srcdir)/'`runtime/signal_node.c
-stacks.lo: runtime/stacks.c
-@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stacks.lo -MD -MP -MF $(DEPDIR)/stacks.Tpo -c -o stacks.lo `test -f 'runtime/stacks.c' || echo '$(srcdir)/'`runtime/stacks.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/stacks.Tpo $(DEPDIR)/stacks.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/stacks.c' object='stacks.lo' libtool=yes @AMDEPBACKSLASH@
+spin_mutex.lo: runtime/spin_mutex.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spin_mutex.lo -MD -MP -MF $(DEPDIR)/spin_mutex.Tpo -c -o spin_mutex.lo `test -f 'runtime/spin_mutex.c' || echo '$(srcdir)/'`runtime/spin_mutex.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/spin_mutex.Tpo $(DEPDIR)/spin_mutex.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/spin_mutex.c' object='spin_mutex.lo' libtool=yes @AMDEPBACKSLASH@
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stacks.lo `test -f 'runtime/stacks.c' || echo '$(srcdir)/'`runtime/stacks.c
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spin_mutex.lo `test -f 'runtime/spin_mutex.c' || echo '$(srcdir)/'`runtime/spin_mutex.c
stats.lo: runtime/stats.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stats.lo -MD -MP -MF $(DEPDIR)/stats.Tpo -c -o stats.lo `test -f 'runtime/stats.c' || echo '$(srcdir)/'`runtime/stats.c
@@ -613,6 +675,13 @@ stats.lo: runtime/stats.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stats.lo `test -f 'runtime/stats.c' || echo '$(srcdir)/'`runtime/stats.c
+symbol_test.lo: runtime/symbol_test.c
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT symbol_test.lo -MD -MP -MF $(DEPDIR)/symbol_test.Tpo -c -o symbol_test.lo `test -f 'runtime/symbol_test.c' || echo '$(srcdir)/'`runtime/symbol_test.c
+@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/symbol_test.Tpo $(DEPDIR)/symbol_test.Plo
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/symbol_test.c' object='symbol_test.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o symbol_test.lo `test -f 'runtime/symbol_test.c' || echo '$(srcdir)/'`runtime/symbol_test.c
+
sysdep-unix.lo: runtime/sysdep-unix.c
@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sysdep-unix.lo -MD -MP -MF $(DEPDIR)/sysdep-unix.Tpo -c -o sysdep-unix.lo `test -f 'runtime/sysdep-unix.c' || echo '$(srcdir)/'`runtime/sysdep-unix.c
@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sysdep-unix.Tpo $(DEPDIR)/sysdep-unix.Plo
@@ -627,13 +696,6 @@ worker_mutex.lo: runtime/worker_mutex.c
@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o worker_mutex.lo `test -f 'runtime/worker_mutex.c' || echo '$(srcdir)/'`runtime/worker_mutex.c
-pedigrees.lo: runtime/pedigrees.c
-@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pedigrees.lo -MD -MP -MF $(DEPDIR)/pedigrees.Tpo -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c
-@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/pedigrees.Tpo $(DEPDIR)/pedigrees.Plo
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/pedigrees.c' object='pedigrees.lo' libtool=yes @AMDEPBACKSLASH@
-@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@
-@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c
-
.cpp.o:
@am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $<
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po
@@ -669,6 +731,20 @@ cilk-abi-cilk-for.lo: runtime/cilk-abi-cilk-for.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk-abi-cilk-for.lo `test -f 'runtime/cilk-abi-cilk-for.cpp' || echo '$(srcdir)/'`runtime/cilk-abi-cilk-for.cpp
+cilk_fiber.lo: runtime/cilk_fiber.cpp
+@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT cilk_fiber.lo -MD -MP -MF $(DEPDIR)/cilk_fiber.Tpo -c -o cilk_fiber.lo `test -f 'runtime/cilk_fiber.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber.cpp
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/cilk_fiber.Tpo $(DEPDIR)/cilk_fiber.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/cilk_fiber.cpp' object='cilk_fiber.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk_fiber.lo `test -f 'runtime/cilk_fiber.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber.cpp
+
+cilk_fiber-unix.lo: runtime/cilk_fiber-unix.cpp
+@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT cilk_fiber-unix.lo -MD -MP -MF $(DEPDIR)/cilk_fiber-unix.Tpo -c -o cilk_fiber-unix.lo `test -f 'runtime/cilk_fiber-unix.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber-unix.cpp
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/cilk_fiber-unix.Tpo $(DEPDIR)/cilk_fiber-unix.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/cilk_fiber-unix.cpp' object='cilk_fiber-unix.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk_fiber-unix.lo `test -f 'runtime/cilk_fiber-unix.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber-unix.cpp
+
except-gcc.lo: runtime/except-gcc.cpp
@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT except-gcc.lo -MD -MP -MF $(DEPDIR)/except-gcc.Tpo -c -o except-gcc.lo `test -f 'runtime/except-gcc.cpp' || echo '$(srcdir)/'`runtime/except-gcc.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/except-gcc.Tpo $(DEPDIR)/except-gcc.Plo
@@ -683,6 +759,13 @@ global_state.lo: runtime/global_state.cpp
@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o global_state.lo `test -f 'runtime/global_state.cpp' || echo '$(srcdir)/'`runtime/global_state.cpp
+record-replay.lo: runtime/record-replay.cpp
+@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT record-replay.lo -MD -MP -MF $(DEPDIR)/record-replay.Tpo -c -o record-replay.lo `test -f 'runtime/record-replay.cpp' || echo '$(srcdir)/'`runtime/record-replay.cpp
+@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/record-replay.Tpo $(DEPDIR)/record-replay.Plo
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/record-replay.cpp' object='record-replay.lo' libtool=yes @AMDEPBACKSLASH@
+@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@
+@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o record-replay.lo `test -f 'runtime/record-replay.cpp' || echo '$(srcdir)/'`runtime/record-replay.cpp
+
reducer_impl.lo: runtime/reducer_impl.cpp
@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT reducer_impl.lo -MD -MP -MF $(DEPDIR)/reducer_impl.Tpo -c -o reducer_impl.lo `test -f 'runtime/reducer_impl.cpp' || echo '$(srcdir)/'`runtime/reducer_impl.cpp
@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/reducer_impl.Tpo $(DEPDIR)/reducer_impl.Plo
diff --git a/libcilkrts/include/cilk/cilk.h b/libcilkrts/include/cilk/cilk.h
index fa7ac8eaea0..e4a6e155954 100644
--- a/libcilkrts/include/cilk/cilk.h
+++ b/libcilkrts/include/cilk/cilk.h
@@ -1,34 +1,66 @@
/* cilk.h -*-C++-*-
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file cilk.h
+ *
+ * @brief Provides convenient aliases for the Cilk language keywords.
+ *
+ * @details
+ * Since Cilk is a nonstandard extension to both C and C++, the Cilk
+ * language keywords all begin with “`_Cilk_`”, which guarantees that they
+ * will not conflict with user-defined identifiers in properly written
+ * programs, so that “standard” C and C++ programs can safely be
+ * compiled with a Cilk-enabled C or C++ compiler.
+ *
+ * However, this means that the keywords _look_ like something grafted on to
+ * the base language. Therefore, you can include this header:
+ *
+ * #include "cilk/cilk.h"
*
+ * and then write the Cilk keywords with a “`cilk_`” prefix instead of
+ * “`_Cilk_`”.
+ *
+ * @ingroup language
*/
-
-/* Define convenient aliases for Cilk keywords */
-
+
+
+/** @defgroup language Language Keywords
+ * Definitions having to do with the Cilk language.
+ * @{
+ */
+
#ifndef cilk_spawn
-# define cilk_spawn _Cilk_spawn
-# define cilk_sync _Cilk_sync
-# define cilk_for _Cilk_for
+# define cilk_spawn _Cilk_spawn ///< Spawn a task that can execute in parallel.
+# define cilk_sync _Cilk_sync ///< Wait for spawned tasks to complete.
+# define cilk_for _Cilk_for ///< Execute iterations of a for loop in parallel.
#endif
+
+/// @}
diff --git a/libcilkrts/include/cilk/cilk_api.h b/libcilkrts/include/cilk/cilk_api.h
index f7078ec2373..f56216ae6dd 100644
--- a/libcilkrts/include/cilk/cilk_api.h
+++ b/libcilkrts/include/cilk/cilk_api.h
@@ -1,62 +1,57 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
- *
- */
-
-/**
- * @if public_doc
- * @mainpage
- * @section intro_sec Introduction
+/* cilk_api.h
*
- * In addition to the Cilk Plus keywords, Intel Cilk Plus provides an API to
- * allow users to query and control the Intel Cilk Plus runtime.
- * @endif
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-
-/**
- * @file cilk_api.h
+
+/** @file cilk_api.h
*
- * @brief Defines the documented API exposed by the Intel Cilk Plus for use
+ * @brief Defines the documented API exposed by the Cilk Plus runtime for use
* by applications.
+ *
+ * @ingroup api
*/
-
-/**
- * @page API
- * Cilk API -- Functions callable by the user to modify the operation of the
- * Cilk scheduler.
- */
-
+
#ifndef INCLUDED_CILK_API_H
#define INCLUDED_CILK_API_H
-#ifndef CILK_STUB /* Real (non-stub) definitions below */
+/** @defgroup api Runtime API
+ * API to allow user programs to interact with the Cilk runtime.
+ * @{
+ */
+
+#ifndef CILK_STUB /* Real (non-stub) definitions */
#if ! defined(__cilk) && ! defined(USE_CILK_API)
# ifdef _WIN32
-# pragma message("Warning: Cilk ABI is being used with non-Cilk compiler (or Cilk is disabled)")
+# error Cilk API is being used with non-Cilk compiler (or Cilk is disabled)
# else
-# warning Cilk ABI is being used with non-Cilk compiler (or Cilk is disabled)
+# warning Cilk API is being used with non-Cilk compiler (or Cilk is disabled)
# endif
#endif
@@ -81,8 +76,9 @@
__CILKRTS_BEGIN_EXTERN_C
-/** @brief Return values from __cilkrts_set_param() and __cilkrts_set_param_w() */
-enum {
+/** Return values from __cilkrts_set_param() and __cilkrts_set_param_w()
+ */
+enum __cilkrts_set_param_status {
__CILKRTS_SET_PARAM_SUCCESS = 0, /**< Success - parameter set */
__CILKRTS_SET_PARAM_UNIMP = 1, /**< Unimplemented parameter */
__CILKRTS_SET_PARAM_XRANGE = 2, /**< Parameter value out of range */
@@ -90,42 +86,73 @@ enum {
__CILKRTS_SET_PARAM_LATE = 4 /**< Too late to change parameter value */
};
-/**
- * @brief Set user controllable parameters
+/** Set user controllable runtime parameters
+ *
+ * Call this function to set runtime parameters that control the behavior
+ * of the Cilk scheduler.
+ *
+ * @param param A string specifying the parameter to be set. One of:
+ * - `"nworkers"`
+ * - `"force reduce"`
+ * @param value A string specifying the parameter value.
+ * @returns A value from the @ref __cilkrts_set_param_status
+ * enumeration indicating the result of the operation.
+ *
+ * @par The "nworkers" parameter
*
- * @param param - string specifying parameter to be set
- * @param value - string specifying new value
- * @returns One of: __CILKRTS_SET_PARAM_SUCCESS ( = 0),
- * __CILKRTS_SET_PARAM_UNIMP, __CILKRTS_SET_PARAM_XRANGE,
- * __CILKRTS_SET_PARAM_INVALID, or __CILKRTS_SET_PARAM_LATE.
+ * This parameter specifies the number of worker threads to be created by the
+ * Cilk runtime. @a Value must be a string of digits to be parsed by
+ * `strtol()`.
*
- * @attention The wide character function __cilkrts_set_param_w() is available
- * only on Windows.
+ * The number of worker threads is:
+ * 1. the value set with `__cilkrts_set_param("nworkers")`, if it is
+ * positive; otherwise,
+ * 2. the value of the CILK_NWORKERS environment variable, if it is
+ * defined; otherwise
+ * 3. the number of cores available, as reported by the operating system.
*
- * Allowable parameter names:
+ * @note
+ * Technically, Cilk distinguishes between the _user thread_ (the thread that
+ * the user code was executing on when the Cilk runtime started), and
+ * _worker threads_ (new threads created by the Cilk runtime to support
+ * Cilk parallelism). `nworkers` actually includes both the user thread and
+ * the worker threads; that is, it is one greater than the number of true
+ * “worker threads”.
*
- * - "nworkers" - number of cores that should run Cilk code. The value is a
- * string of digits to be parsed by strtol. Negative numbers are not valid
- * for "nworkers".
+ * @note
+ * Setting `nworkers = 1` produces serial behavior. Cilk spawns and syncs will
+ * be executed, but with only one worker, continuations will never be stolen,
+ * so all code will execute in serial.
*
- * The precedence for "nworkers" is:
- * 1) __cilkrts_set_param("nworkers")
- * 2) The CILK_NWORKERS environment variable
- * 3) The number of cores returned by the OS.
+ * @warning
+ * The number of worker threads can only be set *before* the runtime has
+ * started. Attempting to set it when the runtime is running will have no
+ * effect, and will return an error code. You can call __cilkrts_end_cilk()
+ * to shut down the runtime to change the number of workers.
*
- * Setting "nworkers" to "0" sets the number of workers to the value of
- * CILK_NWORKERS environment number or the number of cores returned by the
- * OS.
+ * @warning
+ * The default Cilk scheduler behavior is usually pretty good. The ability
+ * to override `nworkers` can be useful for experimentation, but it won’t
+ * usually be necessary for getting good performance.
*
- * "nworkers" can only be set *before* the runtime has started. Attempting
- * to set "nworkers" when the runtime is running will return an error code.
- * You can use __cilkrts_end_cilk() to shut down the runtime to change the
- * number of workers.
+ * @par The "force reduce" parameter
*
- * - "force reduce" - test reducer callbacks by allocating new views
- * for every spawn within which a reducer is accessed. This can
- * significantly reduce performance. The value is "1" or "true"
- * to enable, "0" or "false" to disable.
+ * This parameter controls whether the runtime should allocate a new view
+ * for a reducer for every parallel strand that it is accessed on. (See
+ * @ref pagereducers.) @a Value must be `"1"` or `"true"` to enable the
+ * “force reduce” behavior, or `"0"` or `"false"` to disable it.
+ *
+ * “Force reduce” behavior will also be enabled if
+ * `__cilkrts_set_param("force reduce")` is not called, but the
+ * `CILK_FORCE_REDUCE` environment variable is defined.
+ *
+ * @warning
+ * When this option is enabled, `nworkers` should be set to `1`. Using “force
+ * reduce” with more than one worker may result in runtime errors.
+ *
+ * @warning
+ * Enabling this option can significantly reduce performance. It should
+ * _only_ be used as a debugging tool.
*/
CILK_API(int) __cilkrts_set_param(const char *param, const char *value);
@@ -133,73 +160,81 @@ CILK_API(int) __cilkrts_set_param(const char *param, const char *value);
/**
* Set user controllable parameters using wide strings
*
+ * @note This variant of __cilkrts_set_param() is only available
+ * on Windows.
+ *
* @copydetails __cilkrts_set_param
*/
CILK_API(int) __cilkrts_set_param_w(const wchar_t *param, const wchar_t *value);
#endif
-/**
- * Shut down and deallocate all Cilk state. The runtime will abort the
- * application if Cilk is still in use by this thread. Otherwise the runtime
- * will wait for all other threads using Cilk to exit.
+/** Shut down and deallocate all Cilk state. The runtime will abort the
+ * application if Cilk is still in use by this thread. Otherwise the runtime
+ * will wait for all other threads using Cilk to exit.
*/
CILK_API(void) __cilkrts_end_cilk(void);
-/**
- * Allocate Cilk data structures, starting the runtime.
+/** Initialize the Cilk data structures and start the runtime.
*/
CILK_API(void) __cilkrts_init(void);
-/**
- * Return the number of worker threads that this instance of Cilk
- * will attempt to use.
+/** Return the runtime `nworkers` parameter. (See the discussion of `nworkers`
+ * in the documentation for __cilkrts_set_param().)
*/
CILK_API(int) __cilkrts_get_nworkers(void);
-/**
- *Return the number of worker threads allocated.
+/** Return the number of thread data structures.
+ *
+ * This function returns the number of data structures that have been
+ * allocated by the runtime to hold information about user and worker threads.
+ *
+ * If you don’t already know what this is good for, then you probably don’t
+ * need it.
*/
CILK_API(int) __cilkrts_get_total_workers(void);
-/**
- * Return a small integer indicating which Cilk worker the function is
- * currently running on. Each thread started by the Cilk runtime library
- * (referred to as a system worker) has a unique worker number in the range
- * 1..P-1, where P is the value returned by __cilkrts_get_nworkers().
- *
- * Note that all threads started by the user or by other libraries (referred
- * to as user workers) share the worker number 0. Therefore, the worker number
- * is not unique across multiple user threads.
+/** What thread is the function running on?
+ *
+ * Return a small integer identifying the current thread. Each worker thread
+ * started by the Cilk runtime library has a unique worker number in the range
+ * `1 .. nworkers - 1`.
+ *
+ * All _user_ threads (threads started by the user, or by other libraries) are
+ * identified as worker number 0. Therefore, the worker number is not unique
+ * across multiple user threads.
*/
CILK_API(int) __cilkrts_get_worker_number(void);
-/**
- * Return non-zero if force reduce mode is on
+/** Test whether “force reduce” behavior is enabled.
+ *
+ * @return Non-zero if force-reduce mode is on, zero if it is off.
*/
CILK_API(int) __cilkrts_get_force_reduce(void);
-/**
- * Interact with tools
+/** Interact with tools
*/
CILK_API(void)
__cilkrts_metacall(unsigned int tool, unsigned int code, void *data);
#ifdef _WIN32
+/// Windows exception description record.
typedef struct _EXCEPTION_RECORD _EXCEPTION_RECORD;
-/** Callback function signature for Windows exception notification */
+/** Function signature for Windows exception notification callbacks.
+ */
typedef void (*__cilkrts_pfn_seh_callback)(const _EXCEPTION_RECORD *exception);
-/**
- * Debugging aid for exceptions on Windows.
+/** Specify a function to call when a non-C++ exception is caught.
+ *
+ * Cilk Plus parallelism plays nicely with C++ exception handling, but the
+ * Cilk Plus runtime has no way to unwind the stack across a strand boundary
+ * for Microsoft SEH (“Structured Exception Handling”) exceptions. Therefore,
+ * when the runtime catches such an exception, it must abort the application.
*
- * The specified function will be called when a non-C++ exception is caught
- * by the Cilk Plus runtime. This is illegal since there's no way for the
- * Cilk Plus runtime to know how to unwind the stack across a strand boundary
- * for Structure Exceptions.
+ * If an SEH callback has been set, the runtime will call it before aborting.
*
- * This function allows an application to do something before the Cilk Plus
- * runtime aborts the application.
+ * @param pfn A pointer to a callback function to be called before the
+ * runtime aborts the program because of an SEH exception.
*/
CILK_API(int) __cilkrts_set_seh_callback(__cilkrts_pfn_seh_callback pfn);
#endif /* _WIN32 */
@@ -207,25 +242,36 @@ CILK_API(int) __cilkrts_set_seh_callback(__cilkrts_pfn_seh_callback pfn);
#if __CILKRTS_ABI_VERSION >= 1
/* Pedigree API is available only for compilers that use ABI version >= 1. */
-/**
- * Pedigree API
+
+/** @name Pedigrees
*/
+//@{
-/* Internal implementation of __cilkrts_get_pedigree */
+// @cond internal
+
+/** Support for __cilkrts_get_pedigree.
+ */
CILK_API(__cilkrts_pedigree)
__cilkrts_get_pedigree_internal(__cilkrts_worker *w);
-/**
- * @brief Returns the current pedigree, in a linked list representation.
+/** Support for __cilkrts_bump_worker_rank.
+ */
+CILK_API(int)
+__cilkrts_bump_worker_rank_internal(__cilkrts_worker* w);
+
+/// @endcond
+
+
+/** Get the current pedigree, in a linked list representation.
*
- * This routine returns a copy of the last node in the pedigree list.
- * For example, if the current pedigree (in order) is <1, 2, 3, 4>,
- * then this method returns a node with rank == 4, and whose parent
- * field points to the node with rank of 3. In summary, following the
- * nodes in the chain visits the terms of the pedigree in reverse.
+ * This routine returns a copy of the last node in the pedigree list.
+ * For example, if the current pedigree (in order) is <1, 2, 3, 4>,
+ * then this method returns a node with rank == 4, and whose parent
+ * field points to the node with rank of 3. In summary, following the
+ * nodes in the chain visits the terms of the pedigree in reverse.
*
- * The returned node is guaranteed to be valid only until the caller
- * of this routine has returned.
+ * The returned node is guaranteed to be valid only until the caller
+ * of this routine has returned.
*/
__CILKRTS_INLINE
__cilkrts_pedigree __cilkrts_get_pedigree(void)
@@ -233,13 +279,16 @@ __cilkrts_pedigree __cilkrts_get_pedigree(void)
return __cilkrts_get_pedigree_internal(__cilkrts_get_tls_worker());
}
-/**
- * @brief DEPRECATED -- Context used by __cilkrts_get_pedigree_info.
+/** Context used by __cilkrts_get_pedigree_info.
+ *
+ * @deprecated
+ * This data structure is only used by the deprecated
+ * __cilkrts_get_pedigree_info function.
*
- * Callers should initialize the
- * data array to NULL, and set the size to sizeof(__cilkrts_pedigree_context_t
- * before the first call to __cilkrts_get_pedigree_info and should not examine
- * or modify it after.
+ * Callers should initialize the `data` array to NULL and set the `size`
+ * field to `sizeof(__cilkrts_pedigree_context_t)` before the first call
+ * to __cilkrts_get_pedigree_info(), and should not examine or modify it
+ * thereafter.
*/
typedef struct
{
@@ -247,17 +296,19 @@ typedef struct
void *data[3]; /**< Opaque context data */
} __cilkrts_pedigree_context_t;
-/**
- * @brief DEPRECATED -- Use __cilkrts_get_pedigree instead.
+/** Get pedigree information.
+ *
+ * @deprecated
+ * Use __cilkrts_get_pedigree() instead.
*
- * This routine allows code to walk up the stack of Cilk frames to gather
- * the pedigree.
+ * This routine allows code to walk up the stack of Cilk frames to gather
+ * the pedigree.
*
- * Initialize the pedigree walk by filling the pedigree context with NULLs
- * and setting the size field to sizeof(__cilkrts_pedigree_context).
- * Other than initialization to NULL to start the walk, user coder should
- * consider the pedigree context data opaque and should not examine or
- * modify it.
+ * Initialize the pedigree walk by filling the pedigree context with NULLs
+ * and setting the size field to sizeof(__cilkrts_pedigree_context_t).
+ * Other than initialization to NULL to start the walk, user code should
+ * consider the pedigree context data opaque and should not examine or
+ * modify it.
*
* @returns 0 - Success - birthrank is valid
* @returns >0 - End of pedigree walk
@@ -270,10 +321,10 @@ CILK_API(int)
__cilkrts_get_pedigree_info(/* In/Out */ __cilkrts_pedigree_context_t *context,
/* Out */ uint64_t *sf_birthrank);
-/**
- * @brief DEPRECATED -- Use __cilkrts_get_pedigree().rank instead.
+/** Get the rank of the currently executing worker.
*
- * Fetch the rank from the currently executing worker
+ * @deprecated
+ * Use `__cilkrts_get_pedigree().rank` instead.
*
* @returns 0 - Success - *rank is valid
* @returns <0 - Failure - *rank is not changed
@@ -285,12 +336,7 @@ int __cilkrts_get_worker_rank(uint64_t *rank)
return 0;
}
-/* Internal implementation of __cilkrts_bump_worker_rank */
-CILK_API(int)
-__cilkrts_bump_worker_rank_internal(__cilkrts_worker* w);
-
-/**
- * @brief Increment the pedigree rank of the currently executing worker
+/** Increment the pedigree rank of the currently executing worker.
*
* @returns 0 - Success - rank was incremented
* @returns-1 - Failure
@@ -301,57 +347,33 @@ int __cilkrts_bump_worker_rank(void)
return __cilkrts_bump_worker_rank_internal(__cilkrts_get_tls_worker());
}
-/* Internal implementation of __cilkrts_bump_worker_rank */
-CILK_API(int)
-__cilkrts_bump_loop_rank_internal(__cilkrts_worker* w);
-
-/**
- * @brief Increment the pedigree rank for a cilk_for loop.
- *
- * A cilk_for loop is implemented using a divide and conquer recursive
- * algorithm. This allows the work of the cilk_for loop to spread optimally
- * across the available workers. Unfortunately, this makes the pedigree
- * for dependent on the grainsize. Unless overridden by the cilk grainsize
- * pragma, the grainsize is based on number of workers and the number of
- * iterations in the loop.
- *
- * To fix this, the pedigree is "flattened" in a cilk_for. A pedigree node is
- * created for the loop index, and a second node is created for the loop body.
- * The compiler generates a lambda function from the loop body that is passed
- * the low and high bounds of the loop indicies it should iterate over. This
- * range is the "grain size". When the loop body lambda function is called,
- * the pedigree rank of the loop index node is initialized to the lower loop
- * index.
- *
- * Eventually, the compiler generated loop body lambda function should
- * increment the cilk_for rank at the end of each iteration around the
- * cilk_for loop body. However, this is not currently implemented.
- *
- * This function is provided to allow users to increment the cilk_for rank
- * themselves. Users should call this function only at the end of a cilk_for
- * loop body. Use of this function is not required. If not used, the
- * pedigree sequence will change any time the loop's grainsize changes, i.e.,
- * if the program is run with a different number of workers.
- *
- * When the code generated by the compiler for the cilk_for loop body
- * "does the right thing" this function will become a noop.
- *
- * @returns 0 - Success - rank was incremented
- * @returns -1 - Failure
+/** Increment the pedigree rank for a cilk_for loop.
+ * Obsolete.
+ *
+ * @deprecated
+ * This function was provided to allow the user to manipulate the pedigree
+ * rank of a `cilk_for` loop. The compiler now generates code to do that
+ * manipulation automatically, so this function is now unnecessary. It may
+ * be called, but will have no effect.
*/
CILK_EXPORT_AND_INLINE
int __cilkrts_bump_loop_rank(void)
{
- return __cilkrts_bump_loop_rank_internal(__cilkrts_get_tls_worker());
+ return 0;
}
+//@}
+
#endif /* __CILKRTS_ABI_VERSION >= 1 */
__CILKRTS_END_EXTERN_C
#else /* CILK_STUB */
-/* Stubs for the api functions */
+// Programs compiled with CILK_STUB are not linked with the Cilk runtime
+// library, so they should not have external references to runtime functions.
+// Therefore, the functions are replaced with stubs.
+
#ifdef _WIN32
#define __cilkrts_set_param_w(name,value) ((value), 0)
#define __cilkrts_set_seh_callback(pfn) (0)
@@ -372,7 +394,7 @@ __CILKRTS_END_EXTERN_C
#define __cilkrts_bump_worker_rank() (-1)
#define __cilkrts_bump_loop_rank() (-1)
-/**
+/*
* A stub method for __cilkrts_get_pedigree.
* Returns an empty __cilkrts_pedigree.
*/
@@ -392,4 +414,6 @@ __cilkrts_pedigree __cilkrts_get_pedigree_stub(void)
#endif /* CILK_STUB */
+//@}
+
#endif /* INCLUDED_CILK_API_H */
diff --git a/libcilkrts/include/cilk/cilk_api_linux.h b/libcilkrts/include/cilk/cilk_api_linux.h
index c4d2dad78c6..eae7be4c8f4 100644
--- a/libcilkrts/include/cilk/cilk_api_linux.h
+++ b/libcilkrts/include/cilk/cilk_api_linux.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
diff --git a/libcilkrts/include/cilk/cilk_stub.h b/libcilkrts/include/cilk/cilk_stub.h
index 80f0c3e0207..67df008c4d7 100644
--- a/libcilkrts/include/cilk/cilk_stub.h
+++ b/libcilkrts/include/cilk/cilk_stub.h
@@ -1,27 +1,32 @@
/* cilk_stub.h -*-C++-*-
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
diff --git a/libcilkrts/include/cilk/cilk_undocumented.h b/libcilkrts/include/cilk/cilk_undocumented.h
index 7eca7ee08f0..9163492c821 100644
--- a/libcilkrts/include/cilk/cilk_undocumented.h
+++ b/libcilkrts/include/cilk/cilk_undocumented.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
******************************************************************************
*
@@ -95,15 +100,17 @@ CILK_EXPORT __CILKRTS_NOTHROW
int __cilkrts_watch_stack(struct __cilk_tbb_unwatch_thunk *u,
struct __cilk_tbb_stack_op_thunk o);
+#ifndef IN_CILK_RUNTIME
#ifdef _WIN32
/* Do not use CILK_API because __cilkrts_worker_stub must be __stdcall */
CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall
__cilkrts_worker_stub(void *arg);
#else
-/* Do not use CILK_API because __cilkrts_worker_stub have defauld visibility */
-__attribute__((visibility("default")))
-void* __CILKRTS_NOTHROW __cilkrts_worker_stub(void *arg);
-#endif
+/* Do not use CILK_API because __cilkrts_worker_stub must have default visibility */
+CILK_EXPORT void* __CILKRTS_NOTHROW
+__cilkrts_worker_stub(void *arg);
+#endif /* _WIN32 */
+#endif /* IN_CILK_RUNTIME */
__CILKRTS_END_EXTERN_C
diff --git a/libcilkrts/include/cilk/common.h b/libcilkrts/include/cilk/common.h
index bc28100bfcb..aeff8f358e6 100644
--- a/libcilkrts/include/cilk/common.h
+++ b/libcilkrts/include/cilk/common.h
@@ -1,39 +1,74 @@
-/*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/** common.h
*
+ * @copyright
+ * Copyright (C) 2010-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file common.h
+ *
+ * @brief Defines common macros and structures used by the Intel Cilk Plus
+ * runtime.
+ *
+ * @ingroup common
*/
+/** @defgroup common Common Definitions
+ * Macro, structure, and class definitions used elsewhere in the runtime.
+ * @{
+ */
+
#ifndef INCLUDED_CILK_COMMON
#define INCLUDED_CILK_COMMON
#ifdef __cplusplus
+/** Namespace for all Cilk definitions that can be included in user code.
+ */
+namespace cilk {
+
+ /** Namespace for definitions that are primarily intended for use
+ * in other Cilk definitions.
+ */
+ namespace internal {}
+}
+#endif
+
+/** Cilk library version = 1.0
+ */
+#define CILK_LIBRARY_VERSION 100
+
+#ifdef __cplusplus
# include <cassert>
#else
# include <assert.h>
#endif
-/* Prefix standard library function and type names with __STDNS in order to
+/**
+ * Prefix standard library function and type names with __STDNS in order to
* get correct lookup in both C and C++.
*/
#ifdef __cplusplus
@@ -42,8 +77,12 @@
# define __STDNS
#endif
-/* CILK_EXPORT - Define export of runtime functions from shared library.
+/**
+ * @def CILK_EXPORT
+ * Define export of runtime functions from shared library.
* Should be exported only from cilkrts*.dll/cilkrts*.so
+ * @def CILK_EXPORT_DATA
+ * Define export of runtime data from shared library.
*/
#ifdef _WIN32
# ifdef IN_CILK_RUNTIME
@@ -53,7 +92,7 @@
# define CILK_EXPORT __declspec(dllimport)
# define CILK_EXPORT_DATA __declspec(dllimport)
# endif /* IN_CILK_RUNTIME */
-#elif defined(__CYGWIN__)
+#elif defined(__CYGWIN__) || defined(__APPLE__) || defined(_DARWIN_C_SOURCE)
# define CILK_EXPORT /* nothing */
# define CILK_EXPORT_DATA /* nothing */
#else /* Unix/gcc */
@@ -66,14 +105,39 @@
# endif /* IN_CILK_RUNTIME */
#endif /* Unix/gcc */
+/**
+ * @def __CILKRTS_BEGIN_EXTERN_C
+ * Macro to denote the start of a section in which all names have "C" linkage.
+ * That is, none of the names are to be mangled.
+ * @see __CILKRTS_END_EXTERN_C
+ * @see __CILKRTS_EXTERN_C
+ *
+ * @def __CILKRTS_END_EXTERN_C
+ * Macro to denote the end of a section in which all names have "C" linkage.
+ * That is, none of the names are to be mangled.
+ * @see __CILKRTS_BEGIN_EXTERN_C
+ * @see __CILKRTS_EXTERN_C
+ *
+ * @def __CILKRTS_EXTERN_C
+ * Macro to prefix a single definition which has "C" linkage.
+ * That is, the defined name is not to be mangled.
+ * @see __CILKRTS_BEGIN_EXTERN_C
+ * @see __CILKRTS_END_EXTERN_C
+ */
#ifdef __cplusplus
-# define __CILKRTS_BEGIN_EXTERN_C extern "C" {
-# define __CILKRTS_END_EXTERN_C }
+# define __CILKRTS_BEGIN_EXTERN_C extern "C" {
+# define __CILKRTS_END_EXTERN_C }
+# define __CILKRTS_EXTERN_C extern "C"
#else
# define __CILKRTS_BEGIN_EXTERN_C
# define __CILKRTS_END_EXTERN_C
+# define __CILKRTS_EXTERN_C
#endif
+/**
+ * OS-independent macro to specify a function which is known to not throw
+ * an exception.
+ */
#ifdef __cplusplus
# ifdef _WIN32
# define __CILKRTS_NOTHROW __declspec(nothrow)
@@ -84,15 +148,31 @@
# define __CILKRTS_NOTHROW /* nothing */
#endif /* __cplusplus */
+/** Cache alignment. (Good enough for most architectures.)
+ */
+#define __CILKRTS_CACHE_LINE__ 64
+
+/**
+ * Macro to specify alignment of a data member in a structure.
+ */
#ifdef _WIN32
# define CILK_ALIGNAS(n) __declspec(align(n))
#else /* Unix/gcc */
-# define CILK_ALIGNAS(n) __attribute__((aligned(n)))
+# define CILK_ALIGNAS(n) __attribute__((__aligned__(n)))
#endif /* Unix/gcc */
-/* CILK_API: Called explicitly by the programmer.
- * CILK_ABI: Called by compiler-generated code.
- * CILK_ABI_THROWS: An ABI function that may throw an exception
+/**
+ * Macro to specify cache-line alignment of a data member in a structure.
+ */
+#define __CILKRTS_CACHE_ALIGN CILK_ALIGNAS(__CILKRTS_CACHE_LINE__)
+
+/**
+ * @def CILK_API(RET_TYPE)
+ * A function called explicitly by the programmer.
+ * @def CILK_ABI(RET_TYPE)
+ * A function called by compiler-generated code.
+ * @def CILK_ABI_THROWS(RET_TYPE)
+ * An ABI function that may throw an exception
*
* Even when these are the same definitions, they should be separate macros so
* that they can be easily found in the code.
@@ -108,7 +188,8 @@
# define CILK_ABI_THROWS(RET_TYPE) CILK_EXPORT RET_TYPE
#endif
-/* __CILKRTS_ASSERT should be defined for debugging only, otherwise it
+/**
+ * __CILKRTS_ASSERT should be defined for debugging only, otherwise it
* interferes with vectorization. Since NDEBUG is not reliable (it must be
* set by the user), we must use a platform-specific detection of debug mode.
*/
@@ -126,7 +207,9 @@
# define __CILKRTS_ASSERT(e) ((void) 0)
#endif
-// Inlining is always available, but not always the same way.
+/**
+ * OS-independent macro to specify a function that should be inlined
+ */
#ifdef __cplusplus
// C++: 'inline' is a keyword. (This guard used to be misspelled "__cpluspus", so the branch was never taken.)
# define __CILKRTS_INLINE inline
@@ -137,22 +220,26 @@
// C89 on Windows
# define __CILKRTS_INLINE __inline
#else
- // C89 on Linux
-# define __CILKRTS_INLINE __inline__
+ // C89 on GCC-compatible systems
+# define __CILKRTS_INLINE extern __inline__
#endif
-// Functions marked as CILK_EXPORT_AND_INLINE have both
-// inline versions defined in the Cilk API, as well as
-// non-inlined versions that are exported (for
-// compatibility with previous versions that did not
-// inline the functions).
+/**
+ * Functions marked as CILK_EXPORT_AND_INLINE have both
+ * inline versions defined in the Cilk API, as well as
+ * non-inlined versions that are exported (for
+ * compatibility with previous versions that did not
+ * inline the functions).
+ */
#ifdef COMPILING_CILK_API_FUNCTIONS
# define CILK_EXPORT_AND_INLINE CILK_EXPORT
#else
# define CILK_EXPORT_AND_INLINE __CILKRTS_INLINE
#endif
-// Try to determine if compiler supports rvalue references.
+/**
+ * Try to determine if compiler supports rvalue references.
+ */
#if defined(__cplusplus) && !defined(__CILKRTS_RVALUE_REFERENCES)
# if __cplusplus >= 201103L // C++11
# define __CILKRTS_RVALUE_REFERENCES 1
@@ -223,17 +310,35 @@
// the internal version of API methods require a worker
// structure as parameter.
__CILKRTS_BEGIN_EXTERN_C
+ /// Worker struct, exported for inlined API methods
+ /// @ingroup api
struct __cilkrts_worker;
- typedef struct __cilkrts_worker __cilkrts_worker;
- typedef struct __cilkrts_worker *__cilkrts_worker_ptr;
+
+ /// Worker struct, exported for inlined API methods
+ /// @ingroup api
+ typedef struct __cilkrts_worker __cilkrts_worker;
+
+ /// Worker struct pointer, exported for inlined API methods
+ /// @ingroup api
+ typedef struct __cilkrts_worker *__cilkrts_worker_ptr;
+
+
+ /// Fetch the worker out of TLS.
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker(void);
+
+ /// void *, defined to work around complaints from the compiler
+ /// about using __declspec(nothrow) after the "void *" return type
+ typedef void * __cilkrts_void_ptr;
+
__CILKRTS_END_EXTERN_C
#if __CILKRTS_ABI_VERSION >= 1
// Pedigree API is available only for compilers that use ABI version >= 1.
-/** Pedigree information kept in the worker and stack frame */
+/** Pedigree information kept in the worker and stack frame.
+ * @ingroup api
+ */
typedef struct __cilkrts_pedigree
{
/** Rank at start of spawn helper. Saved rank for spawning functions */
@@ -245,4 +350,6 @@ typedef struct __cilkrts_pedigree
#endif // __CILKRTS_ABI_VERSION >= 1
+/// @}
+
#endif /* INCLUDED_CILK_COMMON */
diff --git a/libcilkrts/include/cilk/holder.h b/libcilkrts/include/cilk/holder.h
index 87778ed84c1..ac80f42d1d2 100644
--- a/libcilkrts/include/cilk/holder.h
+++ b/libcilkrts/include/cilk/holder.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
diff --git a/libcilkrts/include/cilk/hyperobject_base.h b/libcilkrts/include/cilk/hyperobject_base.h
index 73279065f99..5d0393e1b04 100644
--- a/libcilkrts/include/cilk/hyperobject_base.h
+++ b/libcilkrts/include/cilk/hyperobject_base.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
@@ -45,19 +50,6 @@
# endif
#endif
-/* Macro to cache-align a declaration. Argument(s) comprise either a
- * variable or a struct declaration. */
-#define __CILKRTS_CACHE_LINE__ 64 /* Good enough for most architectures */
-#if defined(__INTEL_COMPILER) || defined(_WIN32)
-# define __CILKRTS_CACHE_ALIGNED(...) \
- __declspec(align(__CILKRTS_CACHE_LINE__)) __VA_ARGS__
-#elif defined(__GNUC__)
-# define __CILKRTS_CACHE_ALIGNED(...) \
- __VA_ARGS__ __attribute__((__aligned__(__CILKRTS_CACHE_LINE__)))
-#else
-# define __CILKRTS_CACHE_ALIGNED(...) __VA_ARGS__
-#endif
-
/* The __CILKRTS_STRAND_PURE attribute tells the compiler that the value
* returned by 'func' for a given argument to 'func' will remain valid until
* the next strand boundary (spawn or sync) or until the next call to a
@@ -106,6 +98,9 @@ typedef struct __cilkrts_hyperobject_base
__STDNS size_t __view_size; /* Size of each view */
} __cilkrts_hyperobject_base;
+
+#ifndef CILK_STUB
+
/* Library functions. */
CILK_EXPORT
void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key);
@@ -123,6 +118,50 @@ CILK_EXPORT
CILK_EXPORT
void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2);
+
+#else // CILK_STUB
+
+// Programs compiled with CILK_STUB are not linked with the Cilk runtime
+// library, so they should not have external references to cilkrts functions.
+// Furthermore, they don't need the hyperobject functionality, so the
+// functions can be stubbed.
+
+#define __cilkrts_hyperobject_create __cilkrts_hyperobject_create__stub
+__CILKRTS_INLINE
+ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key)
+ {}
+
+#define __cilkrts_hyperobject_destroy __cilkrts_hyperobject_destroy__stub
+__CILKRTS_INLINE
+ void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key)
+ {}
+
+#define __cilkrts_hyperobject_lookup __cilkrts_hyperobject_lookup__stub
+__CILKRTS_INLINE
+ void* __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key)
+ { return (char*)(key) + key->__view_offset; }
+
+// Pointers to these functions are stored into monoids, so real functions
+// are needed.
+
+#define __cilkrts_hyperobject_alloc __cilkrts_hyperobject_alloc__stub
+__CILKRTS_INLINE
+ void* __cilkrts_hyperobject_alloc(void* ignore, __STDNS size_t bytes)
+ { assert(0); return __STDNS malloc(bytes); }
+
+#define __cilkrts_hyperobject_dealloc __cilkrts_hyperobject_dealloc__stub
+__CILKRTS_INLINE
+ void __cilkrts_hyperobject_dealloc(void* ignore, void* view)
+ { assert(0); __STDNS free(view); }
+
+#define __cilkrts_hyperobject_noop_destroy \
+ __cilkrts_hyperobject_noop_destroy__stub
+__CILKRTS_INLINE
+ void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2)
+ {}
+
+#endif
+
__CILKRTS_END_EXTERN_C
#endif /* INCLUDED_CILK_HYPEROBJECT_BASE */
diff --git a/libcilkrts/include/cilk/metaprogramming.h b/libcilkrts/include/cilk/metaprogramming.h
new file mode 100644
index 00000000000..6ef8c063688
--- /dev/null
+++ b/libcilkrts/include/cilk/metaprogramming.h
@@ -0,0 +1,523 @@
+/* metaprogramming.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2012-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file metaprogramming.h
+ *
+ * @brief Defines metaprogramming utility classes used in the Cilk library.
+ *
+ * @ingroup common
+ */
+
+#ifndef METAPROGRAMMING_H_INCLUDED
+#define METAPROGRAMMING_H_INCLUDED
+
+#ifdef __cplusplus
+
+#include <functional>
+#include <new>
+#include <cstdlib>
+#ifdef _WIN32
+#include <malloc.h>
+#endif
+#include <algorithm>
+
+namespace cilk {
+
+namespace internal {
+
+/** Test if a class is empty.
+ *
+ * If @a Class is an empty (and therefore necessarily stateless) class, then
+ * the "empty base-class optimization" guarantees that
+ * `sizeof(check_for_empty_class<Class>) == sizeof(char)`. Conversely, if
+ * `sizeof(check_for_empty_class<Class>) > sizeof(char)`, then @a Class is not
+ * empty, and we must discriminate distinct instances of @a Class.
+ *
+ * Typical usage:
+ *
+ * // General definition of A<B> for non-empty B:
+ * template <typename B, bool BIsEmpty = class_is_empty<B>::value>
+ * class A { ... };
+ *
+ * // Specialized definition of A<B> for empty B:
+ * template <typename B>
+ * class A<B, true> { ... };
+ *
+ * @tparam Class The class to be tested for emptiness.
+ *
+ * @result The `value` member will be `true` if @a Class is empty,
+ * `false` otherwise.
+ *
+ * @ingroup common
+ */
+template <class Class>
+class class_is_empty {
+ class check_for_empty_class : public Class
+ {
+ char m_data;
+ public:
+ // Declared but not defined: only the size of this class is used (see `value`).
+ check_for_empty_class();
+ check_for_empty_class(const check_for_empty_class&);
+ check_for_empty_class& operator=(const check_for_empty_class&);
+ ~check_for_empty_class();
+ };
+public:
+
+ /** Constant is true if and only if @a Class is empty.
+ */
+ static const bool value = (sizeof(check_for_empty_class) == sizeof(char));
+};
+
+
+/** Compute the alignment of a type. (More precisely, the alignment of a data
+ * member of the type in a structure.)
+ *
+ * For example, on a platform where a `double` structure member is aligned
+ * on an 8-byte boundary:
+ *
+ * align_of<double>::value == 8
+ *
+ * Adapted from the [AlignOf](http://llvm.org/doxygen/AlignOf_8h_source.html)
+ * class used in [LLVM](http://llvm.org).
+ *
+ * @tparam T The type whose alignment is to be computed.
+ *
+ * @result `value` will be the alignment for type @a T.
+ *
+ * @see alignof()
+ *
+ * @ingroup common
+ */
+template <typename T>
+class align_of {
+
+ struct impl {
+ char x;
+ T t;
+ impl(); // Never instantiate.
+ impl(const impl&);
+ };
+
+public:
+ enum {
+ /** The alignment of the type @a T.
+ *
+ * Computed as the size of `impl` (a `char` followed by a @a T) minus
+ * the size of @a T, i.e. the space occupied by the leading `char`
+ * plus whatever padding the compiler adds to `impl`.
+ */
+ value = static_cast<std::size_t>(sizeof(impl) - sizeof(T))
+ };
+};
+
+
+/** Get the functor class corresponding to a binary function type.
+ *
+ * The `binary_functor` template class can be instantiated with a binary
+ * functor class or with a real binary function, and will yield an equivalent
+ * binary functor class in either case.
+ *
+ * @tparam F A binary functor class, a binary function type, or a pointer to
+ * binary function type.
+ *
+ * @result `binary_functor<F>::%type` will be the same as @a F if @a F is
+ * a class. It will be a `std::pointer_to_binary_function` wrapper
+ * if @a F is a binary function or binary function pointer type.
+ * (It will _not_ necessarily be an `Adaptable Binary Function`
+ * class, since @a F might be a non-adaptable binary functor
+ * class.)
+ *
+ * @ingroup common
+ */
+template <typename F>
+struct binary_functor {
+ /// The binary functor class equivalent to @a F (here, @a F itself).
+ typedef F type;
+};
+
+/// @copydoc binary_functor
+/// Specialization for binary function.
+template <typename R, typename A, typename B>
+struct binary_functor<R(A,B)> {
+ /// The binary functor class equivalent to the function type `R(A,B)`.
+ typedef std::pointer_to_binary_function<A, B, R> type;
+};
+
+/// @copydoc binary_functor
+/// Specialization for pointer to binary function.
+template <typename R, typename A, typename B>
+struct binary_functor<R(*)(A,B)> {
+ /// The binary functor class equivalent to the pointer type `R(*)(A,B)`.
+ typedef std::pointer_to_binary_function<A, B, R> type;
+};
+
+
+/** Indirect binary function class with specified types.
+ *
+ * `typed_indirect_binary_function<F, A1, A2, R>` is an `Adaptable Binary
+ * Function` class based on an existing binary functor class or binary
+ * function type @a F. If @a F is a stateless class, then this class will be
+ * empty, and its `operator()` will invoke @a F's `operator()`. Otherwise, an
+ * object of this class will hold a pointer to an object of type @a F, and
+ * will refer its `operator()` calls to the pointed-to @a F object.
+ *
+ * That is, suppose that we have the declarations:
+ *
+ * F *p;
+ * typed_indirect_binary_function<F, int, int, bool> ibf(p);
+ *
+ * Then:
+ *
+ * - `ibf(x, y) == (*p)(x, y)`.
+ * - `ibf(x, y)` will not do a pointer dereference if `F` is an empty class.
+ *
+ * @note Just to repeat: if `F` is an empty class, then
+ * `typed_indirect_binary_function<F, A1, A2, R>` is also an empty
+ * class. This is critical for its use in the
+ * @ref min_max::view_base "min/max reducer view classes", where it
+ * allows the view to call a comparison functor in the monoid
+ * without actually having to allocate a pointer in the view class
+ * when the comparison class is empty.
+ *
+ * @note If you have an `Adaptable Binary Function` class or a binary
+ * function type, then you can use the
+ * @ref indirect_binary_function class, which derives the
+ * argument and result types from the parameter type instead of
+ * requiring you to specify them as template arguments.
+ *
+ * @note `std::binary_function` is inherited publicly so that the
+ * `first_argument_type`, `second_argument_type`, and `result_type`
+ * typedefs required of an `Adaptable Binary Function` are
+ * accessible to users of this class.
+ *
+ * @tparam F A binary functor class, a binary function type, or a pointer to
+ * binary function type.
+ * @tparam A1 The first argument type.
+ * @tparam A2 The second argument type.
+ * @tparam R The result type.
+ * @tparam Functor The functor class equivalent to @a F. Defaulted;
+ * used only to select the specialization.
+ * @tparam FunctorIsEmpty True if @a Functor is an empty class. Defaulted;
+ * used only to select the specialization.
+ *
+ * @see min_max::comparator_base
+ * @see indirect_binary_function
+ *
+ * @ingroup common
+ */
+template < typename F
+ , typename A1
+ , typename A2
+ , typename R
+ , typename Functor = typename binary_functor<F>::type
+ , bool FunctorIsEmpty = class_is_empty<Functor>::value
+ >
+class typed_indirect_binary_function : public std::binary_function<A1, A2, R>
+{
+ const F* f;
+public:
+ /// Constructor captures a pointer to the wrapped function.
+ typed_indirect_binary_function(const F* f) : f(f) {}
+
+ /// Return the pointer that was captured by the constructor.
+ const F* pointer() const { return f; }
+
+ /// Apply the pointed-to functor to the arguments.
+ R operator()(const A1& a1, const A2& a2) const { return (*f)(a1, a2); }
+};
+
+
+/// @copydoc typed_indirect_binary_function
+/// Specialization for an empty functor class. (This is only possible if @a F
+/// itself is an empty class. If @a F is a function or pointer-to-function
+/// type, then the functor will contain a pointer.)
+template <typename F, typename A1, typename A2, typename R, typename Functor>
+class typed_indirect_binary_function<F, A1, A2, R, Functor, true> :
+ public std::binary_function<A1, A2, R>
+{
+public:
+ /// Return `NULL` for the comparator pointer of a stateless comparator.
+ const F* pointer() const { return 0; }
+
+ /// Constructor discards the pointer to a stateless functor class.
+ typed_indirect_binary_function(const F* f) {}
+
+ /// Create an instance of the stateless functor class and apply it to the arguments.
+ R operator()(const A1& a1, const A2& a2) const { return F()(a1, a2); }
+};
+
+
+/** Indirect binary function class with inferred types.
+ *
+ * This is identical to @ref typed_indirect_binary_function, except that it
+ * derives the binary function argument and result types from the parameter
+ * type @a F instead of taking them as additional template parameters. If @a F
+ * is a class type, then it must be an `Adaptable Binary Function`.
+ *
+ * The base class is inherited publicly so that its `operator()` and
+ * `pointer()` members can be called through this class; with private
+ * inheritance only the constructor would be usable.
+ *
+ * @tparam F A binary functor class, a binary function type, or a
+ * pointer to binary function type.
+ * @tparam Functor The functor class equivalent to @a F, from which the
+ * argument and result types are taken. Defaulted.
+ *
+ * @see typed_indirect_binary_function
+ *
+ * @ingroup common
+ */
+template <typename F, typename Functor = typename binary_functor<F>::type>
+class indirect_binary_function :
+ public typed_indirect_binary_function< F
+ , typename Functor::first_argument_type
+ , typename Functor::second_argument_type
+ , typename Functor::result_type
+ >
+{
+ typedef typed_indirect_binary_function< F
+ , typename Functor::first_argument_type
+ , typename Functor::second_argument_type
+ , typename Functor::result_type
+ >
+ base;
+public:
+ indirect_binary_function(const F* f) : base(f) {} ///< Constructor; forwards @a f to the base class.
+};
+
+
+/** Choose a type based on a boolean constant.
+ *
+ * This metafunction is equivalent to C++11's `std::conditional` metafunction.
+ * It needs to be here until we can reasonably assume that users will be
+ * compiling with C++11.
+ *
+ * @tparam Cond A boolean constant.
+ * @tparam IfTrue A type.
+ * @tparam IfFalse A type.
+ * @result The `type` member will be a typedef of @a IfTrue if @a Cond
+ * is true, and a typedef of @a IfFalse if @a Cond is false.
+ *
+ * @ingroup common
+ */
+template <bool Cond, typename IfTrue, typename IfFalse>
+struct condition
+{
+ typedef IfTrue type; ///< The type selected by the condition.
+};
+
+/// @copydoc condition
+/// Specialization for @a Cond == `false`.
+template <typename IfTrue, typename IfFalse>
+struct condition<false, IfTrue, IfFalse>
+{
+ typedef IfFalse type; ///< The type selected by the condition.
+};
+
+
+/** @def __CILKRTS_STATIC_ASSERT
+ *
+ * @brief Compile-time assertion.
+ *
+ * Causes a compilation error if a compile-time constant expression is false.
+ *
+ * @par Usage example.
+ * An assertion of this kind is used in reducer_min_max.h to avoid defining
+ * legacy reducer classes that would not be binary-compatible with the
+ * same classes compiled with earlier versions of the reducer library.
+ * Note that the emptiness test must be applied to the functor type
+ * `binary_functor<Compare>::type`, not to `binary_functor<Compare>` itself
+ * (which never has data members):
+ *
+ * __CILKRTS_STATIC_ASSERT(
+ * internal::class_is_empty<
+ * typename internal::binary_functor<Compare>::type >::value,
+ * "cilk::reducer_max<Value, Compare> only works with an empty Compare class");
+ *
+ * @note When the compiler reports C++11 mode (`__INTEL_CXX11_MODE__` or
+ * `__GXX_EXPERIMENTAL_CXX0X__` is defined), this expands to the
+ * language's built-in `static_assert` declaration.
+ *
+ * @note In a non-C++11 compiler, the @a Msg string is not directly included
+ * in the compiler error message, but it may appear if the compiler
+ * prints the source line that the error occurred on.
+ *
+ * @note Because this is a preprocessor macro, a @a Cond expression that
+ * contains a top-level comma must be enclosed in parentheses.
+ *
+ * @param Cond The expression to test.
+ * @param Msg A string explaining the failure.
+ *
+ * @ingroup common
+ */
+#if defined(__INTEL_CXX11_MODE__) || defined(__GXX_EXPERIMENTAL_CXX0X__)
+# define __CILKRTS_STATIC_ASSERT(Cond, Msg) static_assert(Cond, Msg)
+#else
+# define __CILKRTS_STATIC_ASSERT(Cond, Msg) \
+ typedef int __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \
+ [::cilk::internal::static_assert_failure<(Cond)>::Success]
+
+/// @cond internal
+ template <bool> struct static_assert_failure { };
+ template <> struct static_assert_failure<true> { enum { Success = 1 }; };
+
+# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \
+ __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(__cilkrts_static_assert_, __LINE__)
+# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(a, b) \
+ __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b)
+# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) a ## b
+/// @endcond
+
+#endif
+
+/// @cond internal
+
+/** @name Aligned heap management.
+ */
+//@{
+
+/** Implementation-specific aligned memory allocation function.
+ *
+ * @param size The minimum number of bytes to allocate.
+ * @param alignment The required alignment (must be a power of 2).
+ * @return The address of a block of memory of at least @a size
+ * bytes. The address will be a multiple of @a alignment.
+ * `NULL` if the allocation fails.
+ *
+ * @see deallocate_aligned()
+ */
+inline void* allocate_aligned(std::size_t size, std::size_t alignment)
+{
+#ifdef _WIN32
+ return _aligned_malloc(size, alignment);
+#else
+ void* ptr;
+ return (posix_memalign(&ptr, std::max(alignment, sizeof(void*)), size) == 0) ? ptr : 0;
+#endif
+}
+
+/** Implementation-specific aligned memory deallocation function.
+ *
+ * @param ptr A pointer which was returned by a call to allocate_aligned().
+ */
+inline void deallocate_aligned(void* ptr)
+{
+#ifdef _WIN32
+ _aligned_free(ptr);
+#else
+ std::free(ptr);
+#endif
+}
+
+/** Class to allocate and guard an aligned pointer.
+ *
+ * A new_aligned_pointer object allocates aligned heap-allocated memory when
+ * it is created, and automatically deallocates it when it is destroyed
+ * unless its `ok()` function is called.
+ *
+ * The guard is the sole owner of the allocation, so it is not copyable: a
+ * copy would deallocate the same block a second time.
+ *
+ * @tparam T The type of the object to allocate on the heap. The allocated
+ * memory will have the size and alignment of an object of type T.
+ */
+template <typename T>
+class new_aligned_pointer {
+ void* m_ptr;
+
+ // Copying is disabled (declared private, never defined): two guards
+ // holding the same pointer would both deallocate it.
+ new_aligned_pointer(const new_aligned_pointer&);
+ new_aligned_pointer& operator=(const new_aligned_pointer&);
+public:
+ /// Constructor allocates the pointer.
+ new_aligned_pointer() :
+ m_ptr(allocate_aligned(sizeof(T), internal::align_of<T>::value)) {}
+ /// Destructor deallocates the pointer unless ok() has released it.
+ ~new_aligned_pointer() { if (m_ptr) deallocate_aligned(m_ptr); }
+ /// Get the pointer (as raw storage, e.g. for a placement `new`).
+ operator void*() { return m_ptr; }
+ /// Return the pointer and release the guard.
+ T* ok() {
+ T* ptr = static_cast<T*>(m_ptr);
+ m_ptr = 0; // The destructor will no longer deallocate.
+ return ptr;
+ }
+};
+
+//@}
+
+/// @endcond
+
+} // namespace internal
+
+//@{
+
+/** Allocate an aligned data structure on the heap.
+ *
+ * `cilk::aligned_new<T>([args])` is equivalent to `new T([args])`, except
+ * that it guarantees that the returned pointer will be at least as aligned
+ * as the alignment requirements of type `T`.
+ *
+ * @ingroup common
+ */
+template <typename T>
+T* aligned_new()
+{
+ // The guard owns the raw aligned storage; if T's constructor throws, the
+ // guard's destructor deallocates it.
+ // NOTE(review): allocate_aligned() is documented to return NULL on
+ // failure, and that result is not checked here.
+ internal::new_aligned_pointer<T> ptr;
+ // The guard converts to void*, so it can be used as the placement address.
+ new (ptr) T();
+ // Construction succeeded: release the guard and hand the object out.
+ return ptr.ok();
+}
+
+template <typename T, typename T1>
+T* aligned_new(const T1& x1)
+{
+ // Same pattern as the zero-argument overload, forwarding one argument.
+ internal::new_aligned_pointer<T> ptr;
+ new (ptr) T(x1);
+ return ptr.ok();
+}
+
+template <typename T, typename T1, typename T2>
+T* aligned_new(const T1& x1, const T2& x2)
+{
+ // Same pattern as the zero-argument overload, forwarding two arguments.
+ internal::new_aligned_pointer<T> ptr;
+ new (ptr) T(x1, x2);
+ return ptr.ok();
+}
+
+template <typename T, typename T1, typename T2, typename T3>
+T* aligned_new(const T1& x1, const T2& x2, const T3& x3)
+{
+ // Same pattern as the zero-argument overload, forwarding three arguments.
+ internal::new_aligned_pointer<T> ptr;
+ new (ptr) T(x1, x2, x3);
+ return ptr.ok();
+}
+
+template <typename T, typename T1, typename T2, typename T3, typename T4>
+T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4)
+{
+ // Same pattern as the zero-argument overload, forwarding four arguments.
+ internal::new_aligned_pointer<T> ptr;
+ new (ptr) T(x1, x2, x3, x4);
+ return ptr.ok();
+}
+
+template <typename T, typename T1, typename T2, typename T3, typename T4, typename T5>
+T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5)
+{
+ // Same pattern as the zero-argument overload, forwarding five arguments.
+ internal::new_aligned_pointer<T> ptr;
+ new (ptr) T(x1, x2, x3, x4, x5);
+ return ptr.ok();
+}
+
+//@}
+
+
+/** Deallocate an aligned data structure on the heap.
+ *
+ * `cilk::aligned_delete(ptr)` is equivalent to `delete ptr`, except that it
+ * operates on a pointer that was allocated by aligned_new(). As with
+ * `delete`, passing a null pointer has no effect.
+ *
+ * @param ptr A pointer returned by aligned_new(), or null.
+ *
+ * @ingroup common
+ */
+template <typename T>
+void aligned_delete(const T* ptr)
+{
+ // Match `delete`: a null pointer must not have a destructor run on it.
+ if (!ptr)
+ return;
+ ptr->~T();
+ // The cast drops constness as well as the type; the storage is raw again.
+ internal::deallocate_aligned((void*)ptr);
+}
+
+} // namespace cilk
+
+#endif // __cplusplus
+
+#endif // METAPROGRAMMING_H_INCLUDED
diff --git a/libcilkrts/include/cilk/reducer.h b/libcilkrts/include/cilk/reducer.h
index d44823ccea3..dcbc3cebf50 100644
--- a/libcilkrts/include/cilk/reducer.h
+++ b/libcilkrts/include/cilk/reducer.h
@@ -1,43 +1,51 @@
-/* reducer.h -*-C++-*-
- *
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-
-#ifndef CILK_REDUCER_H_INCLUDED
-#define CILK_REDUCER_H_INCLUDED
+
+/** @file reducer.h
+ *
+ * @brief Defines foundation classes for creating Cilk reducers.
+ *
+ * @ingroup Reducers
+ *
+ * @see @ref pagereducers
+ *
+ * @defgroup Reducers Reducers
+ */
+
+#ifndef REDUCER_H_INCLUDED
+#define REDUCER_H_INCLUDED
#include "cilk/hyperobject_base.h"
+#include "cilk/metaprogramming.h"
-/*
- * C++ and C interfaces for Cilk reducer hyperobjects
- */
-
-/* Utility macros */
-#define __CILKRTS_MKIDENT(a,b) __CILKRTS_MKIDENT_IMP(a,b,)
-#define __CILKRTS_MKIDENT3(a,b,c) __CILKRTS_MKIDENT_IMP(a,b,c)
-#define __CILKRTS_MKIDENT_IMP(a,b,c) a ## b ## c
#ifdef __cplusplus
@@ -45,583 +53,1832 @@
#include <new>
-#ifdef CILK_STUB
-// Stub implementations are in the cilk::stub namespace
namespace cilk {
- namespace stub { }
- using namespace stub;
-}
-#endif
-// MONOID CONCEPT AND monoid_base CLASS TEMPLATE
-//
-// In mathematics, a "monoid" comprises a set of values (type), an associative
-// operation on that set, and an identity value for that set and that
-// operation. So for example (integer, +, 0) is a monoid, as is (real, *, 1).
-// The 'Monoid' concept in Cilk++ has a typedef and three functions that
-// represent a that map to a monoid, (T, OP, IDENTITY), as follows:
-//..
-// value_type is a typedef for T
-// reduce(left,right) evaluates '*left = *left OP *right'
-// identity(p) constructs IDENTITY value into the uninitilized '*p'
-// destroy(p) calls the destructor on the object pointed-to by 'p'
-// allocate(size) return a pointer to size bytes of raw memory
-// deallocate(p) deallocate the raw memory at p
-//..
-// 'left', 'right', and 'p' are all pointers to objects of type 'value_type'.
-// All functions must be either 'static' or 'const'. A class that meets the
-// requirements of the 'Monoid' concept is usually stateless, but will
-// sometimes contain state used to initialize the identity object.
-
-namespace cilk {
-
-/// The 'monoid_base' class template is a useful base class for a large set
-/// of monoid classes for which the identity value is a default-constructed
-/// value of type 'T', allocated using operator new. A derived class of
-/// 'monoid_base' need only declare and implement the 'reduce' function.
-template <class T>
+/** Base class for defining monoids.
+ *
+ * The monoid_base class template is useful for creating classes that model
+ * the monoid concept. It provides the core type and memory management
+ * functionality. A subclass of monoid_base need only declare and implement
+ * the `identity` and `reduce` functions.
+ *
+ * The monoid_base class also manages the integration between the monoid, the
+ * reducer class that is based on it, and an optional view class which wraps
+ * value objects and restricts access to their operations.
+ *
+ * @tparam Value The value type for the monoid.
+ * @tparam View An optional view class that serves as a proxy for the value
+ * type.
+ *
+ * @see monoid_with_view
+ */
+template <typename Value, typename View = Value>
class monoid_base
{
-public:
- /// Type of value for this monoid
- typedef T value_type;
+protected:
+
+ /** Class for provisionally constructed objects.
+ *
+ * The monoid_base::construct() functions manually construct both a monoid
+ * and a view. If one of these is constructed successfully, and the
+ * construction of the other (or some other initialization) fails, then
+ * the first one must be destroyed to avoid a memory leak. Because the
+ * construction is explicit, the destruction must be explicit, too.
+ *
+ * A provisional_guard object wraps a pointer to a newly constructed
+ * object. A call to its confirm() function confirms that the object is
+ * really going to be used. If the guard is destroyed without being
+ * confirmed, then the pointed-to object is destroyed (but not
+ * deallocated).
+ *
+ * Expected usage:
+ *
+ * provisional_guard<T1> x1_provisional( new (x1) T1() );
+ * … more initialization …
+ * x1_provisional.confirm();
+ *
+ * or
+ *
+ * provisional_guard<T1> x1_provisional( new (x1) T1() );
+ * x1_provisional.confirm_if( new (x2) T2() );
+ *
+ * If an exception is thrown in the “more initialization” code in the
+ * first example, or in the `T2` constructor in the second example, then
+ * `x1_provisional` will not be confirmed, so when its destructor is
+ * called during exception unwinding, the `T1` object that was constructed
+ * in `x1` will be destroyed.
+ *
+ * @see provisional()
+ *
+ * @tparam Type The type of the provisionally constructed object.
+ */
+ template <typename Type>
+ class provisional_guard {
+ // Mutable so that the copy constructor can take ownership away from
+ // its (const) source, which may be a temporary.
+ mutable Type* m_ptr;
+
+ // Assignment is disabled (declared private, never defined): it would
+ // leave two guards responsible for the same object.
+ provisional_guard& operator=(const provisional_guard&);
+
+ public:
+
+ /** Constructor. Creates a guard for a provisionally constructed object.
+ *
+ * @param ptr A pointer to the provisionally constructed object.
+ */
+ provisional_guard(Type* ptr) : m_ptr(ptr) {}
+
+ /** Copy constructor. Transfers responsibility for the guarded object
+ * from @a other to the new guard, leaving @a other confirmed (empty).
+ *
+ * A guard returned by value may be copied when the compiler does not
+ * elide the copy. Without the transfer, destroying the source of that
+ * copy would destroy the guarded object while the copy still refers
+ * to it.
+ *
+ * @param other The guard to take over from.
+ */
+ provisional_guard(const provisional_guard& other) : m_ptr(other.m_ptr)
+ { other.m_ptr = 0; }
+
+ /** Destructor. Destroy the object pointed to by the contained pointer
+ * if it has not been confirmed.
+ */
+ ~provisional_guard() { if (m_ptr) m_ptr->~Type(); }
+
+ /** Confirm the provisional construction. Do *not* destroy the
+ * pointed-to object when the guard is destroyed.
+ */
+ void confirm() { m_ptr = 0; }
+
+ /** Confirm provisional construction if argument is non-null. Note that
+ * if an exception is thrown during evaluation of the argument
+ * expression, then this function will not be called, and the
+ * provisional object will not be confirmed. This allows the usage:
+ *
+ * x1_provisional.confirm_if( new (x2) T2() );
+ *
+ * @param cond An arbitrary pointer. The provisional object will be
+ * confirmed if @a cond is not null.
+ *
+ * @returns The value of the @a cond argument.
+ */
+ template <typename Cond>
+ Cond* confirm_if(Cond* cond) { if (cond) m_ptr = 0; return cond; }
+ };
- /// Constructs IDENTITY value into the uninitilized '*p'
- void identity(T* p) const { new ((void*) p) T(); }
+
+ /** Create a provisional_guard object. This function allows simpler code
+ * when the only use of a provisional_guard is in a
+ * provisional_guard::confirm_if() call immediately following its
+ * creation. Instead of
+ *
+ * provisional_guard<T> guard( new (ptr_to_T) T() );
+ * guard.confirm_if( new (ptr_to_U) U() );
+ *
+ * you can just write
+ *
+ * provisional( new (ptr_to_T) T() ).confirm_if( new (ptr_to_U) U() );
+ *
+ * @tparam Type The type of the provisionally constructed object.
+ *
+ * @param ptr A pointer to a provisionally constructed object.
+ *
+ * @returns A @ref provisional_guard object that guards the
+ * provisionally constructed object pointed to by @a ptr.
+ */
+ template <typename Type>
+ static provisional_guard<Type> provisional(Type* ptr)
+ { return provisional_guard<Type>(ptr); }
- /// Calls the destructor on the object pointed-to by 'p'
- void destroy(T* p) const { p->~T(); }
+public:
- /// Return a pointer to size bytes of raw memory
+ /** Value type of the monoid.
+ */
+ typedef Value value_type;
+
+ /** View type of the monoid. Defaults to be the same as the value type.
+ * @see monoid_with_view
+ */
+ typedef View view_type;
+
+ /** Should reducers created with this monoid be aligned?
+ * Default is true.
+ */
+ enum { align_reducer = true };
+
+ /** Destroy a view. Destroys (without deallocating) the @a View object
+ * pointed to by @a p.
+ *
+ * @param p The address of the @a View object to be destroyed.
+ */
+ void destroy(view_type* p) const { p->~view_type(); }
+
+ /** Allocate raw memory. Allocate @a s bytes of memory with no
+ * initialization.
+ *
+ * @param s The number of bytes of memory to allocate.
+ * @return An untyped pointer to the allocated memory.
+ */
void* allocate(size_t s) const { return operator new(s); }
- /// Deallocate the raw memory at p
+ /** Deallocate raw memory. Deallocates the memory pointed to by @a p
+ * without doing any destruction.
+ *
+ * @param p Pointer to the memory to be deallocated.
+ *
+ * @pre @a p points to a block of memory that was allocated by a
+ * call to allocate().
+ */
void deallocate(void* p) const { operator delete(p); }
+
+ /** Create the identity value. Constructs (without allocating) a @a View
+ * object representing the default value of the @a Value type.
+ *
+ * @param p A pointer to a block of raw memory large enough to hold a
+ * @a View object.
+ *
+ * @post The memory pointed to by @a p contains a @a View object that
+ * represents the default value of the @a View type.
+ *
+ * @deprecated This function constructs the @a View object with its default
+ * constructor, which will often, but not always, yield the
+ * appropriate identity value. Monoid classes should declare
+ * their identity function explicitly, rather than relying on
+ * this default definition.
+ */
+ void identity(View* p) const { new ((void*) p) View(); }
+
+
+ /** @name Construct the monoid and the view with arbitrary arguments.
+ *
+ * A @ref reducer object contains monoid and view data members, which are
+ * declared as raw storage (byte arrays), so that they are not implicitly
+ * constructed when the reducer is constructed. Instead, a reducer
+ * constructor calls one of the monoid class’s static construct()
+ * functions with the addresses of the monoid and the view, and the
+ * construct() function uses placement `new` to construct them.
+ *
+ * This allows the monoid to determine the order in which the monoid and
+ * view are constructed, and to make one of them dependent on the other.
+ *
+ * Any arguments to the reducer constructor are just passed on as
+ * additional arguments to the construct() function (after the monoid
+ * and view addresses).
+ *
+ * Any monoid whose needs are satisfied by the suite of construct()
+ * functions below, such as @ref monoid_with_view, can just inherit them
+ * from monoid_base. Other monoids will need to provide their own versions
+ * to override the monoid_base functions.
+ */
+ //@{
+
+ /** Default-construct the monoid, and pass zero to five const reference
+ * arguments to the view constructor.
+ */
+ //@{
+
+ // In every overload the monoid is constructed first, in a statement of
+ // its own, and the view only afterwards. Writing both in one expression
+ // (`provisional(...).confirm_if(...)`) leaves their relative order
+ // unspecified before C++17, so the view (or `monoid->identity()`) could
+ // be evaluated before the monoid exists. If the view construction
+ // throws, the guard destroys the already constructed monoid.
+
+ template <typename Monoid>
+ static void construct(Monoid* monoid, View* view)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ monoid->identity(view); // The monoid decides what the identity is.
+ guard.confirm_if(view);
+ }
+
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, const T1& x1)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1) );
+ }
+
+ template <typename Monoid, typename T1, typename T2>
+ static void construct(Monoid* monoid, View* view,
+ const T1& x1, const T2& x2)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1, x2) );
+ }
+
+ template <typename Monoid, typename T1, typename T2, typename T3>
+ static void construct(Monoid* monoid, View* view,
+ const T1& x1, const T2& x2, const T3& x3)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1, x2, x3) );
+ }
+
+ template <typename Monoid, typename T1, typename T2, typename T3,
+ typename T4>
+ static void construct(Monoid* monoid, View* view,
+ const T1& x1, const T2& x2, const T3& x3,
+ const T4& x4)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1, x2, x3, x4) );
+ }
+
+ template <typename Monoid, typename T1, typename T2, typename T3,
+ typename T4, typename T5>
+ static void construct(Monoid* monoid, View* view,
+ const T1& x1, const T2& x2, const T3& x3,
+ const T4& x4, const T5& x5)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1, x2, x3, x4, x5) );
+ }
+
+ //@}
+
+ /** Default-construct the monoid, and pass one non-const reference argument
+ * to the view constructor.
+ */
+ //@{
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, T1& x1)
+ {
+ // Construct the monoid in its own statement so that it is guaranteed
+ // to exist (and to be guarded) before the view constructor runs; the
+ // order within a single expression is unspecified before C++17.
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid() );
+ guard.confirm_if( new ((void*)view) View(x1) );
+ }
+ //@}
+
+ /** Copy-construct the monoid, and pass zero to four const reference
+ * arguments to the view constructor.
+ */
+ //@{
+
+ // In every overload the monoid is copy-constructed first, in a statement
+ // of its own, and the view only afterwards. Within a single expression
+ // their relative order is unspecified before C++17. If the view
+ // construction throws, the guard destroys the already constructed monoid.
+
+ template <typename Monoid>
+ static void construct(Monoid* monoid, View* view, const Monoid& m)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid(m) );
+ // With no view arguments the view must hold the monoid's identity,
+ // exactly as in the default-constructed-monoid overload; a plain
+ // `View()` is not necessarily the identity value.
+ monoid->identity(view);
+ guard.confirm_if(view);
+ }
+
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, const Monoid& m,
+ const T1& x1)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid(m) );
+ guard.confirm_if( new ((void*)view) View(x1) );
+ }
+
+ template <typename Monoid, typename T1, typename T2>
+ static void construct(Monoid* monoid, View* view, const Monoid& m,
+ const T1& x1, const T2& x2)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid(m) );
+ guard.confirm_if( new ((void*)view) View(x1, x2) );
+ }
+
+ template <typename Monoid, typename T1, typename T2, typename T3>
+ static void construct(Monoid* monoid, View* view, const Monoid& m,
+ const T1& x1, const T2& x2, const T3& x3)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid(m) );
+ guard.confirm_if( new ((void*)view) View(x1, x2, x3) );
+ }
+
+ template <typename Monoid, typename T1, typename T2, typename T3,
+ typename T4>
+ static void construct(Monoid* monoid, View* view, const Monoid& m,
+ const T1& x1, const T2& x2, const T3& x3,
+ const T4& x4)
+ {
+ provisional_guard<Monoid> guard( new ((void*)monoid) Monoid(m) );
+ guard.confirm_if( new ((void*)view) View(x1, x2, x3, x4) );
+ }
+
+ //@}
+
+ //@}
};
-} // end namspace cilk
-#ifndef CILK_STUB
+/** Monoid class that gets its value type and identity and reduce operations
+ * from its view.
+ *
+ * A simple implementation of the monoid-view-reducer architecture would
+ * distribute knowledge about the type and operations for the reduction
+ * between the monoid and the view — the identity and reduction operations are
+ * specified in the monoid, the reduction operations are implemented in the
+ * view, and the value type is specified in both the monoid and the view.
+ * This is inelegant.
+ *
+ * monoid_with_view is a subclass of @ref monoid_base that gets its value type
+ * and its identity and reduction operations from its view class. No
+ * customization of the monoid_with_view class itself is needed beyond
+ * instantiating it with an appropriate view class. (Customized subclasses of
+ * monoid_with_view may be needed for other reasons, such as to keep some
+ * state for the reducer.) All of the Cilk predefined reducers use
+ * monoid_with_view or one of its subclasses.
+ *
+ * The view class `View` of a monoid_with_view must provide the following public definitions:
+ *
+ * Definition | Meaning
+ * ---------------------------------|--------
+ * `value_type` | a typedef of the value type for the reduction
+ * `View()` | a default constructor which constructs the identity value for the reduction
+ * `void reduce(const View* other)` | a member function which applies the reduction operation to the values of `this` view and the `other` view, leaving the result as the value of `this` view, and leaving the value of the `other` view undefined (but valid)
+ *
+ * @tparam View The view class for the monoid.
+ * @tparam Align If true, reducers instantiated on this monoid will be
+ * cache-aligned. By default, library reducers (unlike legacy
+ * library reducer _wrappers_) are aligned only as required by
+ * contents.
+ */
+template <class View, bool Align = false>
+class monoid_with_view : public monoid_base<typename View::value_type, View>
+{
+public:
+ /** Should reducers created with this monoid be aligned?
+ */
+ enum { align_reducer = Align };
+
+ /** Create the identity value.
+ *
+ * Implements the monoid `identity` operation by using the @a View class’s
+ * default constructor.
+ *
+ * @param p A pointer to a block of raw memory large enough to hold a
+ * @p View object.
+ */
+ void identity(View* p) const { new ((void*)p) View(); }
+
+ /** Reduce the values of two views.
+ *
+ * Implements the monoid `reduce` operation by calling the left view’s
+ * `%reduce()` function with the right view as an operand.
+ *
+ * @param left The left operand of the reduce operation.
+ * @param right The right operand of the reduce operation.
+ * @post The left view contains the result of the reduce
+ * operation, and the right view is undefined.
+ */
+ void reduce(View* left, View* right) const { left->reduce(right); }
+};
-namespace cilk {
-/// reducer CLASS TEMPLATE
-///
-/// A reducer is instantiated on a Monoid. The Monoid provides the value
-/// type, associative reduce function, and identity for the reducer. Function
-/// view(), operator*(), and operator()() return the current view of the
-/// reducer, although operator()() is deprecated.
-template <class Monoid>
-class reducer
+/** Base class for simple views with (usually) scalar values.
+ *
+ * The scalar_view class is intended as a base class which provides about half
+ * of the required definitions for simple views. It defines the `value_type`
+ * required by a @ref monoid_with_view (but not the identity constructor and
+ * reduce operation, which are inherently specific to a particular kind of
+ * reduction). It also defines the value access functions which will be called
+ * by the corresponding @ref reducer functions. (It uses copy semantics for
+ * the view_move_in() and view_move_out() functions, which is appropriate
+ * for simple scalar types, but not necessarily for more complex types like
+ * STL containers.)
+ *
+ * @tparam Type The type of value wrapped by the view.
+ */
+template <typename Type>
+class scalar_view
{
- typedef typename Monoid::value_type value_type;
-
- __cilkrts_hyperobject_base base_;
- const Monoid monoid_; // implementation of monoid interface
- void* initialThis_; // Sanity checker
-
- // Primary (leftmost) view, on its own cache line to avoid false sharing.
- // IMPORTANT: Even though this view is known in advance, access to it from
- // outside the reducer should be through the __cilkrts_hyper_lookup()
- // function only (which is called by the view() function. This
- // restriction is necessary so that the compiler can assume that
- // __cilkrts_hyper_lookup() is the ONLY source of the address of this
- // object, and can therefore optimize as if it had no aliases.
- __CILKRTS_CACHE_ALIGNED(value_type leftmost_);
-
- // Wrappers around C monoid dispatch functions
- static void reduce_wrapper(void* r, void* lhs, void* rhs);
- static void identity_wrapper(void* r, void* view);
- static void destroy_wrapper(void* r, void* view);
- static void* allocate_wrapper(void* r, __STDNS size_t bytes);
- static void deallocate_wrapper(void* r, void* view);
+protected:
+ Type m_value; ///< The wrapped accumulator variable.
- // Used for certain asserts
- bool reducer_is_cache_aligned() const
- { return 0 == ((std::size_t) this & (__CILKRTS_CACHE_LINE__ - 1)); }
+public:
+ /** Value type definition required by @ref monoid_with_view.
+ */
+ typedef Type value_type;
+
+ /** Default constructor.
+ */
+ scalar_view() : m_value() {}
+
+ /** Value constructor.
+ */
+ scalar_view(const Type& v) : m_value(v) {}
+
+ /** @name Value functions required by the reducer class.
+ *
+ * Note that the move in/out functions use simple assignment semantics.
+ */
+ //@{
+
+ /** Set the value of the view.
+ */
+ void view_move_in(Type& v) { m_value = v; }
+
+ /** Get the value of the view.
+ */
+ void view_move_out(Type& v) { v = m_value; }
+
+ /** Set the value of the view.
+ */
+ void view_set_value(const Type& v) { m_value = v; }
+
+ /** Get the value of the view.
+ */
+ Type const& view_get_value() const { return m_value; }
+
+ /** Get a reference to the value contained in the view. For legacy
+ * reducer support only.
+ */
+ Type & view_get_reference() { return m_value; }
+
+ /** Get a reference to the value contained in the view. For legacy
+ * reducer support only.
+ */
+ Type const& view_get_reference() const { return m_value; }
+ //@}
+};
- void init();
- // disable copy
- reducer(const reducer&);
- reducer& operator=(const reducer&);
+/** Wrapper class for move-in construction.
+ *
+ * Some types allow their values to be _moved_ as an alternative to copying.
+ * Moving a value may be much faster than copying it, but may leave the value
+ * of the move’s source undefined. Consider the `swap` operation provided by
+ * many STL container classes:
+ *
+ * list<T> x, y;
+ * x = y; // Copy
+ * x.swap(y); // Move
+ *
+ * The assignment _copies_ the value of `y` into `x` in time linear in the
+ * size of `y`, leaving `y` unchanged. The `swap` _moves_ the value of `y`
+ * into `x` in constant time, but it also moves the value of `x` into `y`,
+ * potentially leaving `y` undefined.
+ *
+ * A move_in_wrapper simply wraps a pointer to an object. It is created by a
+ * call to cilk::move_in(). Passing a move_in_wrapper to a view constructor
+ * (actually, passing it to a reducer constructor, which passes it to the
+ * monoid `construct()` function, which passes it to the view constructor)
+ * allows, but does not require, the value pointed to by the wrapper to be
+ * moved into the view instead of copied.
+ *
+ * A view class exercises this option by defining a _move-in constructor_,
+ * i.e., a constructor with a move_in_wrapper parameter. The constructor calls
+ * the wrapper’s `value()` function to get a reference to its pointed-to
+ * value, and can then use that reference in a move operation.
+ *
+ * A move_in_wrapper also has an implicit conversion to its pointed-to value,
+ * so if a view class does not define a move-in constructor, its ordinary
+ * value constructor will be called with the wrapped value. For example, an
+ * @ref ReducersAdd "op_add" view does not have a move-in constructor, so
+ *
+ * int x;
+ * reducer< op_add<int> > xr(move_in(x));
+ *
+ * will simply call the `op_add_view(const int &)` constructor. But an
+ * @ref ReducersList "op_list_append" view does have a move-in constructor,
+ * so
+ *
+ * list<int> x;
+ * reducer< op_list_append<int> > xr(move_in(x));
+ *
+ * will call the `op_list_append_view(move_in_wrapper< list<int> >)` constructor,
+ * which can `swap` the value of `x` into the view.
+ *
+ * @note Remember that passing the value of a variable to a reducer
+ * constructor using a move_in_wrapper leaves the variable undefined.
+ * You cannot assume that the constructor either will or will not copy
+ * or move the value.
+ *
+ * @tparam Type The type of the wrapped value.
+ *
+ * @see cilk::move_in()
+ */
+template <typename Type>
+class move_in_wrapper
+{
+ Type *m_pointer;
+public:
+
+ /** Constructor that captures the address of its argument. This is almost
+ * always called from the @ref move_in function.
+ */
+ explicit move_in_wrapper(Type& ref) : m_pointer(&ref) { }
+
+ /** Implicit conversion to the wrapped value. This allows a move_in_wrapper
+ * to be used where a value of the wrapped type is expected, in which case
+ * the wrapper is completely transparent.
+ */
+ operator Type&() const { return *m_pointer; }
+
+ /** Get a reference to the pointed-to value. This has the same effect as
+ * the implicit conversion, but makes the intent clearer in a move-in
+ * constructor.
+ */
+ Type& value() const { return *m_pointer; }
+};
- public:
- reducer() : monoid_(), leftmost_()
- {
- init();
- }
+/** Function to create a move_in_wrapper for a value.
+ *
+ * @tparam Type The type of the argument, which will be the `Type` parameter
+ * of the created wrapper.
+ *
+ * @see move_in_wrapper
+ */
+template <typename Type>
+inline
+move_in_wrapper<Type> move_in(Type& ref)
+ { return move_in_wrapper<Type>(ref); }
- /// Special case: allow reducer(A) construction from both const and
- /// non-const reference to A. Allowing this for all argument combinations
- /// is desirable but would result in at least 93 overloads.
- template <typename A>
- explicit reducer(A& a)
- : base_(), monoid_(), leftmost_(a)
- {
- init();
- }
- template <typename A>
- explicit reducer(const A& a)
- : base_(), monoid_(), leftmost_(a)
- {
- init();
- }
+/** @copydoc move_in(Type&)
+ *
+ * @note Applying a function that is explicitly specified as modifying its
+ * argument to a const argument is obviously an irrational thing to
+ * do. This move_in() variant is just provided to allow calling a
+ * move-in constructor with a function return value, which is a
+ * temporary and can therefore only be bound to a const reference.
+ * Using it for any other purpose will probably end in tears.
+ *
+ * @note The wrapper holds the address of @a ref. When @a ref is a temporary,
+ * the wrapper must be consumed within the same full expression.
+ */
+template <typename Type>
+inline
+move_in_wrapper<Type> move_in(const Type& ref)
+ // The wrapper's constructor takes a non-const reference, so the constness
+ // added by binding to `const Type&` has to be removed explicitly.
+ { return move_in_wrapper<Type>(const_cast<Type&>(ref)); }
- template <typename A, typename B>
- reducer(const A& a, const B& b)
- : base_(), monoid_(), leftmost_(a,b)
- {
- init();
- }
- template <typename A, typename B, typename C>
- reducer(const A& a, const B& b, const C& c)
- : base_(), monoid_(), leftmost_(a,b,c)
- {
- init();
- }
+/** Wrapper class to allow implicit downcasts to reducer subclasses.
+ *
+ * The Cilk library contains a collection of reducer wrapper classes which
+ * were created before the `cilk::reducer<Monoid>` style was developed. For
+ * example, `cilk::reducer_opadd<Type>` provided essentially the same
+ * functionality that is now provided by
+ * `cilk::reducer< cilk::op_add<Type> >`. These legacy reducer classes are
+ * deprecated, but still supported, and they have been reimplemented as
+ * subclasses of the corresponding `cilk::reducer` classes. For example:
+ *
+ * template <class T>
+ * reducer_opadd<T> : public reducer< op_add<T> > { ... };
+ *
+ * This reimplementation allows transparent conversion between legacy and
+ * new reducers. That is, a `reducer<op_add>*` or `reducer<op_add>&` can be
+ * used anywhere that a `reducer_opadd*` or `reducer_opadd&` is expected,
+ * and vice versa.
+ *
+ * The conversion from the legacy reducer to the new reducer is just an
+ * up-cast, which is provided for free by C++. The conversion from the new
+ * reducer to the legacy reducer is a down-cast, though, which requires an
+ * explicit conversion member function in the `reducer` class. The challenge
+ * is to define a function in the reducer template class which will convert
+ * each cilk::reducer specialization to the corresponding legacy reducer,
+ * if there is one.
+ *
+ * The trick is in the legacy_reducer_downcast template class, which provides
+ * a mapping from `cilk::reducer` specializations to legacy reducer classes.
+ * `reducer<Monoid>` has a conversion function to convert itself to
+ * `legacy_reducer_downcast< reducer<Monoid> >::%type`. By default,
+ * `legacy_reducer_downcast<Reducer>::%type` is just a trivial subclass of
+ * `Reducer`, which is uninteresting, but a reducer with a legacy counterpart
+ * will have a specialization of `legacy_reducer_downcast` whose `type` is
+ * the corresponding legacy reducer. For example:
+ *
+ * template <typename Type>
+ * struct legacy_reducer_downcast< reducer< op_add<Type> > >
+ * {
+ * typedef reducer_opadd<Type> type;
+ * };
+ *
+ *
+ * @tparam Reducer The new-style reducer class whose corresponding legacy reducer class
+ * is `type`, if there is such a legacy reducer class.
+ */
+template <typename Reducer>
+struct legacy_reducer_downcast
+{
+ /** The related legacy reducer class.
+ *
+ * By default, this is just a trivial subclass of Reducer, but it can be
+ * overridden in the specialization of legacy_reducer_downcast for
+ * a reducer that has a corresponding legacy reducer.
+ */
+ struct type : Reducer { };
+};
- template <typename A, typename B, typename C, typename D>
- reducer(const A& a, const B& b, const C& c, const D& d)
- : base_(), monoid_(), leftmost_(a,b,c,d)
- {
- init();
- }
- template <typename A, typename B, typename C, typename D, typename E>
- reducer(const A& a, const B& b, const C& c, const D& d, const E& e)
- : base_(), monoid_(), leftmost_(a,b,c,d,e)
- {
- init();
- }
+namespace internal {
+/// @cond internal
- // Special case: both const and non-const Monoid reference are needed
- // so that reducer(Monoid&) is more specialised than
- // template <typename A> explicit reducer(A& a) and
- // reducer(const Monoid&) is more specialised than
- // template <typename A> explicit reducer(const A& a)
- explicit reducer(Monoid& hmod)
- : base_(), monoid_(hmod), leftmost_()
- {
- init();
- }
+template <typename Value, typename View>
+struct reducer_set_get
+{
+ static View theView; // Declared but not defined
- explicit reducer(const Monoid& hmod)
- : base_(), monoid_(hmod), leftmost_()
- {
- init();
- }
+ // sizeof(notchar) is guaranteed larger than 1
+ struct notchar { char x[2]; };
- // Special case: allow reducer(Monoid,A) construction from both const and
- // non-const references to A. Allowing this for all argument combinations
- // is desirable but would result in at least 93 overloads.
- template <typename A>
- reducer(const Monoid& hmod, A& a)
- : base_(), monoid_(hmod), leftmost_(a)
- {
- init();
- }
+ // check_for_ref returns char if 'get_value' returns by value and notchar
+ // if 'get_value' returns by reference.
+ static char check_for_ref(Value, ...);
+ static notchar check_for_ref(Value&, int);
- template <typename A>
- reducer(const Monoid& hmod, const A& a)
- : base_(), monoid_(hmod), leftmost_(a)
- {
- init();
- }
+ enum { GET_VALUE_BY_VALUE =
+ (1 == sizeof(check_for_ref(theView.view_get_value(), 0))) } ;
+
+ typedef typename condition<GET_VALUE_BY_VALUE,
+ Value, const Value&>::type get_value_type;
+
+ static void move_in(View& view, Value& v) { view.view_move_in(v); }
+ static void move_out(View& view, Value& v) { view.view_move_out(v); }
+
+ static void set_value(View& view, const Value& v)
+ { view.view_set_value(v); }
+
+ static get_value_type get_value(const View& view)
+ { return view.view_get_value(); }
+};
+
+template <typename Value>
+struct reducer_set_get<Value, Value>
+{
+ typedef const Value& get_value_type;
+
+ static void move_in(Value& view, Value& v) { view = v; }
+ static void move_out(Value& view, Value& v) { v = view; }
+
+ static void set_value(Value& view, const Value& v) { view = v; }
+
+ static get_value_type get_value(const Value& view) { return view; }
+};
+
+/// @endcond
- template <typename A, typename B>
- reducer(const Monoid& hmod, const A& a, const B& b)
- : base_(), monoid_(hmod), leftmost_(a,b)
- {
- init();
- }
- template <typename A, typename B, typename C>
- reducer(const Monoid& hmod, const A& a, const B& b, const C& c)
- : base_(), monoid_(hmod), leftmost_(a,b,c)
+/** Base class defining the data layout that is common to all reducers.
+ */
+template <typename Monoid>
+class reducer_base {
+ typedef typename Monoid::view_type view_type;
+
+ // The following declarations ensure that the `base`, `monoid`, and
+ // `initialThis` fields (as well as the `leftmost` field, which is defined
+ // in the `reducer_content` subclass) are assigned at the same offsets as
+ // in the “old” reducer implementation (prior to November 2012), which
+ // declared them as
+ //
+ // __cilkrts_hyperobject_base m_base;
+ // const Monoid m_monoid;
+ // void* m_initialThis;
+ // __CILKRTS_CACHE_ALIGNED(view_type m_leftmost);
+
+ // This structure determines what the relative positions of the `base` and
+ // `monoid` fields would be, and how much space would be allocated for
+ // them.
+ //
+ struct _layout_overlay {
+ __cilkrts_hyperobject_base base;
+ Monoid monoid;
+ _layout_overlay(); // Declared, not defined.
+ };
+
+ // This makes the reducer a hyper-object. (Partially initialized in
+ // the derived reducer_content class.)
+ //
+ __cilkrts_hyperobject_base m_base;
+
+ // Reserve enough unconstructed space for the monoid. It is allocated
+ // here as raw bytes, and is constructed explicitly by a call to the
+ // monoid_type::construct() function in the constructor of the `reducer`
+ // subclass.
+ //
+ char _monoid_reservation[
+ sizeof(_layout_overlay) -
+ sizeof(__cilkrts_hyperobject_base) ];
+
+ // Used for sanity checking at destruction.
+ //
+ void* m_initialThis;
+
+ // The leftmost view comes next. It is defined in the derived
+ // reducer_content class.
+
+ /** @name C-callable wrappers for the C++-coded monoid dispatch functions.
+ */
+ //@{
+
+ static void reduce_wrapper(void* r, void* lhs, void* rhs);
+ static void identity_wrapper(void* r, void* view);
+ static void destroy_wrapper(void* r, void* view);
+ static void* allocate_wrapper(void* r, __STDNS size_t bytes);
+ static void deallocate_wrapper(void* r, void* view);
+
+ //@}
+
+protected:
+
+ /** Constructor.
+ *
+ * @param leftmost The address of the leftmost view in the reducer.
+ */
+ reducer_base(char* leftmost)
{
- init();
+ static const cilk_c_monoid c_monoid_initializer = {
+ (cilk_c_reducer_reduce_fn_t) &reduce_wrapper,
+ (cilk_c_reducer_identity_fn_t) &identity_wrapper,
+ (cilk_c_reducer_destroy_fn_t) &destroy_wrapper,
+ (cilk_c_reducer_allocate_fn_t) &allocate_wrapper,
+ (cilk_c_reducer_deallocate_fn_t) &deallocate_wrapper
+ };
+
+ m_base.__c_monoid = c_monoid_initializer;
+ m_base.__flags = 0;
+ m_base.__view_offset = (char*)leftmost - (char*)this;
+ m_base.__view_size = sizeof(view_type);
+ m_initialThis = this;
+
+ __cilkrts_hyper_create(&m_base);
}
-
- template <typename A, typename B, typename C, typename D>
- reducer(const Monoid& hmod, const A& a, const B& b, const C& c,
- const D& d)
- : base_(), monoid_(hmod), leftmost_(a,b,c,d)
+
+ /** Destructor.
+ */
+ __CILKRTS_STRAND_STALE(~reducer_base())
{
- init();
+ // Make sure we haven't been memcopy'd or corrupted
+ __CILKRTS_ASSERT(this == m_initialThis);
+ __cilkrts_hyper_destroy(&m_base);
}
- template <typename A, typename B, typename C, typename D, typename E>
- reducer(const Monoid& hmod, const A& a, const B& b, const C& c,
- const D& d, const E& e)
- : base_(), monoid_(hmod), leftmost_(a,b,c,d,e)
+ /** Monoid data member.
+ *
+ * @return A pointer to the reducer’s monoid data member.
+ */
+ Monoid* monoid_ptr()
+ { return & reinterpret_cast<_layout_overlay*>(this)->monoid; }
+
+ /** Leftmost view data member.
+ *
+ * @return A pointer to the reducer’s leftmost view data member.
+ *
+ * @note This function returns the address of the *leftmost* view,
+ * which is unique for the lifetime of the reducer. It is
+ * intended to be used in constructors and destructors.
+ * Use the reducer::view() function to access the per-strand
+ * view instance.
+ */
+ view_type* leftmost_ptr()
{
- init();
+ char* view_addr = (char*)this + m_base.__view_offset;
+ return reinterpret_cast<view_type*>(view_addr);
}
+
+public:
- __CILKRTS_STRAND_STALE(~reducer());
-
- /* access the unwrapped object */
- value_type& view() {
- // Look up reducer in current map. IMPORTANT: Even though the
- // leftmost view is known in advance, access to it should be through
- // the __cilkrts_hyper_lookup() function only. This restriction is
- // necessary so that the compiler can assume that
- // __cilkrts_hyper_lookup() is the ONLY source of the address of this
- // object, and can therefore optimize as if it had no aliases.
- return *static_cast<value_type *>(__cilkrts_hyper_lookup(&base_));
+ /** @name Access the current view.
+ *
+ * These functions return a reference to the instance of the reducer’s
+ * view that was created for the current strand of a parallel computation
+ * (and create it if it doesn’t already exist). Note the difference from
+ * the (protected) leftmost_ptr() function, which returns a pointer to the
+ * _leftmost_ view, which is the same in all strands.
+ */
+ //@{
+
+ /** Per-strand view instance.
+ *
+ * @return A reference to the per-strand view instance.
+ */
+ view_type& view()
+ {
+ return *static_cast<view_type *>(__cilkrts_hyper_lookup(&m_base));
}
-
- value_type const& view() const {
- /* look up reducer in current map */
- return const_cast<reducer*>(this)->view();
+
+ /** @copydoc view()
+ */
+ const view_type& view() const
+ {
+ return const_cast<reducer_base*>(this)->view();
}
-
- /// "Dereference" reducer to return the current view.
- value_type& operator*() { return view(); }
- value_type const& operator*() const { return view(); }
-
- /// "Dereference" reducer to return the current view.
- value_type* operator->() { return &view(); }
- value_type const* operator->() const { return &view(); }
-
- /// operator()() is deprecated. Use operator*() instead.
- value_type& operator()() { return view(); }
- value_type const& operator()() const { return view(); }
-
- const Monoid& monoid() const { return monoid_; }
+
+ //@}
};
template <typename Monoid>
-void reducer<Monoid>::init()
+void reducer_base<Monoid>::reduce_wrapper(void* r, void* lhs, void* rhs)
{
- static const cilk_c_monoid c_monoid_initializer = {
- (cilk_c_reducer_reduce_fn_t) &reduce_wrapper,
- (cilk_c_reducer_identity_fn_t) &identity_wrapper,
- (cilk_c_reducer_destroy_fn_t) &destroy_wrapper,
- (cilk_c_reducer_allocate_fn_t) &allocate_wrapper,
- (cilk_c_reducer_deallocate_fn_t) &deallocate_wrapper
- };
-
-#ifdef CILK_CHECK_REDUCER_ALIGNMENT
- // ASSERT THAT LEFTMOST VIEW IS CACHE-LINE ALIGNED:
- // We use an attribute to ensure that the leftmost view, and therefore the
- // entire reducer object, is cache-line (64-byte) aligned. The compiler
- // enforces this alignment for static- and automatic-duration objects.
- // However, if a reducer or a structure containing a reducer is allocated
- // from the heap using a custom allocator (which typically guarantee only
- // 8- or 16-byte alignment), the compiler cannot guarantee this cache-line
- // alignment. Certain vector instructions require that the operands be
- // aligned on vector boundaries (up to 16-bytes in SSE, 32-bytes in AVX
- // and 64-bytes in MIC). At high optimazation levels, the allocator's
- // failure to keep the promised alignment can cause a program to fault
- // mysteriously in a vector instruction. The assertion is intended to
- // catch this situation. If the assertion fails, the user is advised
- // to change the way that reducer or the the structure containing the
- // reducer is allocated such that it is guaranteed to be on a 64-byte
- // boundary, thus preventing both the possible crash and false sharing.
- __CILKRTS_ASSERT(reducer_is_cache_aligned());
-#endif // CILK_CHECK_REDUCER_ALIGNMENT
-
- base_.__c_monoid = c_monoid_initializer;
- base_.__flags = 0;
- base_.__view_offset = (char*) &leftmost_ - (char*) this;
- base_.__view_size = sizeof(value_type);
- initialThis_ = this;
-
- __cilkrts_hyper_create(&base_);
+ Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr();
+ monoid->reduce(static_cast<view_type*>(lhs),
+ static_cast<view_type*>(rhs));
}
template <typename Monoid>
-void reducer<Monoid>::reduce_wrapper(void* r, void* lhs, void* rhs)
+void reducer_base<Monoid>::identity_wrapper(void* r, void* view)
{
- reducer* self = static_cast<reducer*>(r);
- self->monoid_.reduce(static_cast<value_type*>(lhs),
- static_cast<value_type*>(rhs));
+ Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr();
+ monoid->identity(static_cast<view_type*>(view));
}
template <typename Monoid>
-void reducer<Monoid>::identity_wrapper(void* r, void* view)
+void reducer_base<Monoid>::destroy_wrapper(void* r, void* view)
{
- reducer* self = static_cast<reducer*>(r);
- self->monoid_.identity(static_cast<value_type*>(view));
+ Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr();
+ monoid->destroy(static_cast<view_type*>(view));
}
template <typename Monoid>
-void reducer<Monoid>::destroy_wrapper(void* r, void* view)
+void* reducer_base<Monoid>::allocate_wrapper(void* r, __STDNS size_t bytes)
{
- reducer* self = static_cast<reducer*>(r);
- self->monoid_.destroy(static_cast<value_type*>(view));
+ Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr();
+ return monoid->allocate(bytes);
}
template <typename Monoid>
-void* reducer<Monoid>::allocate_wrapper(void* r, __STDNS size_t bytes)
+void reducer_base<Monoid>::deallocate_wrapper(void* r, void* view)
{
- reducer* self = static_cast<reducer*>(r);
- return self->monoid_.allocate(bytes);
+ Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr();
+ monoid->deallocate(static_cast<view_type*>(view));
}
+
+/** Base class defining the data members of a reducer.
+ *
+ * @tparam Aligned The `m_leftmost` data member, and therefore the entire
+ * structure, are cache-line aligned if this parameter
+ * is `true`.
+ */
+template <typename Monoid, bool Aligned = Monoid::align_reducer>
+class reducer_content;
+
+/** Base class defining the data members of an aligned reducer.
+ */
template <typename Monoid>
-void reducer<Monoid>::deallocate_wrapper(void* r, void* view)
+class reducer_content<Monoid, true> : public reducer_base<Monoid>
{
- reducer* self = static_cast<reducer*>(r);
- self->monoid_.deallocate(static_cast<value_type*>(view));
-}
+ typedef typename Monoid::view_type view_type;
+
+ // The leftmost view is defined as raw bytes. It will be constructed
+ // by the monoid `construct` function. It is cache-aligned, which
+ // will push it into a new cache line. Furthermore, its alignment causes
+ // the reducer as a whole to be cache-aligned, which makes the reducer
+ // size a multiple of a cache line. Since there is nothing in the reducer
+ // after the view, all this means that the leftmost view gets one or more
+ // cache lines all to itself, which prevents false sharing.
+ //
+ __CILKRTS_CACHE_ALIGN
+ char m_leftmost[sizeof(view_type)];
+
+ /** Test if the reducer is cache-line-aligned.
+ *
+ * Used in assertions.
+ */
+ bool reducer_is_cache_aligned() const
+ { return 0 == ((std::size_t) this & (__CILKRTS_CACHE_LINE__ - 1)); }
+
+protected:
+ /** Constructor.
+ */
+ reducer_content() : reducer_base<Monoid>((char*)&m_leftmost)
+ {
+#ifndef CILK_IGNORE_REDUCER_ALIGNMENT
+ assert(reducer_is_cache_aligned() &&
+ "Reducer should be cache aligned. Please see comments following this assertion for explanation and fixes.");
+#endif
+ /* "REDUCER SHOULD BE CACHE ALIGNED" ASSERTION.
+ *
+ * This Reducer class instantiation specifies cache-line alignment of the
+ * leftmost view field (and, implicitly, of the reducer itself). You got
+ * this assertion because a reducer with this class was allocated at a
+ * non-cache-aligned address, probably because it was allocated on the
+ * heap with `new`. This can be a problem for two reasons:
+ *
+ * 1. If the leftmost view is not on a cache line by itself, there might
+ * be a slowdown resulting from accesses to the same cache line from
+ * different threads.
+ *
+ * 2. The compiler thinks that reducer is cache-line aligned, but it
+ * really isn't. If the reducer is contained in a structure, then the
+ * compiler will believe that the containing structure, and other
+ * fields contained in it, are also more aligned than they really
+ * are. In particular, if the structure contains a numeric array that
+ * is used in a vectorizable loop, then the compiler might generate
+ * invalid vector instructions, resulting in a runtime error.
+ *
+ * The compiler will always allocate reducer variables, and structure
+ * variables containing reducers, with their required alignment.
+ * Reducers, and structures containing a reducer, which are allocated
+ * on the heap with `new` will _not_ be properly aligned.
+ *
+ * There are three ways that you can fix this assertion failure.
+ *
+ * A. Rewrite your code to use the new-style `reducer< op_XXX<Type> >`
+ * instead of the legacy `reducer_XXX<type>`. The new-style reducers
+ * are not declared to be cache-aligned, and will work properly if
+ * they are not cache-aligned.
+ *
+ * B. If you must allocate an old-style reducer or a structure containing
+ * a reducer on the heap, figure out how to align it correctly. The
+ * suggested fix is to use `cilk::aligned_new()` and
+ * `cilk::aligned_delete()` instead of `new` and `delete`, as follows:
+ *
+ * Type* ptr = cilk::aligned_new<Type>(constructor-arguments);
+ * cilk::aligned_delete(ptr);
+ *
+ * C. Define the macro CILK_IGNORE_REDUCER_ALIGNMENT, which will suppress
+ * the assertion check. Do this only if you are comfortable that
+ * problem (2) above will not occur.
+ */
+ }
+};
+
+/** Base class defining the data members of an unaligned reducer.
+ */
template <typename Monoid>
-__CILKRTS_STRAND_STALE(reducer<Monoid>::~reducer())
+class reducer_content<Monoid, false> : public reducer_base<Monoid>
{
- // Make sure we haven't been memcopy'd or corrupted
- __CILKRTS_ASSERT(this == initialThis_);
- __cilkrts_hyper_destroy(&base_);
-}
+ typedef typename Monoid::view_type view_type; ///< The view type.
+
+ // Reserve space for the leftmost view. The view will be placed at a
+ // cache-aligned offset inside this space at runtime (the offset is
+ // computed in the constructor below), so that the view gets one or more
+ // cache lines all to itself, which prevents false sharing.
+ //
+ // The number of bytes to reserve is determined as follows:
+ // * Start with the view size.
+ // * Round up to a multiple of the cache line size, to get the total size
+ // of the cache lines that will be dedicated to the view.
+ // * Add (cache line size - 1) filler bytes to guarantee that the reserved
+ // area will contain a cache-aligned block of the required cache lines,
+ // no matter where the reserved area starts.
+ //
+ // The round-up uses the same divide-then-multiply form as the constructor.
+ // Masking the sum with (cache line size - 1) would keep only the
+ // remainder, reserving too few bytes for any view of a cache line or more,
+ // so the aligned view would run past the end of this array.
+ //
+ char m_leftmost[
+ // View size rounded up to a whole number of cache lines
+ ( (sizeof(view_type) + __CILKRTS_CACHE_LINE__ - 1)
+ / __CILKRTS_CACHE_LINE__ * __CILKRTS_CACHE_LINE__
+ )
+ // plus filler to allow alignment.
+ + __CILKRTS_CACHE_LINE__ - 1
+ ];
+
+protected:
+
+ /** Constructor. Find the first cache-aligned position in the reserved
+ * area, and pass it to the base constructor as the leftmost view
+ * address.
+ */
+ reducer_content() : reducer_base<Monoid>(
+ (char*)(
+ ((std::size_t)&m_leftmost + __CILKRTS_CACHE_LINE__ - 1)
+ / __CILKRTS_CACHE_LINE__ * __CILKRTS_CACHE_LINE__) )
+ {}
+};
-} // end namespace cilk
-#else // if defined(CILK_STUB)
+} // namespace internal
-/**************************************************************************
- * Stub reducer implementation
- **************************************************************************/
-namespace cilk {
+// The __cilkrts_hyperobject_ functions are defined differently depending on
+// whether a file is compiled with or without the CILK_STUB option. Therefore,
+// reducers compiled in the two modes should be link-time incompatible, so that
+// object files compiled with stubbed reducers won't be linked into an
+// unstubbed program, or vice versa. We achieve this by putting the reducer
+// class definition into the cilk::stub namespace in a stubbed compilation.
+
+#ifdef CILK_STUB
namespace stub {
+#endif
+/** Reducer class.
+ *
+ * A reducer is instantiated on a Monoid. The Monoid provides the value
+ * type, associative reduce function, and identity for the reducer.
+ *
+ * @tparam Monoid The monoid class that the reducer is instantiated on. It must model
+ * the @ref reducers_monoid_concept "monoid concept".
+ *
+ * @see @ref pagereducers
+ */
template <class Monoid>
-class reducer {
- typedef typename Monoid::value_type value_type;
-
- const Monoid monoid_;
- value_type obj_;
+class reducer : public internal::reducer_content<Monoid>
+{
+ typedef internal::reducer_content<Monoid> base;
+ using base::monoid_ptr;
+ using base::leftmost_ptr;
+ public:
+ typedef Monoid monoid_type; ///< The monoid type.
+ typedef typename Monoid::value_type value_type; ///< The value type.
+ typedef typename Monoid::view_type view_type; ///< The view type.
- /* disable copy */
- reducer(const reducer&);
- reducer& operator=(const reducer&);
+ private:
+ typedef internal::reducer_set_get<value_type, view_type> set_get;
+
+ reducer(const reducer&); ///< Disallow copying.
+ reducer& operator=(const reducer&); ///< Disallow assignment.
public:
- reducer() : monoid_(), obj_() { }
-
- // Special case: allow reducer(A) construction from both const and
- // non-const reference to A. Allowing this for all argument combinations
- // is desirable but would result in at least 93 overloads.
- template <typename A>
- explicit reducer(A& a)
- : monoid_(), obj_(a) {
- }
-
- template <typename A>
- explicit reducer(const A& a)
- : monoid_(), obj_(a) {
+
+ /** @name Constructors
+ *
+ * All reducer constructors call the static `construct()` function of the monoid class to
+ * construct the reducer's monoid and leftmost view.
+ *
+ * The reducer constructor arguments are simply passed through to the construct() function.
+ * Thus, the constructor parameters accepted by a particular reducer class are determined
+ * by its monoid class.
+ */
+ //@{
+
+ /** 0 – 6 const reference parameters.
+ */
+ //@{
+
+ reducer()
+ {
+ monoid_type::construct(monoid_ptr(), leftmost_ptr());
}
- template <typename A, typename B>
- reducer(const A& a, const B& b)
- : monoid_(), obj_(a, b) {
+ template <typename T1>
+ reducer(const T1& x1)
+ {
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1);
}
- template <typename A, typename B, typename C>
- reducer(const A& a, const B& b, const C& c)
- : monoid_(), obj_(a, b, c)
+ template <typename T1, typename T2>
+ reducer(const T1& x1, const T2& x2)
{
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2);
}
- template <typename A, typename B, typename C, typename D>
- reducer(const A& a, const B& b, const C& c, const D& d)
- : monoid_(), obj_(a, b, c, d)
+ template <typename T1, typename T2, typename T3>
+ reducer(const T1& x1, const T2& x2, const T3& x3)
{
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3);
}
- template <typename A, typename B, typename C, typename D, typename E>
- reducer(const A& a, const B& b, const C& c, const D& d, const E& e)
- : monoid_(), obj_(a, b, c, d, e)
+ template <typename T1, typename T2, typename T3, typename T4>
+ reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4)
{
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4);
}
- // Special case: both const and non-const Monoid reference are needed
- // so that reducer(Monoid&) is more specialised than
- // template <typename A> explicit reducer(A& a) and
- // reducer(const Monoid&) is more specialised than
- // template <typename A> explicit reducer(const A& a)
- explicit reducer(Monoid& m) : monoid_(m), obj_() { }
- explicit reducer(const Monoid& m) : monoid_(m), obj_() { }
-
- // Special case: allow reducer(Monoid,A) construction from both const and
- // non-const references to A. Allowing this for all argument combinations
- // is desirable but would result in at least 93 overloads.
- template <typename A>
- reducer(const Monoid& m, A& a)
- : monoid_(m), obj_(a) {
+ template <typename T1, typename T2, typename T3, typename T4, typename T5>
+ reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5)
+ {
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5);
}
- template <typename A>
- reducer(const Monoid& m, const A& a)
- : monoid_(m), obj_(a) {
+ template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6>
+ reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5, const T6& x6)
+ {
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5, x6);
}
-
- template <typename A, typename B>
- reducer(const Monoid& m, const A& a, const B& b)
- : monoid_(m), obj_(a, b) {
+
+ //@}
+
+ /** 1 non-const reference parameter.
+ */
+ //@{
+
+ template <typename T1>
+ reducer(T1& x1)
+ {
+ monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1);
}
+
+ //@}
- template <typename A, typename B, typename C>
- reducer(const Monoid& m, const A& a, const B& b, const C& c)
- : monoid_(m), obj_(a, b, c)
+ /** Destructor.
+ */
+ __CILKRTS_STRAND_STALE(~reducer())
{
+ leftmost_ptr()->~view_type();
+ monoid_ptr()->~monoid_type();
}
- template <typename A, typename B, typename C, typename D>
- reducer(const Monoid& m, const A& a, const B& b, const C& c, const D& d)
- : monoid_(m), obj_(a, b, c, d)
+ //@{
+ /** Get the monoid.
+ *
+ * @return A reference to the monoid object belonging to this reducer.
+ */
+ Monoid& monoid() { return *monoid_ptr(); }
+
+ const Monoid& monoid() const
+ { return const_cast<reducer*>(this)->monoid(); }
+ //@}
+
+ //@{
+ /** Access the current view.
+ *
+ * Return a reference to the instance of the reducer’s view that was
+ * created for the current strand of a parallel computation (and create
+ * it if it doesn’t already exist).
+ */
+ view_type& view() { return base::view(); }
+ const view_type& view() const { return base::view(); }
+ //@}
+
+
+ /** @name Dereference the reducer to get the view.
+ *
+ * “Dereferencing” a reducer yields the view for the current strand. The
+ * view, in turn, acts as a proxy for its contained value, exposing only
+ * those operations which are consistent with the reducer’s monoid. Thus,
+ * all modifications of the reducer’s accumulator variable are written as
+ *
+ * *reducer OP ...
+ *
+ * or
+ *
+ * reducer->func(...)
+ *
+ * (The permitted operations on a reducer’s accumulator are listed in the
+ * documentation for that particular kind of reducer.)
+ *
+ * @note `*r` is a synonym for `r.view()`. Recommended style is to use
+ * `*r` (or `r->`) in the common case where code is simply
+ * updating the accumulator variable wrapped in the view, and to
+ * use `r.view()` in the unusual case where it is desirable to
+ * call attention to the view itself.
+ */
+ //@{
+
+ //@{
+ /** Dereference operator.
+ *
+ * @return A reference to the per-strand view instance.
+ */
+ view_type& operator*() { return view(); }
+ view_type const& operator*() const { return view(); }
+ //@}
+
+ //@{
+ /** Pointer operator.
+ *
+ * @return A pointer to the per-strand view instance.
+ */
+ view_type* operator->() { return &view(); }
+ view_type const* operator->() const { return &view(); }
+ //@}
+
+ //@{
+ /** Deprecated view access.
+ *
+ * `r()` is a synonym for `*r` which was used with early versions of Cilk
+ * reducers. `*r` is now the preferred usage.
+ *
+ * @deprecated Use operator*() instead of operator()().
+ *
+ * @return A reference to the per-strand view instance.
+ */
+ view_type& operator()() { return view(); }
+ view_type const& operator()() const { return view(); }
+ //@}
+
+ //@}
+
+ /** @name Set and get the value.
+ *
+ * These functions are used to set an initial value for the reducer before
+ * starting the reduction, or to get the final value after the reduction
+ * is complete.
+ *
+ * @note These functions are completely different from the view
+ * operations that are made available via operator*() and
+ * operator->(), which are used to _modify_ the reducer’s value
+ * _during_ the reduction.
+ *
+ * @warning These functions _can_ be called at any time, and in
+ * general, they will refer to the value contained in the view
+ * for the current strand. However, using them other than to
+ * set the reduction’s initial value or get its final value
+ * will almost always result in undefined behavior.
+ */
+ //@{
+
+ /** Move a value into the reducer.
+ *
+ * This function is used to set the initial value of the reducer’s
+ * accumulator variable by either copying or _moving_ the value of @a obj
+ * into it. Moving a value can often be performed in constant time, even
+ * for large container objects, but has the side effect of leaving the
+ * value of @a obj undefined. (See the description of the
+ * @ref move_in_wrapper class for a discussion of moving values.)
+ *
+ * @par Usage
+ * A move_in() call to initialize a reducer is often paired with a
+ * move_out() call to get its final value:
+ *
+ * reducer<Type> xr;
+ * xr.move_in(x);
+ * … do the reduction …
+ * xr.move_out(x);
+ *
+ * @par Assumptions
+ * - You cannot assume either that this function will copy its
+ * value or that it will move it.
+ * - You must assume that the value of @a obj will be undefined
+ * after the call to move_in().
+ * - You can assume that move_in() will be at least as efficient as
+ * set_value(), and you should therefore prefer move_in() unless
+ * you need the value of @a obj to be unchanged after the call.
+ * (But you should usually prefer the move-in constructor over a
+ * move_in() call — see the note below.)
+ *
+ * @note The behavior of a default constructor followed by move-in
+ * initialization:
+ *
+ * reducer<Type> xr;
+ * xr.move_in(x);
+ *
+ * @note is not necessarily the same as a move-in constructor:
+ *
+ * reducer<Type> xr(move_in(x));
+ *
+ * @note In particular, when @a Type is a container type with a
+ * non-empty allocator, the move-in constructor will create the
+ * accumulator variable with the same allocator as the input
+ * argument @a x, while the default constructor will create the
+ * accumulator variable with a default allocator. The mismatch of
+ * allocators in the latter case means that the input argument
+ * @a x may have to be copied in linear time instead of being
+ * moved in constant time.
+ *
+ * @note Best practice is to prefer the move-in constructor over the
+ * move-in function unless the move-in function is required for
+ * some specific reason.
+ *
+ * @warning Calling this function other than to set the initial value
+ * for a reduction will almost always result in undefined
+ * behavior.
+ *
+ * @param obj The object containing the value that will be moved into the
+ * reducer.
+ *
+ * @post The reducer contains the value that was initially in @a obj.
+ * @post The value of @a obj is undefined.
+ *
+ * @see set_value()
+ */
+ void move_in(value_type& obj) { set_get::move_in(view(), obj);}
+
+ /** Move the value out of the reducer.
+ *
+ * This function is used to retrieve the final value of the reducer’s
+ * accumulator variable by either copying or _moving_ that value into
+ * @a obj. Moving a value can often be performed in constant time, even
+ * for large container objects, but has the side effect of leaving the
+ * value of the reducer’s accumulator variable undefined. (See the
+ * description of the @ref move_in_wrapper class for a discussion of
+ * moving values.)
+ *
+ * @par Usage
+ * A move_in() call to initialize a reducer is often paired with a
+ * move_out() call to get its final value:
+ *
+ * reducer<Type> xr;
+ * xr.move_in(x);
+ * … do the reduction …
+ * xr.move_out(x);
+ *
+ * @par Assumptions
+ * - You cannot assume either that this function will copy its
+ * value or that it will move it.
+ * - You must assume that the value of the reducer’s accumulator
+ * variable will be undefined after the call to move_out().
+ * - You can assume that move_out() will be at least as efficient as
+ * get_value(), and you should therefore prefer move_out() unless
+ * you need the accumulator variable to be preserved after the
+ * call.
+ *
+ * @warning Calling this function other than to retrieve the final
+ * value of a reduction will almost always result in undefined
+ * behavior.
+ *
+ * @param obj The object that the value of the reducer will be moved into.
+ *
+ * @post @a obj contains the value that was initially in the reducer.
+ * @post The value of the reducer is undefined.
+ *
+ * @see get_value()
+ */
+ void move_out(value_type& obj) { set_get::move_out(view(), obj); }
+
+ /** Set the value of the reducer.
+ *
+ * This function sets the initial value of the reducer’s accumulator
+ * variable to the value of @a obj.
+ *
+ * @note The behavior of a default constructor followed by
+ * initialization:
+ *
+ * reducer<Type> xr;
+ * xr.set_value(x);
+ *
+ * @note is not necessarily the same as a value constructor:
+ *
+ * reducer<Type> xr(x);
+ *
+ * @note In particular, when @a Type is a container type with a
+ * non-empty allocator, the value constructor will create the
+ * accumulator variable with the same allocator as the input
+ * argument @a x, while the default constructor will create the
+ * accumulator variable with a default allocator.
+ *
+ * @warning Calling this function other than to set the initial value
+ * for a reduction will almost always result in undefined
+ * behavior.
+ *
+ * @param obj The object containing the value that will be copied into
+ * the reducer.
+ *
+ * @post The reducer contains a copy of the value in @a obj.
+ *
+ * @see move_in()
+ */
+ void set_value(const value_type& obj) { set_get::set_value(view(), obj); }
+
+ /** Get the value of the reducer.
+ *
+ * This function gets the final value of the reducer’s accumulator
+ * variable.
+ *
+ * @warning Calling this function other than to retrieve the final
+ * value of a reduction will almost always result in undefined
+ * behavior.
+ *
+ * @return A reference to the value contained in the reducer.
+ *
+ * @see move_out()
+ */
+ typename set_get::get_value_type get_value() const
+ { return set_get::get_value(view()); }
+
+ //@}
+
+ /** Implicit downcast to legacy reducer wrapper, if any.
+ *
+ * @see legacy_reducer_downcast
+ */
+ operator typename legacy_reducer_downcast<reducer>::type& ()
{
+ typedef typename legacy_reducer_downcast<reducer>::type downcast_type;
+ return *reinterpret_cast<downcast_type*>(this);
}
- template <typename A, typename B, typename C, typename D, typename E>
- reducer(const Monoid& m, const A& a, const B& b, const C& c,
- const D& d, const E& e)
- : monoid_(m), obj_(a, b, c, d, e)
+
+ /** Implicit downcast to legacy reducer wrapper, if any.
+ *
+ * @see legacy_reducer_downcast
+ */
+ operator const typename legacy_reducer_downcast<reducer>::type& () const
{
+ typedef typename legacy_reducer_downcast<reducer>::type downcast_type;
+ return *reinterpret_cast<const downcast_type*>(this);
}
+};
- ~reducer() { }
+#ifdef CILK_STUB
+} // namespace stub
+using stub::reducer;
+#endif
- value_type& view() { return obj_; }
- value_type const& view() const { return obj_; }
+} // end namespace cilk
- value_type& operator()() { return view(); }
- value_type const& operator()() const { return view(); }
+#endif /* __cplusplus */
- const Monoid& monoid() const { return monoid_; }
+/** @page page_reducers_in_c Creating and Using Reducers in C
+ *
+ * @tableofcontents
+ *
+ * The Cilk runtime supports reducers written in C as well as in C++. The basic logic is the
+ * same, but the implementation details are very different. The C++ reducer implementation uses
+ * templates heavily to create very generic components. The C reducer implementation uses
+ * macros, which are a much blunter instrument. The most immediate consequence is that the
+ * monoid/view/reducer architecture is mostly implicit rather than explicit in C reducers.
+ *
+ * @section reducers_c_overview Overview of Using Reducers in C
+ *
+ * The basic usage pattern for C reducers is:
+ *
+ * 1. Create and initialize a reducer object.
+ * 2. Tell the Cilk runtime about the reducer.
+ * 3. Update the value contained in the reducer in a parallel computation.
+ * 4. Tell the Cilk runtime that you are done with the reducer.
+ * 5. Retrieve the value from the reducer.
+ *
+ * @subsection reducers_c_creation Creating and Initializing a C Reducer
+ *
+ * The basic pattern for creating and initializing a reducer object in C is
+ *
+ * CILK_C_DECLARE_REDUCER(value-type) reducer-name =
+ * CILK_C_INIT_REDUCER(value-type,
+ * reduce-function,
+ * identity-function,
+ * destroy-function,
+ * initial-value);
+ *
+ * This is simply an initialized definition of a variable named _reducer-name_. The
+ * @ref CILK_C_DECLARE_REDUCER macro expands to an anonymous `struct` declaration for a reducer
+ * object containing a view of type _value-type_, and the @ref CILK_C_INIT_REDUCER macro
+ * expands to a struct initializer.
+ *
+ * @subsection reducers_c_reduce_func Reduce Functions
+ *
+ * The reduce function for a reducer is called when a parallel execution strand terminates, to
+ * combine the values computed by the terminating strand and the strand to its left. It takes
+ * three arguments:
+ *
+ * - `void* reducer` — the address of the reducer.
+ * - `void* left` — the address of the value for the left strand.
+ * - `void* right` — the address of the value for the right (terminating) strand.
+ *
+ * It must apply the reducer’s reduction operation to the `left` and `right` values, leaving
+ * the result in the `left` value. The `right` value is undefined after the reduce function
+ * call.
+ *
+ * @subsection reducers_c_identity_func Identity Functions
+ *
+ * The identity function for a reducer is called when a parallel execution strand begins, to
+ * initialize its value to the reducer’s identity value. It takes two arguments:
+ *
+ * - `void* reducer` — the address of the reducer.
+ * - `void* v` — the address of a freshly allocated block of memory of size
+ * `sizeof(value-type)`.
+ *
+ * It must initialize the memory pointed to by `v` so that it contains the reducer’s identity
+ * value.
+ *
+ * @subsection reducers_c_destroy_func Destroy Functions
+ *
+ * The destroy function for a reducer is called when a parallel execution strand terminates, to
+ * do any necessary cleanup before its value is deallocated. It takes two arguments:
+ *
+ * - `void* reducer` — the address of the reducer.
+ * - `void* p` — the address of the value for the terminating strand.
+ *
+ * It must release any resources belonging to the value pointed to by `p`, to avoid a resource
+ * leak when the memory containing the value is deallocated.
+ *
+ * The runtime function `__cilkrts_hyperobject_noop_destroy` can be used for the destroy
+ * function if the reducer’s values do not need any cleanup.
+ *
+ * @subsection reducers_c_register Tell the Cilk Runtime About the Reducer
+ *
+ * Call the @ref CILK_C_REGISTER_REDUCER macro to register the reducer with the Cilk runtime:
+ *
+ * CILK_C_REGISTER_REDUCER(reducer-name);
+ *
+ * The runtime will manage reducer values for all registered reducers when parallel execution
+ * strands begin and end.
+ *
+ * @subsection reducers_c_update Update the Value Contained in the Reducer
+ *
+ * The @ref REDUCER_VIEW macro returns a reference to the reducer’s value for the current
+ * parallel strand:
+ *
+ * REDUCER_VIEW(reducer-name) = REDUCER_VIEW(reducer-name) OP x;
+ *
+ * C++ reducer views restrict access to the wrapped value so that it can only be modified in
+ * ways consistent with the reducer’s operation. No such protection is provided for C reducers.
+ * It is
+ * entirely the responsibility of the user to avoid modifying the value in any
+ * inappropriate way.
+ *
+ * @subsection c_reducers_unregister Tell the Cilk Runtime That You Are Done with the Reducer
+ *
+ * When the parallel computation is complete, call the @ref CILK_C_UNREGISTER_REDUCER macro to
+ * unregister the reducer with the Cilk runtime:
+ *
+ * CILK_C_UNREGISTER_REDUCER(reducer-name);
+ *
+ * The runtime will stop managing reducer values for the reducer.
+ *
+ * @subsection c_reducers_retrieve Retrieve the Value from the Reducer
+ *
+ * When the parallel computation is complete, use the @ref REDUCER_VIEW macro to retrieve the
+ * final value computed by the reducer.
+ *
+ * @subsection reducers_c_example_custom Example — Creating and Using a Custom C Reducer
+ *
+ * The `IntList` type represents a simple list of integers.
+ *
+ * typedef struct _intListNode {
+ * int value;
+ * struct _intListNode* next;
+ * } IntListNode;
+ * typedef struct { IntListNode* head; IntListNode* tail; } IntList;
+ *
+ * // Initialize a list to be empty
+ * void IntList_init(IntList* list) { list->head = list->tail = 0; }
+ *
+ * // Append an integer to the list
+ * void IntList_append(IntList* list, int x)
+ * {
+ * IntListNode* node = (IntListNode*) malloc(sizeof(IntListNode));
+ * node->value = x;
+ * node->next = 0;
+ * if (list->tail) list->tail->next = node; else list->head = node;
+ * list->tail = node;
+ * }
+ *
+ * // Append the right list to the left list, and leave the right list empty
+ * void IntList_concat(IntList* left, IntList* right)
+ * {
+ * if (left->head) {
+ * left->tail->next = right->head;
+ * if (right->tail) left->tail = right->tail;
+ * }
+ * else {
+ * *left = *right;
+ * }
+ * IntList_init(right);
+ * }
+ *
+ * This code creates a reducer that supports creating an `IntList` by appending values to it.
+ *
+ * void identity_IntList(void* reducer, void* list)
+ * {
+ * IntList_init((IntList*)list);
+ * }
+ *
+ * void reduce_IntList(void* reducer, void* left, void* right)
+ * {
+ * IntList_concat((IntList*)left, (IntList*)right);
+ * }
+ *
+ * CILK_C_DECLARE_REDUCER(IntList) my_int_list_reducer =
+ * CILK_C_INIT_REDUCER(IntList,
+ * reduce_IntList,
+ * identity_IntList,
+ * __cilkrts_hyperobject_noop_destroy);
+ * // Initial value omitted //
+ * IntList_init(&REDUCER_VIEW(my_int_list_reducer));
+ *
+ * CILK_C_REGISTER_REDUCER(my_int_list_reducer);
+ * cilk_for (int i = 0; i != n; ++i) {
+ * IntList_append(&REDUCER_VIEW(my_int_list_reducer), a[i]);
+ * }
+ * CILK_C_UNREGISTER_REDUCER(my_int_list_reducer);
+ *
+ * IntList result = REDUCER_VIEW(my_int_list_reducer);
+ *
+ * @section reducers_c_predefined Predefined C Reducers
+ *
+ * Some of the predefined reducer classes in the Cilk library come with a set of predefined
+ * macros to provide the same capabilities in C. In general, two macros are provided for each
+ * predefined reducer family:
+ *
+ * - `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)` — Declares a
+ * reducer object named _reducer-name_ with initial value _initial-value_ to perform
+ * a reduction using the _operation_ on values of the type specified by _type-name_.
+ * This is the equivalent of the general code described in @ref reducers_c_creation :
+ *
+ * CILK_C_DECLARE_REDUCER(type) reducer-name =
+ * CILK_C_INIT_REDUCER(type, ..., initial-value);
+ *
+ * where _type_ is the C type corresponding to _type-name_. See @ref reducers_c_type_names
+ * below for the _type-names_ that you can use.
+ *
+ * - `CILK_C_REDUCER_operation_TYPE(type-name)` — Expands to the `typedef` name for the type
+ * of the reducer object declared by
+ * `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)`.
+ *
+ * See @ref reducers_c_example_predefined.
+ *
+ * The predefined C reducers are:
+ *
+ * | Operation | Name | Documentation |
+ * |-------------------|---------------|-------------------------------|
+ * | addition | `OPADD` | @ref ReducersAdd |
+ * | bitwise and | `OPAND` | @ref ReducersAnd |
+ * | bitwise or | `OPOR` | @ref ReducersOr |
+ * | bitwise xor | `OPXOR` | @ref ReducersXor |
+ * | multiplication | `OPMUL` | @ref ReducersMul |
+ * | minimum | `MIN` | @ref ReducersMinMax |
+ * | minimum & index | `MIN_INDEX` | @ref ReducersMinMax |
+ * | maximum | `MAX` | @ref ReducersMinMax |
+ * | maximum & index | `MAX_INDEX` | @ref ReducersMinMax |
+ *
+ * @subsection reducers_c_type_names Numeric Type Names
+ *
+ * The type and function names created by the C reducer definition macros incorporate both the
+ * reducer kind (`opadd`, `opxor`, etc.) and the value type of the reducer (`int`, `double`,
+ * etc.). The value type is represented by a _numeric type name_ string. The types supported
+ * in C reducers, and their corresponding numeric type names, are given in the following table:
+ *
+ * | Type | Numeric Type Name |
+ * |-----------------------|-------------------------------|
+ * | `char` | `char` |
+ * | `unsigned char` | `uchar` |
+ * | `signed char` | `schar` |
+ * | `wchar_t` | `wchar_t` |
+ * | `short` | `short` |
+ * | `unsigned short` | `ushort` |
+ * | `int` | `int` |
+ * | `unsigned int` | `uint` |
+ * | `unsigned int` | `unsigned` (alternate name) |
+ * | `long` | `long` |
+ * | `unsigned long` | `ulong` |
+ * | `long long` | `longlong` |
+ * | `unsigned long long` | `ulonglong` |
+ * | `float` | `float` |
+ * | `double` | `double` |
+ * | `long double` | `longdouble` |
+ *
+ * @subsection reducers_c_example_predefined Example — Using a Predefined C Reducer
+ *
+ * To compute the sum of all the values in an array of `unsigned int`:
+ *
+ * CILK_C_REDUCER_OPADD(sum, uint, 0);
+ * CILK_C_REGISTER_REDUCER(sum);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(sum) += a[i];
+ * }
+ * CILK_C_UNREGISTER_REDUCER(sum);
+ * printf("The sum is %u\n", REDUCER_VIEW(sum));
+ */
-}; // stub::reducer
+
+ /** @name C language reducer macros
+ *
+ * These macros are used to declare and work with reducers in C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
-} // end namespace stub
-} // end namespace cilk
+/// @cond internal
-#endif // CILK_STUB
+/** @name Compound identifier macros.
+ *
+ * These macros are used to construct an identifier by concatenating two or three identifiers.
+ */
+//@{
-#endif /* __cplusplus */
+/** Expand to an identifier formed by concatenating two identifiers.
+ */
+#define __CILKRTS_MKIDENT(a,b) __CILKRTS_MKIDENT_IMP(a,b,)
-/*===================== C interfaces ===================================*/
+/** Expand to an identifier formed by concatenating three identifiers.
+ */
+#define __CILKRTS_MKIDENT3(a,b,c) __CILKRTS_MKIDENT_IMP(a,b,c)
+
+/** Helper macro to do the concatenation.
+ */
+#define __CILKRTS_MKIDENT_IMP(a,b,c) a ## b ## c
+//@}
+
+/** Compiler-specific keyword for the “type of” operator.
+ */
#if defined(__GNUC__) && !defined(__INTEL_COMPILER)
# define _Typeof __typeof__
#endif
-/* MACROS FOR DEFINING AND USING C REDUCERS
- *
- * Example use of these macros
- *
- * double array[ARRAY_LEN];
- * double sum()
- * {
- * extern void* double_summing_identity();
- * extern void double_summing_reduce(void* lhs, void* rhs);
+/** @name Predefined reducer function declaration macros.
*
- * CILK_C_DECLARE_REDUCER(double) total =
- * CILK_C_INIT_REDUCER(sizeof(double),
- * double_summing_reduce,
- * double_summing_identity,
- * free,
- * 0);
- * int i;
- *
- * CILK_C_REGISTER_REDUCER(total);
- *
- * cilk_for (i = 0; i < ARRAY_LEN; ++i)
- * REDUCER_VIEW(total) += array[i];
+ * These macros are used to create the function headers for the identity, reduction,
+ * and destructor functions for a builtin reducer family. The macro can be followed by
+ * a semicolon to create a declaration, or by a brace-enclosed body to create a definition.
+ */
+//@{
+
+/** Create an identity function header.
*
- * CILK_C_UNREGISTER_REDUCER(total);
+ * @note The name of the function’s value pointer parameter will always be `v`.
*
- * // Never access total.value directly -- the compiler optimizer assumes
- * // that REDUCER_VIEW(total) is the ONLY way to refer to the value.
- * return REDUCER_VIEW(total);
- * }
+ * @param name The reducer family name.
+ * @param tn The type name.
*/
-
-/***************************************************************************
- * Common to real and stub implementations
- ***************************************************************************/
-
-__CILKRTS_BEGIN_EXTERN_C
-
#define __CILKRTS_DECLARE_REDUCER_IDENTITY(name,tn) CILK_EXPORT \
void __CILKRTS_MKIDENT3(name,_identity_,tn)(void* key, void* v)
+
+/** Create a reduction function header.
+ *
+ * @param name The reducer family name.
+ * @param tn The type name.
+ * @param l The name to use for the function’s left value pointer parameter.
+ * @param r The name to use for the function’s right value pointer parameter.
+ */
#define __CILKRTS_DECLARE_REDUCER_REDUCE(name,tn,l,r) CILK_EXPORT \
void __CILKRTS_MKIDENT3(name,_reduce_,tn)(void* key, void* l, void* r)
+
+/** Create a destructor function header.
+ *
+ * @param name The reducer family name.
+ * @param tn The type name.
+ * @param p The name to use for the function’s value pointer parameter.
+ */
#define __CILKRTS_DECLARE_REDUCER_DESTROY(name,tn,p) CILK_EXPORT \
void __CILKRTS_MKIDENT3(name,_destroy_,tn)(void* key, void* p)
-__CILKRTS_END_EXTERN_C
+//@}
+/// @endcond
-#ifndef CILK_STUB
/***************************************************************************
* Real implementation
***************************************************************************/
-__CILKRTS_BEGIN_EXTERN_C
-
-/* Declare a reducer with 'Type' value type */
+/** Declaration of a C reducer structure type.
+ *
+ * This macro expands into an anonymous structure declaration for a C reducer structure
+ * which contains a @a Type value. For example:
+ *
+ * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer =
+ * CILK_C_INIT_REDUCER(int, …);
+ *
+ * @param Type The type of the value contained in the reducer object.
+ *
+ * @see @ref reducers_c_creation
+ */
#define CILK_C_DECLARE_REDUCER(Type) struct { \
__cilkrts_hyperobject_base __cilkrts_hyperbase; \
- __CILKRTS_CACHE_ALIGNED(Type value); \
+ __CILKRTS_CACHE_ALIGN Type value; \
}
-/* Initialize a reducer using the Identity, Reduce, and Destroy functions
- * (the monoid) and with an arbitrary-length comma-separated initializer list.
+/** Initializer for a C reducer structure.
+ *
+ * This macro expands into a brace-enclosed structure initializer for a C reducer structure
+ * that was declared with `CILK_C_DECLARE_REDUCER(Type)`. For example:
+ *
+ * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer =
+ * CILK_C_INIT_REDUCER(int,
+ * add_int_reduce,
+ * add_int_identity,
+ * __cilkrts_hyperobject_noop_destroy,
+ * 0);
+ *
+ * @param Type The type of the value contained in the reducer object. Must be the same as
+ * the @a Type argument of the CILK_C_DECLARE_REDUCER macro call that created
+ * the reducer.
+ * @param Reduce The address of the @ref reducers_c_reduce_func "reduce function" for the
+ * reducer.
+ * @param Identity The address of the @ref reducers_c_identity_func "identity function" for
+ * the reducer.
+ * @param Destroy The address of the @ref reducers_c_destroy_func "destroy function" for the
+ * reducer.
+ * @param ... The initial value for the reducer. (A single expression if @a Type is a
+ * scalar type; a list of values if @a Type is a struct or array type.)
+ *
+ * @see @ref reducers_c_creation
*/
-#define CILK_C_INIT_REDUCER(T,Reduce,Identity,Destroy, ...) \
- { { { Reduce,Identity,Destroy, \
- __cilkrts_hyperobject_alloc,__cilkrts_hyperobject_dealloc }, \
- 0, __CILKRTS_CACHE_LINE__, sizeof(T) }, __VA_ARGS__ }
-/* Register a local reducer. */
+#define CILK_C_INIT_REDUCER(Type, Reduce, Identity, Destroy, ...) \
+ { { { Reduce \
+ , Identity \
+ , Destroy \
+ , __cilkrts_hyperobject_alloc \
+ , __cilkrts_hyperobject_dealloc \
+ } \
+ , 0 \
+ , __CILKRTS_CACHE_LINE__ \
+ , sizeof(Type) \
+ } \
+ , __VA_ARGS__ \
+ }
+
+/** Register a reducer with the Cilk runtime.
+ *
+ * The runtime will manage reducer values for all registered reducers when parallel execution
+ * strands begin and end. For example:
+ *
+ * CILK_C_REGISTER_REDUCER(my_add_int_reducer);
+ * cilk_for (int i = 0; i != n; ++i) {
+ * …
+ * }
+ *
+ * @param Expr The reducer to be registered.
+ *
+ * @see @ref page_reducers_in_c
+ */
#define CILK_C_REGISTER_REDUCER(Expr) \
__cilkrts_hyper_create(&(Expr).__cilkrts_hyperbase)
-/* Unregister a local reducer. */
+/** Unregister a reducer with the Cilk runtime.
+ *
+ * The runtime will stop managing reducer values for a reducer after it is unregistered. For
+ * example:
+ *
+ * cilk_for (int i = 0; i != n; ++i) {
+ * …
+ * }
+ * CILK_C_UNREGISTER_REDUCER(my_add_int_reducer);
+ *
+ * @param Expr The reducer to be unregistered.
+ *
+ * @see @ref page_reducers_in_c
+ */
#define CILK_C_UNREGISTER_REDUCER(Expr) \
__cilkrts_hyper_destroy(&(Expr).__cilkrts_hyperbase)
-/* Get the current view for a reducer */
+/** Get the current view for a reducer.
+ *
+ * The `REDUCER_VIEW(reducer-name)` macro returns a reference to the reducer’s value for the
+ * current parallel strand. This can be used to initialize the value of the reducer before it
+ * is used, to modify the value of the reducer on the current parallel strand, or to retrieve
+ * the final value of the reducer at the end of the parallel computation.
+ *
+ * REDUCER_VIEW(my_add_int_reducer) = REDUCER_VIEW(my_add_int_reducer) + x;
+ *
+ * @note C++ reducer views restrict access to the wrapped value so that it can only be
+ * modified in ways consistent with the reducer’s operation. No such protection is provided
+ * for C reducers. It is entirely the responsibility of the user to refrain from modifying the
+ * value in any inappropriate way.
+ *
+ * @param Expr The reducer whose value is to be returned.
+ *
+ * @see @ref page_reducers_in_c
+ */
#define REDUCER_VIEW(Expr) (*(_Typeof((Expr).value)*) \
__cilkrts_hyper_lookup(&(Expr).__cilkrts_hyperbase))
-__CILKRTS_END_EXTERN_C
-
-#else /* if defined(CILK_STUB) */
-
-/***************************************************************************
- * Stub implementation
- ***************************************************************************/
-
-__CILKRTS_BEGIN_EXTERN_C
-
-/* Declare a reducer with 'Type' value type */
-#define CILK_C_DECLARE_REDUCER(Type) struct { \
- Type value; \
- }
-
-/* Initialize a reducer using the Identity, Reduce, and Destroy functions
- * (the monoid) and with an arbitrary-length comma-separated initializer list.
- */
-#define CILK_C_INIT_REDUCER(T,Identity,Reduce,Destroy, ...) \
- { __VA_ARGS__ }
-
-/* Register a local reducer. */
-#define CILK_C_REGISTER_REDUCER(Expr) ((void) Expr)
-
-/* Unregister a local reducer. */
-#define CILK_C_UNREGISTER_REDUCER(Expr) ((void) Expr)
-
-/* Get the current view for a reducer */
-#define REDUCER_VIEW(Expr) ((Expr).value)
-
-__CILKRTS_END_EXTERN_C
-
-#endif /* CILK_STUB */
+//@} C language reducer macros
#endif // CILK_REDUCER_H_INCLUDED
diff --git a/libcilkrts/include/cilk/reducer_file.h b/libcilkrts/include/cilk/reducer_file.h
index 828a7bf9254..39bae92bd76 100644
--- a/libcilkrts/include/cilk/reducer_file.h
+++ b/libcilkrts/include/cilk/reducer_file.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
diff --git a/libcilkrts/include/cilk/reducer_list.h b/libcilkrts/include/cilk/reducer_list.h
index d021577f816..b45acb0e915 100644
--- a/libcilkrts/include/cilk/reducer_list.h
+++ b/libcilkrts/include/cilk/reducer_list.h
@@ -1,517 +1,1122 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer_list.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_list.h
- *
- * Purpose: Reducer hyperobject to accumulate a list of elements.
- *
- * Classes: reducer_list_append<Type, Allocator>
- * reducer_list_prepend<Type, Allocator>
- *
- * Description:
- * ============
- * This component provides reducer-type hyperobject representations that allow
- * either prepending or appending values to an STL list. By replacing the
- * variable with the hyperobject defined in this component, the data race is
- * eliminated.
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing an operation on
- * each object and accumulating the result of the operation into an STL list
- * variable.
- *..
- * int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * std::list<int> result;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result.push_back(compute(myArray[i]));
- * }
+/** @file reducer_list.h
*
- * std::cout << "The result is: ";
- * for (std::list<int>::iterator i = result.begin(); i != result.end();
- * ++i)
- * {
- * std::cout << *i << " " << std::endl;
- * }
+ * @brief Defines classes for doing parallel list creation by appending or
+ * prepending.
*
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' list.
- * The race is solved by changing 'result' to a 'reducer_list_append'
- * hyperobject:
- *..
- * int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_list_append<int> result;
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result->push_back(compute(myArray[i]));
- * }
+ * @ingroup ReducersList
*
- * std::cout << "The result is: ";
- * const std::list &r = result->get_value();
- * for (std::list<int>::const_iterator i = r.begin(); i != r.end(); ++i)
+ * @see ReducersList
+ */
+
+#ifndef REDUCER_LIST_H_INCLUDED
+#define REDUCER_LIST_H_INCLUDED
+
+#include <cilk/reducer.h>
+#include <list>
+
+/** @defgroup ReducersList List Reducers
+ *
+ * List append and prepend reducers allow the creation of a standard list by
+ * concatenating a set of lists or values in parallel.
+ *
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redlist_usage Usage Example
+ *
+ * // Create a list containing the labels of the nodes of a tree in
+ * // “inorder” (left subtree, root, right subtree).
+ *
+ * struct Tree { Tree* left; Tree* right; string label; ... };
+ *
+ * list<string> x;
+ * cilk::reducer< cilk::op_list_append<string> > xr(cilk::move_in(x));
+ * collect_labels(tree, xr);
+ * xr.move_out(x);
+ *
+ * void collect_labels(Tree* node,
+ * cilk::reducer< cilk::op_list_append<string> >& xr)
* {
- * std::cout << *i << " " << std::endl;
+ * if (node) {
+ * cilk_spawn collect_labels(node->left, xr);
+ * xr->push_back(node->label);
+ * collect_labels(node->right, xr);
+ * cilk_sync;
+ * }
* }
*
- * return 0;
- * }
- *..
+ * @section redlist_monoid The Monoid
+ *
+ * @subsection redlist_monoid_values Value Set
+ *
+ * The value set of a list reducer is the set of values of the class
+ * `std::list<Type, Allocator>`, which we refer to as “the reducer’s list
+ * type”.
+ *
+ * @subsection redlist_monoid_operator Operator
+ *
+ * The operator of a list append reducer is defined as
+ *
+ * x CAT y == (every element of x, followed by every element of y)
+ *
+ * The operator of a list prepend reducer is defined as
+ *
+ * x RCAT y == (every element of y, followed by every element of x)
+ *
+ * @subsection redlist_monoid_identity Identity
+ *
+ * The identity value of a list reducer is the empty list, which is the value
+ * of the expression `std::list<Type, Allocator>([allocator])`.
+ *
+ * @section redlist_operations Operations
+ *
+ * In the operation descriptions below, the type name `List` refers to the
+ * reducer’s list type, `std::list<Type, Allocator>`.
+ *
+ * @subsection redlist_constructors Constructors
+ *
+ * Any argument list which is valid for a `std::list` constructor is valid for
+ * a list reducer constructor. The usual move-in constructor is also provided:
+ *
+ * reducer(move_in(List& variable))
+ *
+ * A list reducer with no constructor arguments, or with only an allocator
+ * argument, will initially contain the identity value, an empty list.
+ *
+ * @subsection redlist_get_set Set and Get
+ *
+ * r.set_value(const List& value)
+ * const List& = r.get_value() const
+ * r.move_in(List& variable)
+ * r.move_out(List& variable)
+ *
+ * @subsection redlist_view_ops View Operations
+ *
+ * The view of a list append reducer provides the following member functions:
+ *
+ * void push_back(const Type& element)
+ * void insert_back(List::size_type n, const Type& element)
+ * template <typename Iter> void insert_back(Iter first, Iter last)
+ * void splice_back(List& x)
+ * void splice_back(List& x, List::iterator i)
+ * void splice_back(List& x, List::iterator first, List::iterator last)
+ *
+ * The view of a list prepend reducer provides the following member functions:
+ *
+ * void push_front(const Type& element)
+ * void insert_front(List::size_type n, const Type& element)
+ * template <typename Iter> void insert_front(Iter first, Iter last)
+ * void splice_front(List& x)
+ * void splice_front(List& x, List::iterator i)
+ * void splice_front(List& x, List::iterator first, List::iterator last)
+ *
+ * The `push_back` and `push_front` functions are the same as the
+ * corresponding `std::list` functions. The `insert_back`, `splice_back`,
+ * `insert_front`, and `splice_front` functions are the same as the
+ * `std::list` `insert` and `splice` functions, with the first parameter
+ * fixed to the end or beginning of the list, respectively.
+ *
+ * @section redlist_performance Performance Considerations
+ *
+ * An efficient reducer requires that combining the values of two views (using
+ * the view `reduce()` function) be a constant-time operation. Two lists can
+ * be merged in constant time using the `splice()` function if they have the
+ * same allocator. Therefore, the lists for new views are created (by the view
+ * identity constructor) using the same allocator as the list that was created
+ * when the reducer was constructed.
*
- * Operations provided:
- * ====================
+ * The performance of adding elements to a list reducer depends on the view
+ * operations that are used:
*
- * 'reducer_list_prepend' and 'reducer_list_append' support accumulation of an
- * ordered list of items. Lists accumulated in Cilk++ strands will be merged
- * to maintain the order of the lists - the order will be the same as if the
- * application was run on a single core.
+ * * The `push` functions add a single element to the list, and therefore
+ * take constant time.
+ * * An `insert` function that inserts _N_ elements adds each of them
+ * individually, and therefore takes _O(N)_ time.
+ * * A `splice` function that inserts _N_ elements just adjusts a couple of
+ * pointers, and therefore takes constant time, _if the splice is from a
+ * list with the same allocator as the reducer_. Otherwise, it is
+ * equivalent to an `insert`, and takes _O(N)_ time.
*
- * The the current value of the reducer can be gotten and set using the
- * 'get_value', 'get_reference', and 'set_value' methods. As with most
- * reducers, these methods produce deterministic results only if called before
- * the first spawn after creating a 'hyperobject' or when all strands spawned
- * since creating the 'hyperobject' have been synced.
+ * This means that for best performance, if you will be adding elements to a
+ * list reducer in batches, you should `splice` them from a list having the
+ * same allocator as the reducer.
+ *
+ * The reducer `move_in` and `move_out` functions do a constant-time `swap` if
+ * the variable has the same allocator as the reducer, and a linear-time copy
+ * otherwise.
+ *
+ * Note that the allocator of a list reducer is determined when the reducer is
+ * constructed. The following two examples may have very different behavior:
+ *
+ * list<Element, Allocator> a_list;
+ *
+ * reducer< op_list_append<Element, Allocator> > reducer1(move_in(a_list));
+ * ... parallel computation ...
+ * reducer1.move_out(a_list);
+ *
+ * reducer< op_list_append<Element, Allocator> > reducer2;
+ * reducer2.move_in(a_list);
+ * ... parallel computation ...
+ * reducer2.move_out(a_list);
+ *
+ * * `reducer1` will be constructed with the same allocator as `a_list`,
+ * because the list was specified in the constructor. The `move_in`
+ * and `move_out` can therefore be done with a `swap` in constant time.
+ * * `reducer2` will be constructed with a _default_ allocator,
+ * “`Allocator()`”, which may or may not be the same as the allocator of
+ * `a_list`. Therefore, the `move_in` and `move_out` may have to be done
+ * with a copy in _O(N)_ time.
+ *
+ * (All instances of an allocator type with no internal state (like
+ * `std::allocator`) are “the same”. You only need to worry about the “same
+ * allocator” issue when you create list reducers with custom allocator types.)
+ *
+ * @section redlist_types Type and Operator Requirements
+ *
+ * `std::list<Type, Allocator>` must be a valid type.
*/
-#ifndef REDUCER_LIST_H_INCLUDED
-#define REDUCER_LIST_H_INCLUDED
-#include <cilk/reducer.h>
-#include <list>
+namespace cilk {
-namespace cilk
-{
+namespace internal {
-/**
- * @brief Reducer hyperobject to accumulate a list of elements where elements
- * are added to the end of the list.
+/** @ingroup ReducersList */
+//@{
+
+/** Base class for list append and prepend view classes.
+ *
+ * @note This class provides the definitions that are required for a class
+ * that will be used as the parameter of a @ref list_monoid_base
+ * specialization.
+ *
+ * @tparam Type The list element type (not the list type).
+ * @tparam Allocator The list's allocator class.
+ *
+ * @see ReducersList
+ * @see list_monoid_base
*/
-template<class _Ty,
- class _Ax = std::allocator<_Ty> >
-class reducer_list_append
+template <typename Type, typename Allocator>
+class list_view_base
{
-public:
- /// std::list reducer_list_prepend is based on
- typedef std::list<_Ty, _Ax> list_type;
- /// Type of elements in a reducer_list_prepend
- typedef _Ty list_value_type;
- /// Type of elements in a reducer_list_prepend
- typedef _Ty basic_value_type;
+protected:
+ /// The type of the contained list.
+ typedef std::list<Type, Allocator> list_type;
+
+ /// The list accumulator variable.
+ list_type m_value;
public:
- /// Definition of data view, operation, and identity for reducer_list_append
- struct Monoid: monoid_base<std::list<_Ty, _Ax> >
- {
- static void reduce (std::list<_Ty, _Ax> *left,
- std::list<_Ty, _Ax> *right);
- };
-private:
- reducer<Monoid> imp_;
+ /** @name Monoid support.
+ */
+ //@{
+
+ /// Required by @ref monoid_with_view
+ typedef list_type value_type;
-public:
+ /// Required by @ref list_monoid_base
+ Allocator get_allocator() const
+ {
+ return m_value.get_allocator();
+ }
+
+ //@}
+
+
+ /** @name Constructors.
+ */
+ //@{
+
+ /// Standard list constructor.
+ explicit list_view_base(const Allocator& a = Allocator()) : m_value(a) {}
+ explicit list_view_base(
+ typename list_type::size_type n,
+ const Type& value = Type(),
+ const Allocator& a = Allocator() ) : m_value(n, value, a) {}
+ template <typename Iter>
+ list_view_base(Iter first, Iter last, const Allocator& a = Allocator()) :
+ m_value(first, last, a) {}
+ list_view_base(const list_type& list) : m_value(list) {}
- // Default Constructor - Construct a reducer with an empty list
- reducer_list_append();
+ /// Move-in constructor.
+ explicit list_view_base(move_in_wrapper<value_type> w)
+ : m_value(w.value().get_allocator())
+ {
+ m_value.swap(w.value());
+ }
+
+ //@}
+
+ /** @name Reducer support.
+ */
+ //@{
+
+ /// Required by reducer::move_in().
+ ///
+ /// Transfers the contents of @a v into this view's list. The list @a v is
+ /// emptied afterwards on both code paths.
+ void view_move_in(value_type& v)
+ {
+     if (!(m_value.get_allocator() == v.get_allocator())) {
+         // Unequal allocators. Do a (slow) copy.
+         m_value = v;
+     }
+     else {
+         // Equal allocators. Do a (fast) swap.
+         m_value.swap(v);
+     }
+     // Unconditional: after a swap this discards the view's previous
+     // contents; after a copy it discards the already-copied source.
+     v.clear();
+ }
+
+ /// Required by reducer::move_out().
+ ///
+ /// Transfers the contents of this view's list into @a v. The view's own
+ /// list is emptied afterwards on both code paths.
+ void view_move_out(value_type& v)
+ {
+     if (!(m_value.get_allocator() == v.get_allocator())) {
+         // Unequal allocators. Do a (slow) copy.
+         v = m_value;
+     }
+     else {
+         // Equal allocators. Do a (fast) swap.
+         m_value.swap(v);
+     }
+     // Unconditional: after a swap this discards what @a v held before the
+     // call; after a copy it discards the already-copied view contents.
+     m_value.clear();
+ }
+
+ /// Required by reducer::set_value()
+ void view_set_value(const value_type& v) { m_value = v; }
- // Construct a reducer with an initial list
- reducer_list_append(const std::list<_Ty, _Ax> &initial_value);
+ /// Required by reducer::get_value()
+ value_type const& view_get_value() const { return m_value; }
+
+ // Required by legacy wrapper get_reference()
+ value_type & view_get_reference() { return m_value; }
+ value_type const& view_get_reference() const { return m_value; }
+
+ //@}
+};
- // Return a const reference to the current list
- const std::list<_Ty, _Ax> &get_value() const;
- // Return a reference to the current list
- std::list<_Ty, _Ax> &get_reference();
- std::list<_Ty, _Ax> const &get_reference() const;
+/** Base class for list append and prepend monoid classes.
+ *
+ * The key to efficient reducers is that the `identity` operation, which
+ * creates a new per-strand view, and the `reduce` operation, which combines
+ * two per-strand views, must be constant-time operations. Two lists can be
+ * concatenated in constant time only if they have the same allocator.
+ * Therefore, all the per-strand list accumulator variables must be created
+ * with the same allocator as the leftmost view list.
+ *
+ * This means that a list reduction monoid must have a copy of the allocator
+ * of the leftmost view’s list, so that it can use it in the `identity`
+ * operation. This, in turn, requires that list reduction monoids have a
+ * specialized `construct()` function, which constructs the leftmost view
+ * before the monoid, and then passes the leftmost view’s allocator to the
+ * monoid constructor.
+ *
+ * @tparam View The list append or prepend view class.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersList
+ * @see list_view_base
+ */
+template <typename View, bool Align>
+class list_monoid_base : public monoid_with_view<View, Align>
+{
+ typedef typename View::value_type list_type;
+ typedef typename list_type::allocator_type allocator_type;
+ allocator_type m_allocator;
+
+ using monoid_base<list_type, View>::provisional;
+
+public:
- // Replace the list's contents with the given list
- void set_value(const list_type &value);
+ /** Constructor.
+ *
+ * Stores a copy of @a allocator in the monoid; `identity()` passes that
+ * copy to the constructor of each new view it creates.
+ *
+ * @note The parameter has a default value, so this constructor can also
+ * be called with no arguments, in which case a default-constructed
+ * `allocator_type` is stored. The `construct()` functions of this
+ * class always pass the leftmost view’s allocator explicitly.
+ *
+ * @param allocator The list allocator to be used when
+ * identity-constructing new views. Defaults to
+ * `allocator_type()`.
+ */
+ list_monoid_base(const allocator_type& allocator = allocator_type()) :
+ m_allocator(allocator) {}
- // Add an element to the end of the list
- void push_back(const _Ty element);
+ /** Create an identity view.
+ *
+ * List view identity constructors take the list allocator as an argument.
+ *
+ * @param v The address of the uninitialized memory in which the view
+ * will be constructed.
+ */
+ void identity(View *v) const { ::new((void*) v) View(m_allocator); }
+
+ /** @name construct functions
+ *
+ * All `construct()` functions first construct the leftmost view, using
+ * the optional @a x1, @a x2, and @a x3 arguments that were passed in from
+ * the reducer constructor. They then call the view’s `get_allocator()`
+ * function to get the list allocator from its contained list, and pass it
+ * to the monoid constructor.
+ */
+ //@{
- reducer_list_append& operator*() { return *this; }
- reducer_list_append const& operator*() const { return *this; }
+ template <typename Monoid>
+ static void construct(Monoid* monoid, View* view)
+ { provisional( new ((void*)view) View() ).confirm_if(
+ new ((void*)monoid) Monoid(view->get_allocator()) ); }
- reducer_list_append* operator->() { return this; }
- reducer_list_append const* operator->() const { return this; }
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, const T1& x1)
+ { provisional( new ((void*)view) View(x1) ).confirm_if(
+ new ((void*)monoid) Monoid(view->get_allocator()) ); }
-private:
- // Not copyable
- reducer_list_append(const reducer_list_append&);
- reducer_list_append& operator=(const reducer_list_append&);
+ template <typename Monoid, typename T1, typename T2>
+ static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2)
+ { provisional( new ((void*)view) View(x1, x2) ).confirm_if(
+ new ((void*)monoid) Monoid(view->get_allocator()) ); }
-}; // class reducer_list_append
+ template <typename Monoid, typename T1, typename T2, typename T3>
+ static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2,
+ const T3& x3)
+ { provisional( new ((void*)view) View(x1, x2, x3) ).confirm_if(
+ new ((void*)monoid) Monoid(view->get_allocator()) ); }
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
+ //@}
+};
-// ------------------------------------------
-// template class reducer_list_append::Monoid
-// ------------------------------------------
+//@}
-/**
- * Appends list from "right" reducer_list onto the end of the "left".
- * When done, the "right" reducer_list is empty.
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param left reducer_list to be reduced into
- * @param right reducer_list to be reduced from
- */
-template<class _Ty, class _Ax>
-void
-reducer_list_append<_Ty, _Ax>::Monoid::reduce(std::list<_Ty, _Ax> *left,
- std::list<_Ty, _Ax> *right)
-{
- left->splice(left->end(), *right);
-}
+} // namespace internal
-/**
- * Default constructor - creates an empty list
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- */
-template<class _Ty, class _Ax>
-reducer_list_append<_Ty, _Ax>::reducer_list_append() :
- imp_()
-{
-}
-/**
- * Construct a reducer_list_append based on a list
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param initial_value - [in] Inital list
- */
-template<class _Ty, class _Ax>
-reducer_list_append<_Ty, _Ax>::reducer_list_append(const std::list<_Ty, _Ax> &initial_value) :
- imp_(std::list<_Ty, _Ax>(initial_value))
-{
-}
+/** @ingroup ReducersList */
+//@{
-/**
- * Allows read-only access to the list - same as get_reference()
+/** The list append reducer view class.
*
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_list_append<Type, Allocator> >`. It holds the
+ * accumulator variable for the reduction, and allows only append operations
+ * to be performed on it.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A const reference to the list that is the current contents of this view.
- */
-template<class _Ty, class _Ax>
-const std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_value() const
-{
- return imp_.view();
-}
-
-/**
- * Allows mutable access to list
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `push_back` operation would be used in an expression like
+ * `r->push_back(a)`, where `r` is a list append reducer variable.
*
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
+ * @tparam Type The list element type (not the list type).
+ * @tparam Allocator The list allocator type.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A reference to the list that is the current contents of this view.
+ * @see ReducersList
+ * @see op_list_append
*/
-template<class _Ty, class _Ax>
-std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_reference()
+template <class Type,
+ class Allocator = typename std::list<Type>::allocator_type>
+class op_list_append_view : public internal::list_view_base<Type, Allocator>
{
- return imp_.view();
-}
+ typedef internal::list_view_base<Type, Allocator> base;
+ typedef std::list<Type, Allocator> list_type;
+ typedef typename list_type::iterator iterator;
+
+ iterator end() { return this->m_value.end(); }
-/**
- * Allows read-only access to list
- *
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A const reference to the list that is the current contents of this view
- */
-template<class _Ty, class _Ax>
-const std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_reference() const
-{
- return imp_.view();
-}
+public:
-/**
- * Replace the list's contents
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param value - The list to replace the current contents of this view
- */
-template<class _Ty, class _Ax>
-void reducer_list_append<_Ty, _Ax>::set_value(const list_type &value)
-{
- // Clean out any value in our list
- imp_.view().clear();
+ /** @name Constructors.
+ *
+ * All op_list_append_view constructors simply pass their arguments on to
+ * the @ref internal::list_view_base base class constructor.
+ *
+ * @ref internal::list_view_base supports all the std::list constructor
+ * forms, as well as the reducer move_in constructor form.
+ */
+ //@{
+
+ op_list_append_view() : base() {}
+
+ template <typename T1>
+ op_list_append_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_list_append_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ template <typename T1, typename T2, typename T3>
+ op_list_append_view(const T1& x1, const T2& x2, const T3& x3) :
+ base(x1, x2, x3) {}
- // If the new list is empty, we're done
- if (value.empty())
- return;
+ //@}
- // Copy each element into our list
- imp_.view() = value;
-}
+ /** @name View modifier operations.
+ */
+ //@{
+
+ /** Add an element at the end of the list.
+ *
+ * This is equivalent to `list.push_back(element)`
+ */
+ void push_back(const Type& element)
+ { this->m_value.push_back(element); }
-/**
- * Adds an element to the end of the list
- *
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param element - The element to be added to the end of the list
- */
-template<class _Ty, class _Ax>
-void reducer_list_append<_Ty, _Ax>::push_back(const _Ty element)
-{
- imp_.view().push_back(element);
-}
+ /** Insert elements at the end of the list.
+ *
+ * This is equivalent to `list.insert(list.end(), n, element)`
+ */
+ void insert_back(typename list_type::size_type n, const Type& element)
+ { this->m_value.insert(end(), n, element); }
-/**
- * @brief Reducer hyperobject to accumulate a list of elements where elements are
- * added to the beginning of the list.
- */
-template<class _Ty,
- class _Ax = std::allocator<_Ty> >
-class reducer_list_prepend
-{
-public:
- /// std::list reducer_list_prepend is based on
- typedef std::list<_Ty, _Ax> list_type;
- /// Type of elements in a reducer_list_prepend
- typedef _Ty list_value_type;
- /// Type of elements in a reducer_list_prepend
- typedef _Ty basic_value_type;
+ /** Insert elements at the end of the list.
+ *
+ * This is equivalent to `list.insert(list.end(), first, last)`
+ */
+ template <typename Iter>
+ void insert_back(Iter first, Iter last)
+ { this->m_value.insert(end(), first, last); }
-public:
- /// @brief Definition of data view, operation, and identity for reducer_list_prepend
- struct Monoid: monoid_base<std::list<_Ty, _Ax> >
- {
- static void reduce (std::list<_Ty, _Ax> *left,
- std::list<_Ty, _Ax> *right);
- };
+ /** Splice elements at the end of the list.
+ *
+ * This is equivalent to `list.splice(list.end(), x)`
+ */
+ void splice_back(list_type& x) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(end(), x);
+ else {
+ insert_back(x.begin(), x.end());
+ x.clear();
+ }
+ }
-private:
- reducer<Monoid> imp_;
+ /** Splice elements at the end of the list.
+ *
+ * This is equivalent to `list.splice(list.end(), x, i)`
+ */
+ void splice_back(list_type& x, iterator i) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(end(), x, i);
+ else {
+ push_back(*i);
+ x.erase(i);
+ }
+ }
-public:
+ /** Splice elements at the end of the list.
+ *
+ * This is equivalent to `list.splice(list.end(), x, first, last)`
+ */
+ void splice_back(list_type& x, iterator first, iterator last) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(end(), x, first, last);
+ else {
+ insert_back(first, last);
+ x.erase(first, last);
+ }
+ }
+
+ //@}
- // Default Constructor - Construct a reducer with an empty list
- reducer_list_prepend();
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_list_append monoid to combine
+ * the views of two strands when the right strand merges with the left
+ * one. It appends the value contained in the right-strand view to the
+ * value contained in the left-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_list_append monoid to implement the
+ * monoid reduce operation.
+ */
+ void reduce(op_list_append_view* right)
+ {
+ __CILKRTS_ASSERT(
+ this->m_value.get_allocator() == right->m_value.get_allocator());
+ this->m_value.splice(end(), right->m_value);
+ }
+};
- // Construct a reducer with an initial list
- reducer_list_prepend(const std::list<_Ty, _Ax> &initial_value);
- // Return a const reference to the current list
- const std::list<_Ty, _Ax> &get_value() const;
+/** The list prepend reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_list_prepend<Type, Allocator> >`. It holds the
+ * accumulator variable for the reduction, and allows only prepend operations
+ * to be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `push_front` operation would be used in an expression like
+ * `r->push_front(a)`, where `r` is a list prepend reducer variable.
+ *
+ * @tparam Type The list element type (not the list type).
+ * @tparam Allocator The list allocator type.
+ *
+ * @see ReducersList
+ * @see op_list_prepend
+ */
+template <class Type,
+ class Allocator = typename std::list<Type>::allocator_type>
+class op_list_prepend_view : public internal::list_view_base<Type, Allocator>
+{
+ typedef internal::list_view_base<Type, Allocator> base;
+ typedef std::list<Type, Allocator> list_type;
+ typedef typename list_type::iterator iterator;
+
+ iterator begin() { return this->m_value.begin(); }
- // Return a reference to the current list
- std::list<_Ty, _Ax> &get_reference();
- std::list<_Ty, _Ax> const &get_reference() const;
+public:
- // Replace the list's contents with the given list
- void set_value(const list_type &value);
+ /** @name Constructors.
+ *
+ * All op_list_prepend_view constructors simply pass their arguments on to
+ * the @ref internal::list_view_base base class constructor.
+ *
+ * @ref internal::list_view_base supports all the std::list constructor
+ * forms, as well as the reducer move_in constructor form.
+ *
+ */
+ //@{
+
+ op_list_prepend_view() : base() {}
+
+ template <typename T1>
+ op_list_prepend_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_list_prepend_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ template <typename T1, typename T2, typename T3>
+ op_list_prepend_view(const T1& x1, const T2& x2, const T3& x3) :
+ base(x1, x2, x3) {}
- // Add an element to the beginning of the list
- void push_front(const _Ty element);
+ //@}
- reducer_list_prepend& operator*() { return *this; }
- reducer_list_prepend const& operator*() const { return *this; }
+ /** @name View modifier operations.
+ */
+ //@{
+
+ /** Add an element at the beginning of the list.
+ *
+ * This is equivalent to `list.push_front(element)`
+ */
+ void push_front(const Type& element)
+ { this->m_value.push_front(element); }
- reducer_list_prepend* operator->() { return this; }
- reducer_list_prepend const* operator->() const { return this; }
+ /** Insert elements at the beginning of the list.
+ *
+ * This is equivalent to `list.insert(list.begin(), n, element)`
+ */
+ void insert_front(typename list_type::size_type n, const Type& element)
+ { this->m_value.insert(begin(), n, element); }
-private:
- // Not copyable
- reducer_list_prepend(const reducer_list_prepend&);
- reducer_list_prepend& operator=(const reducer_list_prepend&);
+ /** Insert elements at the beginning of the list.
+ *
+ * This is equivalent to `list.insert(list.begin(), first, last)`
+ */
+ template <typename Iter>
+ void insert_front(Iter first, Iter last)
+ { this->m_value.insert(begin(), first, last); }
-}; // class reducer_list_prepend
+ /** Splice all elements of `x` onto the beginning of the list, emptying `x`.
+ *
+ * Equivalent to `list.splice(list.begin(), x)`; copies if allocators differ.
+ */
+ void splice_front(list_type& x) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(begin(), x);
+ else {
+ insert_front(x.begin(), x.end());
+ x.clear();
+ }
+ }
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
+ /** Splice elements at the beginning of the list.
+ *
+ * This is equivalent to `list.splice(list.begin(), x, i)`
+ */
+ void splice_front(list_type& x, iterator i) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(begin(), x, i);
+ else {
+ push_front(*i);
+ x.erase(i);
+ }
+ }
-// ------------------------------------
-// template class reducer_list_prepend::Monoid
-// ------------------------------------
+ /** Splice elements at the beginning of the list.
+ *
+ * This is equivalent to `list.splice(list.begin(), x, first, last)`
+ */
+ void splice_front(list_type& x, iterator first, iterator last) {
+ if (x.get_allocator() == this->m_value.get_allocator())
+ this->m_value.splice(begin(), x, first, last);
+ else {
+ insert_front(first, last);
+ x.erase(first, last);
+ }
+ }
+
+ //@}
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_list_prepend monoid to combine
+ * the views of two strands when the right strand merges with the left
+ * one. It prepends the value contained in the right-strand view to the
+ * value contained in the left-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_list_prepend monoid to implement the
+ * monoid reduce operation.
+ */
+ /** Reduce operation.
+ *
+ * Required by @ref monoid_base.
+ */
+ void reduce(op_list_prepend_view* right)
+ {
+ __CILKRTS_ASSERT(
+ this->m_value.get_allocator() == right->m_value.get_allocator());
+ this->m_value.splice(begin(), right->m_value);
+ }
+};
-/**
- * Appends list from "right" reducer_list onto the end of the "left".
- * When done, the "right" reducer_list is empty.
- */
-template<class _Ty, class _Ax>
-void
-reducer_list_prepend<_Ty, _Ax>::Monoid::reduce(std::list<_Ty, _Ax> *left,
- std::list<_Ty, _Ax> *right)
-{
- left->splice(left->begin(), *right);
-}
-/**
- * Default constructor - creates an empty list
- */
-template<class _Ty, class _Ax>
-reducer_list_prepend<_Ty, _Ax>::reducer_list_prepend() :
- imp_(std::list<_Ty, _Ax>())
-{
-}
-/**
- * Construct a reducer_list_prepend based on a list.
+/** Monoid class for list append reductions. Instantiate the cilk::reducer
+ * template class with an op_list_append monoid to create a list append reducer
+ * class. For example, to create a list of strings:
+ *
+ * cilk::reducer< cilk::op_list_append<std::string> > r;
*
- * @param initial_value List used to initialize the reducer_list_prepend
+ * @tparam Type The list element type (not the list type).
+ * @tparam Allocator The list allocator type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersList
+ * @see op_list_append_view
*/
-template<class _Ty, class _Ax>
-reducer_list_prepend<_Ty, _Ax>::reducer_list_prepend(const std::list<_Ty, _Ax> &initial_value) :
- imp_(std::list<_Ty, _Ax>(initial_value))
+template <typename Type,
+ typename Allocator = typename std::list<Type>::allocator_type,
+ bool Align = false>
+struct op_list_append :
+ public internal::list_monoid_base<op_list_append_view<Type, Allocator>, Align>
{
-}
+ /// Construct with default allocator.
+ op_list_append() {}
+ /// Construct with specified allocator.
+ op_list_append(const Allocator& alloc) :
+ internal::list_monoid_base<op_list_append_view<Type, Allocator>, Align>(alloc) {}
+};
-/**
- * Allows read-only access to the list - same as get_reference()
+/** Monoid class for list prepend reductions. Instantiate the cilk::reducer
+ * template class with an op_list_prepend monoid to create a list prepend
+ * reducer class. For example, to create a list of strings:
+ *
+ * cilk::reducer< cilk::op_list_prepend<std::string> > r;
*
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
+ * @tparam Type The list element type (not the list type).
+ * @tparam Allocator The list allocator type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A const reference to the list that is the current contents of this view.
+ * @see ReducersList
+ * @see op_list_prepend_view
*/
-template<class _Ty, class _Ax>
-const std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_value() const
+template <typename Type,
+ typename Allocator = typename std::list<Type>::allocator_type,
+ bool Align = false>
+struct op_list_prepend :
+ public internal::list_monoid_base<op_list_prepend_view<Type, Allocator>, Align>
{
- return imp_.view();
-}
+ /// Construct with default allocator.
+ op_list_prepend() {}
+ /// Construct with specified allocator.
+ op_list_prepend(const Allocator& alloc) :
+ internal::list_monoid_base<op_list_prepend_view<Type, Allocator>, Align>(alloc) {}
+};
+
-/**
- * Allows mutable access to the list
+/** Deprecated list append reducer wrapper class.
*
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
+ * reducer_list_append is the same as
+ * @ref reducer<@ref op_list_append>, except that reducer_list_append is a
+ * proxy for the contained view, so that accumulator variable update
+ * operations can be applied directly to the reducer. For example, an element
+ * is appended to a `reducer<%op_list_append>` with `r->push_back(a)`, but an
+ * element can be appended to a `%reducer_list_append` with `r.push_back(a)`.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A mutable reference to the list that is the current contents of this view.
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_list_append.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_list_append`
+ * and `reducer<%op_list_append>`. This allows incremental code
+ * conversion: old code that used `%reducer_list_append` can pass a
+ * `%reducer_list_append` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_list_append>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the list.
+ * @tparam Allocator The allocator type of the list.
+ *
+ * @see op_list_append
+ * @see reducer
+ * @see ReducersList
*/
-template<class _Ty, class _Ax>
-std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_reference()
+template <class Type, class Allocator = std::allocator<Type> >
+class reducer_list_append :
+ public reducer<op_list_append<Type, Allocator, true> >
{
- return imp_.view();
-}
+ typedef reducer<op_list_append<Type, Allocator, true> > base;
+ using base::view;
+public:
+
+ /// The reducer’s list type.
+ typedef typename base::value_type list_type;
-/**
- * Allows read-only access to the list
+ /// The list’s element type.
+ typedef Type list_value_type;
+
+ /// The reducer’s primitive component type.
+ typedef Type basic_value_type;
+
+ /// The monoid type.
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Construct a reducer with an empty list.
+ */
+ reducer_list_append() {}
+
+ /** Construct a reducer with a specified initial list value.
+ */
+ reducer_list_append(const std::list<Type, Allocator> &initial_value) :
+ base(initial_value) {}
+
+ //@}
+
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_list_append_view. */
+ //@{
+
+ /// @copydoc op_list_append_view::push_back(const Type&)
+ void push_back(const Type& element) { view().push_back(element); }
+
+ //@}
+
+ /** Allow mutable access to the list within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the list returned by this method will be a partial
+ * result.
+ *
+ * @returns A mutable reference to the list within the current view.
+ */
+ list_type &get_reference() { return view().view_get_reference(); }
+
+ /** Allow read-only access to the list within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the list returned by this method will be a partial
+ * result.
+ *
+ * @returns A const reference to the list within the current view.
+ */
+ list_type const &get_reference() const { return view().view_get_reference(); }
+
+ /// @name Dereference
+
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_list_append<int> > r;
+ * r->push_back(a); // *r returns the view
+ * // push_back is a view member function
+ *
+ * reducer_list_append<int> w;
+ * w->push_back(a); // *w returns the wrapper
+ * // push_back is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_list_append& operator*() { return *this; }
+ reducer_list_append const& operator*() const { return *this; }
+
+ reducer_list_append* operator->() { return this; }
+ reducer_list_append const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_list_append<Type, Allocator, false> >& ()
+ {
+ return *reinterpret_cast<
+ reducer< op_list_append<Type, Allocator, false> >*
+ >(this);
+ }
+ operator const reducer< op_list_append<Type, Allocator, false> >& () const
+ {
+ return *reinterpret_cast<
+ const reducer< op_list_append<Type, Allocator, false> >*
+ >(this);
+ }
+ //@}
+
+};
+
+
+/** Deprecated list prepend reducer wrapper class.
*
- * @warning If this method is called before the parallel calculation is
- * complete, the list returned by this method will be a partial result.
+ * reducer_list_prepend is the same as
+ * @ref reducer<@ref op_list_prepend>, except that reducer_list_prepend is a
+ * proxy for the contained view, so that accumulator variable update operations
+ * can be applied directly to the reducer. For example, an element is prepended
+ * to a `reducer<op_list_prepend>` with `r->push_front(a)`, but an element is
+ * prepended to a `reducer_list_prepend` with `r.push_front(a)`.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @returns A read-only reference to the list that is the current contents of this view.
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_list_prepend.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_list_prepend`
+ * and `reducer<%op_list_prepend>`. This allows incremental code
+ * conversion: old code that used `%reducer_list_prepend` can pass a
+ * `%reducer_list_prepend` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_list_prepend>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the list.
+ * @tparam Allocator The allocator type of the list.
+ *
+ * @see op_list_prepend
+ * @see reducer
+ * @see ReducersList
*/
-template<class _Ty, class _Ax>
-const std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_reference() const
+template <class Type, class Allocator = std::allocator<Type> >
+class reducer_list_prepend :
+ public reducer<op_list_prepend<Type, Allocator, true> >
{
- return imp_.view();
-}
+ typedef reducer<op_list_prepend<Type, Allocator, true> > base;
+ using base::view;
+public:
+
+ /** The reducer’s list type.
+ */
+ typedef typename base::value_type list_type;
-/**
- * Replace the list's contents
+ /** The list’s element type.
+ */
+ typedef Type list_value_type;
+
+ /** The reducer’s primitive component type.
+ */
+ typedef Type basic_value_type;
+
+ /** The monoid type.
+ */
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Construct a reducer with an empty list.
+ */
+ reducer_list_prepend() {}
+
+ /** Construct a reducer with a specified initial list value.
+ */
+ reducer_list_prepend(const std::list<Type, Allocator> &initial_value) :
+ base(initial_value) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_list_prepend_view.
+ */
+ //@{
+
+ /// @copydoc op_list_prepend_view::push_front(const Type&)
+ void push_front(const Type& element) { view().push_front(element); }
+
+ //@}
+
+ /** Allow mutable access to the list within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the list returned by this method will be a partial
+ * result.
+ *
+ * @returns A mutable reference to the list within the current view.
+ */
+ list_type &get_reference() { return view().view_get_reference(); }
+
+ /** Allow read-only access to the list within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the list returned by this method will be a partial
+ * result.
+ *
+ * @returns A const reference to the list within the current view.
+ */
+ list_type const &get_reference() const { return view().view_get_reference(); }
+
+ /// @name Dereference
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_list_prepend<int> > r;
+ * r->push_front(a); // *r returns the view
+ * // push_front is a view member function
+ *
+ * reducer_list_prepend<int> w;
+ * w->push_front(a); // *w returns the wrapper
+ * // push_front is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_list_prepend& operator*() { return *this; }
+ reducer_list_prepend const& operator*() const { return *this; }
+
+ reducer_list_prepend* operator->() { return this; }
+ reducer_list_prepend const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_list_prepend<Type, Allocator, false> >& ()
+ {
+ return *reinterpret_cast<
+ reducer< op_list_prepend<Type, Allocator, false> >*
+ >(this);
+ }
+ operator const reducer< op_list_prepend<Type, Allocator, false> >& () const
+ {
+ return *reinterpret_cast<
+ const reducer< op_list_prepend<Type, Allocator, false> >*
+ >(this);
+ }
+ //@}
+
+};
+
+/// @cond internal
+
+/** Metafunction specialization for reducer conversion.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param value - The list to replace the current contents of this view
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_list_append<Type, Allocator> >`
+ * class to have an `operator reducer_list_append<Type, Allocator>& ()`
+ * conversion operator that statically downcasts the `reducer<op_list_append>`
+ * to the corresponding `reducer_list_append` type. (The reverse conversion,
+ * from `reducer_list_append` to `reducer<op_list_append>`, is just an upcast,
+ * which is provided for free by the language.)
*/
-template<class _Ty, class _Ax>
-void reducer_list_prepend<_Ty, _Ax>::set_value(const list_type &value)
+template <class Type, class Allocator, bool Align>
+struct legacy_reducer_downcast<reducer<op_list_append<Type, Allocator, Align> > >
{
- // Clean out any value in our list
- imp_.view().clear();
-
- // If the new list is empty, we're done
- if (value.empty())
- return;
-
- // Copy each element into our list
- imp_.view() = value;
-}
+ typedef reducer_list_append<Type, Allocator> type;
+};
-/**
- * Add an element to the beginning of the list
+/** Metafunction specialization for reducer conversion.
*
- * @tparam _Ty - Type of the list elements
- * @tparam _Ax - Allocator object used to define the storage allocation
- * model. If not specified, the allocator class template for _Ty is used.
- * @param element Element to be added to the beginning of the list
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the
+ * `reducer< op_list_prepend<Type, Allocator> >` class to have an
+ * `operator reducer_list_prepend<Type, Allocator>& ()` conversion operator
+ * that statically downcasts the `reducer<op_list_prepend>` to the
+ * corresponding `reducer_list_prepend` type. (The reverse conversion, from
+ * `reducer_list_prepend` to `reducer<op_list_prepend>`, is just an upcast,
+ * which is provided for free by the language.)
*/
-template<class _Ty, class _Ax>
-void reducer_list_prepend<_Ty, _Ax>::push_front(const _Ty element)
+template <class Type, class Allocator, bool Align>
+struct legacy_reducer_downcast<reducer<op_list_prepend<Type, Allocator, Align> > >
{
- imp_.view().push_front(element);
-}
+ typedef reducer_list_prepend<Type, Allocator> type;
+};
+
+/// @endcond
+
+//@}
-} // namespace cilk
+} // Close namespace cilk
#endif // REDUCER_LIST_H_INCLUDED
diff --git a/libcilkrts/include/cilk/reducer_max.h b/libcilkrts/include/cilk/reducer_max.h
index 0b1c2960d63..2a5b1bd8934 100644
--- a/libcilkrts/include/cilk/reducer_max.h
+++ b/libcilkrts/include/cilk/reducer_max.h
@@ -1,1016 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
- *
- */
-
-/*
- * reducer_max.h
- *
- * Purpose: Reducer hyperobject to retain the max value.
- *
- * Classes: reducer_max<Type, Compare=std::less<Type> >
- * reducer_max_index<Index, Value, Compare=std::less<Type> >
- *
- * Description:
- * ============
- * This component provides reducer-type hyperobject representations that allow
- * the maximum value, or the maximum value and an index, of a group of values to
- * be determined in parallel.
- *
- * Usage Example:
- * ==============
- * Suppose we wish to compute the maximum value in an array of integers.
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * int max = INT_MAX;
- *
- * ...
- *
- * for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * if (max < a[i])
- * {
- * max = a[i];
- * }
- * }
- * std::cout << "max = " << max << std::endl;
- *
- * ...
- * }
- *
- * Changing the 'for' to a 'cilk_for' will allow the loop to be run in parallel
- * but will create a data race on the variable 'max'. The race can be resolved
- * by changing 'max' to a 'reducer_max' hyperobject:
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * cilk::reducer_max<int> max(INT_MAX);
- *
- * ...
- *
- * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * max->calc_max(a[i]);
- * }
- * std::cout << "max = " << max->get_value() << std::endl;
- *
- * ...
- * }
- *
- * A similar loop which calculates both the maximum value and index would be:
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * cilk::reducer_max_index<int, int> rmi(INT_MIN, -1);
- *
- * ...
- *
- * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * rmi.calc_max(i, a[i]);
- * }
- * std::cout << "max = " << rmi->get_value() <<
- * ", index = " << rmi->get_index() << std::endl;
- *
- * ...
- * }
- *
- *
- * Operations provided:
- * ====================
- * reducer_max and reducer_max_index provide set and get methods that are
- * guaranteed to be deterministic iff they are called prior to the first
- * spawn or after the last sync in a parallel algorithm. When called during
- * execution, the value returned by get_value (and get_index) may differ from
- * run to run depending on how the routine or loop is scheduled. Calling
- * set_value anywhere between the first spawn and the last sync may cause the
- * algorithm to produce non-deterministic results.
- *
- * get_value and get_index return imutable values. The matching get_reference
- * and get_index_reference methods return modifiable references
- *
- * The calc_max method is a comparison operation that sets the reducer to the
- * larger of itself and the object being compared. The max_of routines are
- * provided for convenience:
- *
- * cilk::reducer_max<int> rm;
- *
- * ...
- *
- * rm.calc_max(55); // alternatively: rm = cilk::max_of(rm, 55);
- *
- *
- * Template parameter restrictions:
- * ================================
- * reducer_max and reducer_max_index require that the 'Type' template parameter
- * be DefaultConstructible. The 'Compare' template parameter must
- * implement a strict weak ordering if you want deterministic results.
- *
- * There are no requirements on the 'Index' template parameter of
- * reducer_max_index. All comparisons will be done on the 'Type' value.
- *
- */
-
-#ifndef REDUCER_MAX_H_INCLUDED
-#define REDUCER_MAX_H_INCLUDED
-
-#include <cilk/reducer.h>
-#ifdef __cplusplus
-# include <cstddef>
-# include <functional>
-#else
-# include <stddef.h>
-#endif
-
-#ifdef __cplusplus
-
-/* C++ Interface
- */
-
-namespace cilk {
-
-// Forward declaration
-template <typename Type, typename Compare> class reducer_max;
-
-namespace internal {
- // "PRIVATE" HELPER CLASS - uses the type system to make sure that
- // reducer_max instances aren't copied, but we can still allow statements
- // like *max = cilk::max_of(*max, a[i]);
- template <typename Type, typename Compare>
- class temp_max
- {
- private:
- reducer_max<Type,Compare>* m_reducerPtr;
-
- friend class reducer_max<Type,Compare>;
-
- // Copyable, not assignable
- temp_max& operator=(const temp_max &);
-
- public:
- explicit temp_max(reducer_max<Type,Compare> *reducerPtr);
-
- temp_max calc_max(const Type& x) const;
- };
-
- template <typename Type, typename Compare>
- inline
- temp_max<Type,Compare>
- max_of(const temp_max<Type,Compare>& tmp, const Type& x)
- {
- return tmp.calc_max(x);
- }
-
- template <typename Type, typename Compare>
- inline
- temp_max<Type,Compare>
- max_of(const Type& x, const temp_max<Type,Compare>& tmp)
- {
- return tmp.calc_max(x);
- }
-
-} // end namespace internal
-
-/**
- * @brief Class 'reducer_max' is a hyperobject representation of a value that
- * retains the maximum value of all of the values it sees during its lifetime.
- */
-template <typename Type, typename Compare=std::less<Type> >
-class reducer_max
-{
-public:
- /// Type of data in a reducer_max
- typedef Type basic_value_type;
-
-public:
- /// Internal representation of the per-strand view of the data for
- /// reducer_max
- struct View
- {
- friend class reducer_max<Type,Compare>;
- friend class monoid_base<View>;
-
- public:
- /// Constructs a per-strand view instance, initializing it to the
- /// identity value.
- View();
-
- /// Constructs a per-strand view instance, initializing it to the
- /// specified value.
- View(const Type& v);
-
- /// Sets this view to the specified value.
- void set(const Type &v);
-
- /// Returns current value for this view
- const Type &get_value() const;
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- private:
- Type m_value;
- bool m_isSet;
- };
-
-public:
- /// Definition of data view, operation, and identity for reducer_max
- struct Monoid: monoid_base<View>
- {
- Compare m_comp;
- Monoid() : m_comp() {}
- Monoid(const Compare& comp) : m_comp(comp) {}
- void take_max(View *left, const Type &v) const;
- void reduce(View *left, View *right) const;
- };
-private:
- // Hyperobject to serve up views
- reducer<Monoid> m_imp;
-
-public:
- typedef internal::temp_max<Type,Compare> temp_max;
-
- friend class internal::temp_max<Type,Compare>;
-
-public:
- /// Construct a 'reducer_max' object with a value of 'Type()'.
- reducer_max();
-
- /// Construct a 'reducer_max' object with the specified initial value.
- explicit reducer_max(const Type& initial_value);
-
- /// Construct a 'reducer_max' object with the specified initial value and
- /// comparator.
- reducer_max(const Type& initial_value, const Compare& comp);
-
- /// Return an immutable reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Return a reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Type& get_reference();
-
- /// Return a reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Type const& get_reference() const;
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- /// Set the value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// Compare the current value with the one passed and retain the
- /// larger of the two. Return this reducer.
- reducer_max& calc_max(const Type& value);
-
- /// Merge the result of a 'max' operation into this object. The
- /// operation must involve this hyperobject, i.e., x = max_of(x, 5);
- reducer_max& operator=(const temp_max &temp);
-
- reducer_max& operator*() { return *this; }
- reducer_max const& operator*() const { return *this; }
-
- reducer_max* operator->() { return this; }
- reducer_max const* operator->() const { return this; }
-
-private:
- // Not copyable
- reducer_max(const reducer_max&);
- reducer_max& operator=(const reducer_max&);
-};
-
-// Global "cilk::max_of" functions
-
-using internal::max_of;
-
-template <typename Type, typename Compare>
-inline
-internal::temp_max<Type,Compare>
-max_of(reducer_max<Type,Compare>& r, const Type& x)
-{
- return internal::temp_max<Type,Compare>(&r.calc_max(x));
-}
-
-template <typename Type, typename Compare>
-inline
-internal::temp_max<Type,Compare>
-max_of(const Type& x, reducer_max<Type,Compare>& r)
-{
- return internal::temp_max<Type,Compare>(&r.calc_max(x));
-}
-
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// --------------------------------
-// template class reducer_max::View
-// --------------------------------
-
-template<typename Type, typename Compare>
-reducer_max<Type,Compare>::View::View()
- : m_value()
- , m_isSet(false)
-{
-}
-
-template<typename Type, typename Compare>
-reducer_max<Type,Compare>::View::View(const Type& v)
- : m_value(v)
- , m_isSet(true)
-{
-}
-
-template<typename Type, typename Compare>
-void reducer_max<Type,Compare>::View::set(const Type &v)
-{
- m_value = v;
- m_isSet = true;
-}
-
-template<typename Type, typename Compare>
-const Type &reducer_max<Type,Compare>::View::get_value() const
-{
- return m_value;
-}
-
-template<typename Type, typename Compare>
-bool reducer_max<Type,Compare>::View::is_set() const
-{
- return m_isSet;
-}
-
-// -------------------------------------------
-// template class reducer_max::Monoid
-// -------------------------------------------
-
-template<typename Type, typename Compare>
-void
-reducer_max<Type,Compare>::Monoid::take_max(View *left, const Type &v) const
-{
- if (! left->m_isSet || m_comp(left->m_value,v))
- {
- left->m_value = v;
- left->m_isSet = true;
- }
-}
-
-template<typename Type, typename Compare>
-void
-reducer_max<Type,Compare>::Monoid::reduce(View *left, View *right) const
-{
- if (right->m_isSet)
- {
- // Take the max of the two values
- take_max (left, right->m_value);
- }
-}
-
-// --------------------------------------------
-// temp_max private helper class implementation
-// --------------------------------------------
-
-template <typename Type, typename Compare> inline
-internal::temp_max<Type,Compare>::temp_max(
- reducer_max<Type,Compare> *reducerPtr)
- : m_reducerPtr(reducerPtr)
-{
-}
-
-template <typename Type, typename Compare> inline
-internal::temp_max<Type,Compare>
-internal::temp_max<Type,Compare>::calc_max(const Type& x) const
-{
- m_reducerPtr->calc_max(x);
- return *this;
-}
-
-// --------------------------
-// template class reducer_max
-// --------------------------
-
-// Default constructor
-template <typename Type, typename Compare>
-inline
-reducer_max<Type,Compare>::reducer_max()
- : m_imp()
-{
-}
-
-template <typename Type, typename Compare>
-inline
-reducer_max<Type,Compare>::reducer_max(const Type& initial_value)
- : m_imp(initial_value)
-{
-}
-
-template <typename Type, typename Compare>
-inline
-reducer_max<Type,Compare>::reducer_max(const Type& initial_value,
- const Compare& comp)
- : m_imp(Monoid(comp), initial_value)
-{
-}
-
-template <typename Type, typename Compare>
-inline
-const Type& reducer_max<Type,Compare>::get_value() const
-{
- const View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Type, typename Compare>
-inline
-Type& reducer_max<Type,Compare>::get_reference()
-{
- View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Type, typename Compare>
-inline
-Type const& reducer_max<Type,Compare>::get_reference() const
-{
- View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Type, typename Compare>
-inline
-bool reducer_max<Type,Compare>::is_set() const
-{
- const View &v = m_imp.view();
-
- return v.m_isSet;
-}
-
-template <typename Type, typename Compare>
-inline
-void reducer_max<Type,Compare>::set_value(const Type& value)
-{
- View &v = m_imp.view();
-
- v.set(value);
-}
-
-template <typename Type, typename Compare> inline
-reducer_max<Type,Compare>&
-reducer_max<Type,Compare>::calc_max(const Type& value)
-{
- View &v = m_imp.view();
- m_imp.monoid().take_max(&v, value);
- return *this;
-}
-
-template <typename Type, typename Compare>
-reducer_max<Type,Compare>&
-reducer_max<Type,Compare>::operator=(const temp_max& temp)
-{
- // Noop. Just test that temp is the same as this.
- __CILKRTS_ASSERT(this == temp.m_reducerPtr);
- return *this;
-}
-
-/*
- * @brief Class 'reducer_max_index' is a hyperobject representation of an
- * index and corresponding value representing the maximum such pair this
- * object has seen.
- */
-template <typename Index, typename Value, typename Compare=std::less<Value> >
-class reducer_max_index
-{
-public:
- /// Type of data in a reducer_max
- typedef Value basic_value_type;
-
-public:
- /// Internal representation of the per-strand view of the data for
- /// reducer_max_index
- struct View
- {
- friend class reducer_max_index<Index, Value, Compare>;
- friend class monoid_base<View>;
-
- public:
- /// Constructs a per-strand view instance, initializing it to the
- /// identity value.
- View();
-
- /// Construct a per-strand view instance, initializing it to the
- /// specified value and index.
- View(const Index &i, const Value &v);
-
- /// Sets this view to a specified value and index
- void set(const Index &i, const Value &v);
-
- /// Returns current index for this view
- const Index &get_index() const;
-
- /// Returns current value for this view
- const Value &get_value() const;
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- private:
- Index m_index;
- Value m_value;
- bool m_isSet;
- };
-
-public:
- /// Definition of data view, operation, and identity for reducer_max_index
- struct Monoid: monoid_base<View>
- {
- Compare m_comp;
- Monoid() : m_comp() {}
- Monoid(const Compare& comp) : m_comp(comp) {}
- void take_max(View *left, const Index &i, const Value &v) const;
- void reduce (View *left, View *right) const;
- };
-
-private:
- // Hyperobject to serve up views
- reducer<Monoid> m_imp;
-
-public:
- /// Construct a 'reducer_max_index' object with a value of 'Type()'.
- reducer_max_index();
-
- /// Construct a 'reducer_max_index' object with the specified initial
- /// value and index.
- reducer_max_index(const Index& initial_index,
- const Value& initial_value);
-
- /// Construct a 'reducer_max_index' object with the specified initial
- /// value, index, and comparator.
- reducer_max_index(const Index& initial_index,
- const Value& initial_value,
- const Compare& comp);
-
- /// Return an immutable reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Value& get_value() const;
-
- /// Return a reference to the value of this object
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Value& get_reference();
-
- /// Return a reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Value const& get_reference() const;
-
- /// Return an immutable reference to the maximum index.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Index& get_index() const;
-
- /// Return a mutable reference to the maximum index
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Index& get_index_reference();
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- /// Set the index/value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Index& index,
- const Value& value);
-
- /// Compare the current value with the one passed and retain the
- /// larger of the two. Return this reducer.
- reducer_max_index& calc_max(const Index& index, const Value& value);
-
- // DEPRECATED. Use calc_max instead.
- void max_of(const Index& index, const Value& value) {calc_max(index,value);}
-
- reducer_max_index& operator*() { return *this; }
- reducer_max_index const& operator*() const { return *this; }
-
- reducer_max_index* operator->() { return this; }
- reducer_max_index const* operator->() const { return this; }
-
-private:
- // Not copyable
- reducer_max_index(const reducer_max_index&);
- reducer_max_index& operator=(const reducer_max_index&);
-};
-
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// --------------------------------
-// template class reducer_max::View
-// --------------------------------
-
-template<typename Index, typename Value, typename Compare>
-reducer_max_index<Index, Value, Compare>::View::View()
- : m_index()
- , m_value()
- , m_isSet(false)
-{
-}
-
-template<typename Index, typename Value, typename Compare>
-reducer_max_index<Index, Value, Compare>::View::View(const Index &i,
- const Value &v)
- : m_index(i)
- , m_value(v)
- , m_isSet(true)
-{
-}
-
-template<typename Index, typename Value, typename Compare>
-void
-reducer_max_index<Index, Value, Compare>::View::set(const Index &i,
- const Value &v)
-{
- m_index = i;
- m_value = v;
- m_isSet = true;
-}
-
-template<typename Index, typename Value, typename Compare>
-const Index &
-reducer_max_index<Index, Value, Compare>::View::get_index() const
-{
- return m_index;
-}
-
-template<typename Index, typename Value, typename Compare>
-const Value &
-reducer_max_index<Index, Value, Compare>::View::get_value() const
-{
- return m_value;
-}
-
-template<typename Index, typename Value, typename Compare>
-bool
-reducer_max_index<Index, Value, Compare>::View::is_set() const
-{
- return m_isSet;
-}
-
-// -------------------------------------------
-// template class reducer_max::Monoid
-// -------------------------------------------
-
-template<typename Index, typename Value, typename Compare>
-void
-reducer_max_index<Index,Value,Compare>::Monoid::take_max(View *left,
- const Index &i,
- const Value &v) const
-{
- if (! left->m_isSet || m_comp(left->m_value,v))
- {
- left->m_index = i;
- left->m_value = v;
- left->m_isSet = true;
- }
-}
-
-template<typename Index, typename Value, typename Compare>
-void
-reducer_max_index<Index, Value, Compare>::Monoid::reduce(View *left,
- View *right) const
-{
- if (right->m_isSet)
- take_max (left, right->m_index, right->m_value);
-}
-
-// --------------------------------
-// template class reducer_max_index
-// --------------------------------
-
-// Default constructor
-template <typename Index, typename Value, typename Compare>
-inline
-reducer_max_index<Index, Value, Compare>::reducer_max_index()
- : m_imp()
-{
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-reducer_max_index<Index, Value, Compare>::reducer_max_index(
- const Index& initial_index, const Value& initial_value)
- : m_imp(initial_index, initial_value)
-{
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-reducer_max_index<Index, Value, Compare>::reducer_max_index(
- const Index& initial_index,
- const Value& initial_value,
- const Compare& comp)
- : m_imp(Monoid(comp), initial_index, initial_value)
-{
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-reducer_max_index<Index, Value, Compare>&
-reducer_max_index<Index, Value, Compare>::calc_max(const Index& index,
- const Value& value)
-{
- View &v = m_imp.view();
- m_imp.monoid().take_max(&v, index, value);
- return *this;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-const Value& reducer_max_index<Index, Value, Compare>::get_value() const
-{
- const View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-Value& reducer_max_index<Index, Value, Compare>::get_reference()
-{
- View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-Value const& reducer_max_index<Index, Value, Compare>::get_reference() const
-{
- const View &v = m_imp.view();
-
- return v.m_value;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-const Index& reducer_max_index<Index, Value, Compare>::get_index() const
-{
- const View &v = m_imp.view();
-
- return v.m_index;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-Index& reducer_max_index<Index, Value, Compare>::get_index_reference()
-{
- View &v = m_imp.view();
-
- return v.m_index;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-bool reducer_max_index<Index, Value, Compare>::is_set() const
-{
- const View &v = m_imp.view();
-
- return v.m_isSet;
-}
-
-template <typename Index, typename Value, typename Compare>
-inline
-void reducer_max_index<Index, Value, Compare>::set_value(const Index& index,
- const Value& value)
-{
- View &v = m_imp.view();
-
- return v.set(index, value);
-}
-
-} // namespace cilk
-
-#endif // __cplusplus
-
-/* C Interface
- */
-
-__CILKRTS_BEGIN_EXTERN_C
-
-/* REDUCER_MAX */
-
-#define CILK_C_REDUCER_MAX_TYPE(tn) \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn)
-#define CILK_C_REDUCER_MAX(obj,tn,v) \
- CILK_C_REDUCER_MAX_TYPE(tn) obj = \
- CILK_C_INIT_REDUCER(_Typeof(obj.value), \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_,tn), \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_,tn), \
- __cilkrts_hyperobject_noop_destroy, v)
-
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_MAX_INSTANCE(t,tn) \
- typedef CILK_C_DECLARE_REDUCER(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn); \
- __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn);
-
-/* CILK_C_REDUCER_MAX_CALC(reducer, v) performs the reducer lookup
- * AND calc_max operation, leaving the current view with the max of the
- * previous value and v.
- */
-#define CILK_C_REDUCER_MAX_CALC(reducer, v) do { \
- _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \
- _Typeof(v) __value = (v); \
- if (*view < __value) { \
- *view = __value; \
- } } while (0)
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_MAX_INSTANCE(char,char);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_MAX_INSTANCE(signed char,schar);
-CILK_C_REDUCER_MAX_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_MAX_INSTANCE(short,short);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_MAX_INSTANCE(int,int);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_MAX_INSTANCE(long,long);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_MAX_INSTANCE(long long,longlong);
-CILK_C_REDUCER_MAX_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_MAX_INSTANCE(float,float);
-CILK_C_REDUCER_MAX_INSTANCE(double,double);
-CILK_C_REDUCER_MAX_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_MAX_IMP(t,tn,id) \
- __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r) \
- { if (*(t*)l < *(t*)r) *(t*)l = *(t*)r; } \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn) \
- { *(t*)v = id; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric types. The contents of reducer_max.c are as follows:
-
-CILK_C_REDUCER_MAX_IMP(char,char,CHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned char,uchar,0)
-CILK_C_REDUCER_MAX_IMP(signed char,schar,SCHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(wchar_t,wchar_t,WCHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(short,short,SHRT_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned short,ushort,0)
-CILK_C_REDUCER_MAX_IMP(int,int,INT_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned int,uint,0)
-CILK_C_REDUCER_MAX_IMP(unsigned int,unsigned,0) // alternate name
-CILK_C_REDUCER_MAX_IMP(long,long,LONG_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned long,ulong,0)
-CILK_C_REDUCER_MAX_IMP(long long,longlong,LLONG_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned long long,ulonglong,0)
-CILK_C_REDUCER_MAX_IMP(float,float,-HUGE_VALF)
-CILK_C_REDUCER_MAX_IMP(double,double,-HUGE_VAL)
-CILK_C_REDUCER_MAX_IMP(long double,longdouble,-HUGE_VALL)
-
-*/
-
-/* REDUCER_MAX_INDEX */
-
-#define CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn) \
- typedef struct { \
- __STDNS ptrdiff_t index; \
- t value; \
- } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)
-
-#define CILK_C_REDUCER_MAX_INDEX_TYPE(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,t)
-#define CILK_C_REDUCER_MAX_INDEX(obj,t,v) \
- CILK_C_REDUCER_MAX_INDEX_TYPE(t) obj = \
- CILK_C_INIT_REDUCER(_Typeof(obj.value), \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_,t), \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_,t), \
- __cilkrts_hyperobject_noop_destroy, { 0, v })
-
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn) \
- CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \
- typedef CILK_C_DECLARE_REDUCER( \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \
- __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,tn); \
- __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn);
-
-/* CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) performs the reducer lookup
- * AND calc_max operation, leaving the current view with the max of the
- * previous value and v.
- */
-#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) do { \
- _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \
- _Typeof(v) __value = (v); \
- if (view->value < __value) { \
- view->index = (i); \
- view->value = __value; \
- } } while (0)
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(char,char);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char,schar);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(short,short);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(int,int);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(long,long);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long,longlong);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(float,float);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(double,double);
-CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_MAX_INDEX_IMP(t,tn,id) \
- __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r) \
- { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \
- if (((view_t*)l)->value < ((view_t*)r)->value) \
- *(view_t*)l = *(view_t*)r; } \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn) \
- { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \
- ((view_t*)v)->index = 0; ((view_t*)v)->value = id; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric tyeps. The contents of reducer_max_index.c are as follows:
-
-CILK_C_REDUCER_MAX_INDEX_IMP(char,char,CHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned char,uchar,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(signed char,schar,SCHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(wchar_t,wchar_t,WCHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(short,short,SHRT_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned short,ushort,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(int,int,INT_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,uint,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,unsigned,0) // alternate name
-CILK_C_REDUCER_MAX_INDEX_IMP(long,long,LONG_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long,ulong,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(long long,longlong,LLONG_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long long,ulonglong,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(float,float,-HUGE_VALF)
-CILK_C_REDUCER_MAX_INDEX_IMP(double,double,-HUGE_VAL)
-CILK_C_REDUCER_MAX_INDEX_IMP(long double,longdouble,-HUGE_VALL)
-
-*/
-
-
-__CILKRTS_END_EXTERN_C
-
-#endif // defined REDUCER_MAX_H_INCLUDED
+/* reducer_max.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file reducer_max.h
+ *
+ * @brief Defines classes for doing parallel maximum reductions.
+ *
+ * @ingroup ReducersMinMax
+ *
+ * @see ReducersMinMax
+ */
+
+#include "reducer_min_max.h"
diff --git a/libcilkrts/include/cilk/reducer_min.h b/libcilkrts/include/cilk/reducer_min.h
index 22694b101d3..52dea246d9e 100644
--- a/libcilkrts/include/cilk/reducer_min.h
+++ b/libcilkrts/include/cilk/reducer_min.h
@@ -1,1015 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
- *
- */
-
-/*
- * reducer_min.h
- *
- * Purpose: Reducer hyperobject to retain the min value.
- *
- * Classes: reducer_min<Type, Compare=std::less<Type> >
- * reducer_min_index<Index, Value, Compare=std::less<Type> >
- *
- * Description:
- * ============
- * This component provides reducer-type hyperobject representations that allow
- * the minimum value, or the minimum value and an index, of a group of values to
- * be determined in parallel.
- *
- * Usage Example:
- * ==============
- * Suppose we wish to compute the minimum value in an array of integers.
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * int min = INT_MIN;
- *
- * ...
- *
- * for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * if (a[i] < min)
- * {
- * min = a[i];
- * }
- * }
- * std::cout << "min = " << min << std::endl;
- *
- * ...
- * }
- *
- * Changing the 'for' to a 'cilk_for' will allow the loop to be run in parallel
- * but will create a data race on the variable 'min'. The race can be resolved
- * by changing 'min' to a 'reducer_min' hyperobject:
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * cilk::reducer_min<int> min(INT_MIN);
- *
- * ...
- *
- * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * min->calc_min(a[i]);
- * }
- * std::cout << "min = " << min->get_value() << std::endl;
- *
- * ...
- * }
- *
- * A similar loop which calculates both the minimum value and index would be:
- *
- * int test()
- * {
- * int a[ARRAY_SIZE];
- * cilk::reducer_min_index<int, int> rmi(INT_MAX, -1);
- *
- * ...
- *
- * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
- * {
- * rmi->calc_min(i, a[i]);
- * }
- * std::cout << "min = " << rmi->get_value() <<
- * ", index = " << rmi->get_index() << std::endl;
- *
- * ...
- * }
- *
- *
- * Operations provided:
- * ====================
- * reducer_min and reducer_min_index provide set and get methods that are
- * guaranteed to be deterministic iff they are called prior to the first
- * spawn or after the last sync in a parallel algorithm. When called during
- * execution, the value returned by get_value (and get_index) may differ from
- * run to run depending on how the routine or loop is scheduled. Calling
- * set_value anywhere between the first spawn and the last sync may cause the
- * algorithm to produce non-deterministic results.
- *
- * get_value and get_index return imutable values. The matching get_reference
- * and get_index_reference methods return modifiable references
- *
- * The calc_min method is a comparison operation that sets the reducer to the
- * smaller of itself and the object being compared. The min_of routines are
- * provided for convenience:
- *
- * cilk::reducer_min<int> rm;
- *
- * ...
- *
- * rm.calc_min(55); // alternatively: rm = cilk::min_of(rm, 55);
- *
- *
- * Template parameter restrictions:
- * ================================
- * reducer_min and reducer_min_index require that the 'Type' template parameter
- * be DefaultConstructible. The 'Compare' template parameter must
- * implement a strict weak ordering if you want deterministic results.
- *
- * There are no requirements on the 'Index' template parameter of
- * reducer_min_index. All comparisons will be done on the 'Type' value.
- *
- */
-
-#ifndef REDUCER_MIN_H_INCLUDED
-#define REDUCER_MIN_H_INCLUDED
-
-#include <cilk/reducer.h>
-#ifdef __cplusplus
-# include <cstddef>
-# include <functional>
-#else
-# include <stddef.h>
-#endif
-
-#ifdef __cplusplus
-
-/* C++ Interface
- */
-
-namespace cilk {
-
-// Forward declaration
-template <typename Type, typename Compare> class reducer_min;
-
-namespace internal {
- // "PRIVATE" HELPER CLASS - uses the type system to make sure that
- // reducer_max instances aren't copied, but we can still allow statements
- // like *min = cilk::min_of(*min, a[i]);
- template <typename Type, typename Compare>
- class temp_min
- {
- private:
- reducer_min<Type,Compare>* m_reducerPtr;
-
- friend class reducer_min<Type,Compare>;
-
- // Copyable, not assignable
- temp_min& operator=(const temp_min &);
-
- public:
- explicit temp_min(reducer_min<Type,Compare> *reducerPtr);
-
- temp_min calc_min(const Type& x) const;
- };
-
- template <typename Type, typename Compare>
- inline
- temp_min<Type,Compare>
- min_of(const internal::temp_min<Type,Compare>& tmp, const Type& x)
- {
- return tmp.calc_min(x);
- }
-
- template <typename Type, typename Compare>
- inline
- temp_min<Type,Compare>
- min_of(const Type& x, const internal::temp_min<Type,Compare>& tmp)
- {
- return tmp.calc_min(x);
- }
-
-} // end namespace internal
-
-/**
- * @brief Class 'reducer_min' is a hyperobject representation of a value that
- * retains the minimum value of all of the values it sees during its lifetime.
- */
-template <typename Type, typename Compare=std::less<Type> >
-class reducer_min
-{
-public:
- /// Type of data in a reducer_min
- typedef Type basic_value_type;
-
-public:
- /// Internal representation of the per-strand view of the data for
- /// reducer_min
- struct View
- {
- friend class reducer_min<Type,Compare>;
- friend class monoid_base<View>;
-
- public:
- /// Constructs a per-strand view instance, initializing it to the
- /// identity value.
- View();
-
- /// Constructs a per-strand view instance, initializing it to the
- /// specified value.
- View(const Type& v);
-
- /// Sets this view to the specified value.
- void set(const Type &v);
-
- /// Returns current value for this view
- const Type &get_value() const;
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- private:
- Type m_value;
- bool m_isSet;
- };
-
-public:
- /// Definition of data view, operation, and identity for reducer_min
- struct Monoid: monoid_base<View>
- {
- Compare m_comp;
- Monoid() : m_comp() {}
- Monoid(const Compare& comp) : m_comp(comp) {}
- void take_min(View *left, const Type &v) const;
- void reduce(View *left, View *right) const;
- };
-private:
- // Hyperobject to serve up views
- reducer<Monoid> m_imp;
-
-public:
- typedef internal::temp_min<Type,Compare> temp_min;
-
- friend class internal::temp_min<Type,Compare>;
-
-public:
- /// Construct a 'reducer_min' object with a value of 'Type()'.
- reducer_min();
-
- /// Construct a 'reducer_min' object with the specified initial value.
- explicit reducer_min(const Type& initial_value);
-
- /// Construct a 'reducer_min' object with the specified initial value and
- /// comparator.
- reducer_min(const Type& initial_value, const Compare& comp);
-
- /// Return an immutable reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Return a reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Type& get_reference();
-
- /// Return a reference to the value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Type const& get_reference() const;
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- /// Set the value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// Compare the current value with the one passed and retain the
- /// smaller of the two. Return this reducer.
- reducer_min& calc_min(const Type& value);
-
- /// Merge the result of a 'min' operation into this object. The
- /// operation must involve this hyperobject, i.e., x = min_of(x, 5);
- reducer_min& operator=(const temp_min &temp);
-
- reducer_min& operator*() { return *this; }
- reducer_min const& operator*() const { return *this; }
-
- reducer_min* operator->() { return this; }
- reducer_min const* operator->() const { return this; }
-
-private:
- // Not copyable
- reducer_min(const reducer_min&);
- reducer_min& operator=(const reducer_min&);
-};
-
-// Global "cilk::min_of" functions
-
-using internal::min_of;
-
-template <typename Type, typename Compare>
-inline
-internal::temp_min<Type,Compare>
-min_of(reducer_min<Type,Compare>& r, const Type& x)
-{
- return internal::temp_min<Type,Compare>(&r.calc_min(x));
-}
-
-template <typename Type, typename Compare>
-inline
-internal::temp_min<Type,Compare>
-min_of(const Type& x, reducer_min<Type,Compare>& r)
-{
- return internal::temp_min<Type,Compare>(&r.calc_min(x));
-}
-
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// --------------------------------
-// template class reducer_min::View
-// --------------------------------
-
-template<typename Type, typename Compare>
-reducer_min<Type,Compare>::View::View()
- : m_value()
- , m_isSet(false)
-{
-}
-
-template<typename Type, typename Compare>
-reducer_min<Type,Compare>::View::View(const Type& v)
- : m_value(v)
- , m_isSet(true)
-{
-}
-
-template<typename Type, typename Compare>
-void reducer_min<Type,Compare>::View::set(const Type &v)
-{
- m_value = v;
- m_isSet = true;
-}
-
-/// Read-only access to the value stored in this view.
-template<typename Type, typename Compare>
-const Type &reducer_min<Type,Compare>::View::get_value() const
-{
-    return this->m_value;
-}
-
-/// Reports the view's "set" flag.
-template<typename Type, typename Compare>
-bool reducer_min<Type,Compare>::View::is_set() const
-{
-    return this->m_isSet;
-}
-
-// -------------------------------------------
-// template class reducer_min::Monoid
-// -------------------------------------------
-
-/// Folds @a v into @a left: the view takes @a v when it is still unset or
-/// when the comparator orders @a v before the value it currently holds.
-template<typename Type, typename Compare>
-void
-reducer_min<Type,Compare>::Monoid::take_min(View *left, const Type &v) const
-{
-    // Keep the current value when one exists and v does not compare less;
-    // the comparator is only consulted once the view is known to be set.
-    if (left->m_isSet && !m_comp(v, left->m_value))
-        return;
-
-    left->m_value = v;
-    left->m_isSet = true;
-}
-
-/// Merges @a right into @a left.  An unset right view contributes nothing;
-/// otherwise its value is folded into @a left with take_min().
-template<typename Type, typename Compare>
-void
-reducer_min<Type,Compare>::Monoid::reduce(View *left, View *right) const
-{
-    if (! right->m_isSet)
-        return;
-
-    take_min(left, right->m_value);
-}
-
-// -----------------------------------
-// temp_min private helper class implementation
-// -----------------------------------
-
-/// Records the address of the reducer this temporary stands for.
-template <typename Type, typename Compare>
-inline internal::temp_min<Type,Compare>::temp_min(
-    reducer_min<Type,Compare> *reducerPtr)
-    : m_reducerPtr(reducerPtr) {}
-
-/// Applies `calc_min(x)` to the recorded reducer and returns a copy of this
-/// temporary.
-template <typename Type, typename Compare>
-inline internal::temp_min<Type,Compare>
-internal::temp_min<Type,Compare>::calc_min(const Type& x) const
-{
-    this->m_reducerPtr->calc_min(x);
-    return *this;
-}
-
-// --------------------------
-// template class reducer_min
-// --------------------------
-
-// Default constructor: default-constructs the underlying hyperobject.
-template <typename Type, typename Compare>
-inline reducer_min<Type,Compare>::reducer_min() : m_imp() {}
-
-// Constructor taking an initial value, forwarded to the hyperobject.
-template <typename Type, typename Compare>
-inline reducer_min<Type,Compare>::reducer_min(const Type& initial_value)
-    : m_imp(initial_value) {}
-
-// Constructor taking an initial value and a comparator.  The comparator is
-// wrapped in a Monoid, which is handed to the hyperobject ahead of the value.
-template <typename Type, typename Compare>
-inline reducer_min<Type,Compare>::reducer_min(const Type& initial_value,
-                                              const Compare& comp)
-    : m_imp(Monoid(comp), initial_value) {}
-
-/// Immutable reference to the value held by the hyperobject's current view.
-template <typename Type, typename Compare>
-inline const Type& reducer_min<Type,Compare>::get_value() const
-{
-    return m_imp.view().m_value;
-}
-
-/// Mutable reference to the value held by the hyperobject's current view.
-template <typename Type, typename Compare>
-inline Type& reducer_min<Type,Compare>::get_reference()
-{
-    return m_imp.view().m_value;
-}
-
-/// Immutable reference to the value held by the hyperobject's current view
-/// (const overload).
-template <typename Type, typename Compare>
-inline
-Type const& reducer_min<Type,Compare>::get_reference() const
-{
-    // This is a const member, so the view is taken by const reference, as in
-    // the other const accessors (get_value(), is_set()) and in
-    // reducer_min_index::get_reference() const.  The previous non-const
-    // `View &` binding is ill-formed if view() on a const hyperobject yields
-    // a const view, and was never needed to return a `Type const&`.
-    const View &v = m_imp.view();
-
-    return v.m_value;
-}
-
-/// Reports the "set" flag of the hyperobject's current view.
-template <typename Type, typename Compare>
-inline bool reducer_min<Type,Compare>::is_set() const
-{
-    return m_imp.view().m_isSet;
-}
-
-/// Unconditionally stores @a value in the current view (View::set() also
-/// marks the view as set).
-template <typename Type, typename Compare>
-inline void reducer_min<Type,Compare>::set_value(const Type& value)
-{
-    m_imp.view().set(value);
-}
-
-/// Folds @a value into the current view through the monoid's take_min(),
-/// which always stores the value when none has been set yet.  Returns this
-/// reducer.
-template <typename Type, typename Compare>
-inline reducer_min<Type,Compare>&
-reducer_min<Type,Compare>::calc_min(const Type& value)
-{
-    View &current = m_imp.view();
-    m_imp.monoid().take_min(&current, value);
-    return *this;
-}
-
-/// Completes an `x = min_of(x, ...)` expression.  The minimum has already
-/// been folded in by the time the temporary exists, so nothing is copied;
-/// the assertion only checks that the temporary refers to this reducer.
-template <typename Type, typename Compare>
-reducer_min<Type,Compare>&
-reducer_min<Type,Compare>::operator=(const temp_min& temp)
-{
-    __CILKRTS_ASSERT(this == temp.m_reducerPtr);
-    return *this;
-}
-
-
-/**
- * @brief Class 'reducer_min_index' is a hyperobject representation of an
- * index and corresponding value representing the minimum such pair this
- * object has seen.
- */
-template <typename Index, typename Value, typename Compare=std::less<Value> >
-class reducer_min_index
-{
-public:
- /// Type of the value (not the index) held in a reducer_min_index
- typedef Value basic_value_type;
-
-public:
- /// Internal representation of the per-strand view of the data for
- /// reducer_min_index
- struct View
- {
- friend class reducer_min_index<Index, Value, Compare>;
- friend class monoid_base<View>;
-
- public:
- /// Constructs a per-strand view instance, initializing it to the
- /// identity value.
- View();
-
- /// Constructs a per-strand view instance, initializing it to the
- /// specified value and index.
- View(const Index &i, const Value &v);
-
- /// Sets this view to a specified value and index
- void set(const Index &i, const Value &v);
-
- /// Returns current index for this view
- const Index &get_index() const;
- /// Returns current value for this view
- const Value &get_value() const;
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- private:
- Index m_index;
- Value m_value;
- bool m_isSet;
- };
-
-public:
- /// Definition of data view, operation, and identity for reducer_min_index
- struct Monoid: monoid_base<View>
- {
- Compare m_comp;
- Monoid() : m_comp() {}
- Monoid(const Compare& comp) : m_comp(comp) {}
- void take_min(View *left, const Index &i, const Value &v) const;
- void reduce (View *left, View *right) const;
- };
-
-private:
- // Hyperobject to serve up views
- reducer<Monoid> m_imp;
-
-public:
- /// Construct a 'reducer_min_index' object with a value of 'Value()'.
- reducer_min_index();
-
- /// Construct a 'reducer_min_index' object with the specified initial
- /// value and index.
- reducer_min_index(const Index& initial_index,
- const Value& initial_value);
-
- /// Construct a 'reducer_min_index' object with the specified initial
- /// value, index, and comparator.
- reducer_min_index(const Index& initial_index,
- const Value& initial_value,
- const Compare& comp);
-
- /// Return an immutable reference to the value of this object.
- const Value& get_value() const;
-
- /// Return a mutable reference to the value of this object
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Value& get_reference();
-
- /// Return an immutable reference to the value of this object
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Value const& get_reference() const;
-
- /// Return an immutable reference to the minimum index.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Index& get_index() const;
-
- /// Return a mutable reference to the minimum index
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- Index& get_index_reference();
-
- /// Returns true if the value has ever been set
- bool is_set() const;
-
- /// Set the index/value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Index& index,
- const Value& value);
-
- /// Compare the current value with the one passed and retain the
- /// smaller of the two, together with its index.
- void calc_min(const Index& index, const Value& value);
-
- // DEPRECATED. Use calc_min instead (this simply forwards to it).
- void min_of(const Index& index, const Value& value)
- { calc_min(index,value); }
-
- reducer_min_index& operator*() { return *this; }
- reducer_min_index const& operator*() const { return *this; }
-
- reducer_min_index* operator->() { return this; }
- reducer_min_index const* operator->() const { return this; }
-
-private:
- // Not copyable: copy operations are declared private and not defined here
- reducer_min_index(const reducer_min_index&);
- reducer_min_index& operator=(const reducer_min_index&);
-};
-
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// --------------------------------------
-// template class reducer_min_index::View
-// --------------------------------------
-
-/// Identity view: value-initializes index and value and marks the view
-/// unset.
-template<typename Index, typename Value, typename Compare>
-reducer_min_index<Index, Value, Compare>::View::View()
-    : m_index(), m_value(), m_isSet(false) {}
-
-/// View that starts out holding the pair (@a i, @a v) and is marked as set.
-template<typename Index, typename Value, typename Compare>
-reducer_min_index<Index, Value, Compare>::View::View(const Index &i,
-                                                     const Value &v)
-    : m_index(i), m_value(v), m_isSet(true) {}
-
-/// Overwrites the stored index and value (in that order), then marks the
-/// view as set.
-template<typename Index, typename Value, typename Compare>
-void
-reducer_min_index<Index, Value, Compare>::View::set(const Index &i,
-                                                    const Value &v)
-{
-    this->m_index = i;
-    this->m_value = v;
-    this->m_isSet = true;
-}
-
-/// Read-only access to the index stored in this view.
-template<typename Index, typename Value, typename Compare>
-const Index &
-reducer_min_index<Index, Value, Compare>::View::get_index() const
-{
-    return this->m_index;
-}
-
-/// Read-only access to the value stored in this view.
-template<typename Index, typename Value, typename Compare>
-const Value &
-reducer_min_index<Index, Value, Compare>::View::get_value() const
-{
-    return this->m_value;
-}
-
-/// Reports the view's "set" flag.
-template<typename Index, typename Value, typename Compare>
-bool
-reducer_min_index<Index, Value, Compare>::View::is_set() const
-{
-    return this->m_isSet;
-}
-
-// -------------------------------------------
-// template class reducer_min_index::Monoid
-// -------------------------------------------
-
-/// Folds the pair (@a i, @a v) into @a left: the view takes the pair when it
-/// is still unset or when the comparator orders @a v before the value it
-/// currently holds.  Only the values are compared; the index is carried
-/// along.
-template<typename Index, typename Value, typename Compare>
-void
-reducer_min_index<Index,Value,Compare>::Monoid::take_min(View *left,
-                                                         const Index &i,
-                                                         const Value &v) const
-{
-    // Keep the current pair when one exists and v does not compare less; the
-    // comparator is only consulted once the view is known to be set.
-    if (left->m_isSet && !m_comp(v, left->m_value))
-        return;
-
-    left->m_index = i;
-    left->m_value = v;
-    left->m_isSet = true;
-}
-
-/// Merges @a right into @a left.  An unset right view contributes nothing;
-/// otherwise its (index, value) pair is folded into @a left with take_min().
-template<typename Index, typename Value, typename Compare>
-void
-reducer_min_index<Index, Value, Compare>::Monoid::reduce(View *left,
-                                                         View *right) const
-{
-    if (! right->m_isSet)
-        return;
-
-    take_min(left, right->m_index, right->m_value);
-}
-
-// --------------------------------
-// template class reducer_min_index
-// --------------------------------
-
-// Default constructor: default-constructs the underlying hyperobject.
-template <typename Index, typename Value, typename Compare>
-inline reducer_min_index<Index, Value, Compare>::reducer_min_index()
-    : m_imp() {}
-
-// Constructor taking an initial index and value, both forwarded to the
-// hyperobject.
-template <typename Index, typename Value, typename Compare>
-inline reducer_min_index<Index, Value, Compare>::reducer_min_index(
-    const Index& initial_index,
-    const Value& initial_value)
-    : m_imp(initial_index, initial_value) {}
-
-// Constructor taking an initial index, value, and comparator.  The
-// comparator is wrapped in a Monoid, which is handed to the hyperobject
-// ahead of the index and value.
-template <typename Index, typename Value, typename Compare>
-inline reducer_min_index<Index, Value, Compare>::reducer_min_index(
-    const Index& initial_index,
-    const Value& initial_value,
-    const Compare& comp)
-    : m_imp(Monoid(comp), initial_index, initial_value) {}
-
-/// Folds the pair (@a index, @a value) into the current view through the
-/// monoid's take_min().
-template <typename Index, typename Value, typename Compare>
-inline void
-reducer_min_index<Index, Value, Compare>::calc_min(const Index& index,
-                                                   const Value& value)
-{
-    View &current = m_imp.view();
-    m_imp.monoid().take_min(&current, index, value);
-}
-
-/// Immutable reference to the value held by the hyperobject's current view.
-template <typename Index, typename Value, typename Compare>
-inline const Value&
-reducer_min_index<Index, Value, Compare>::get_value() const
-{
-    return m_imp.view().m_value;
-}
-
-/// Mutable reference to the value held by the hyperobject's current view.
-template <typename Index, typename Value, typename Compare>
-inline Value&
-reducer_min_index<Index, Value, Compare>::get_reference()
-{
-    return m_imp.view().m_value;
-}
-
-/// Immutable reference to the value held by the hyperobject's current view
-/// (const overload).
-template <typename Index, typename Value, typename Compare>
-inline Value const&
-reducer_min_index<Index, Value, Compare>::get_reference() const
-{
-    return m_imp.view().m_value;
-}
-
-/// Immutable reference to the index held by the hyperobject's current view.
-template <typename Index, typename Value, typename Compare>
-inline const Index&
-reducer_min_index<Index, Value, Compare>::get_index() const
-{
-    return m_imp.view().m_index;
-}
-
-/// Mutable reference to the index held by the hyperobject's current view.
-template <typename Index, typename Value, typename Compare>
-inline Index&
-reducer_min_index<Index, Value, Compare>::get_index_reference()
-{
-    return m_imp.view().m_index;
-}
-
-/// Reports the "set" flag of the hyperobject's current view.
-template <typename Index, typename Value, typename Compare>
-inline bool
-reducer_min_index<Index, Value, Compare>::is_set() const
-{
-    return m_imp.view().m_isSet;
-}
-
-/// Unconditionally stores the pair (@a index, @a value) in the current view
-/// (View::set() also marks the view as set).
-template <typename Index, typename Value, typename Compare>
-inline
-void reducer_min_index<Index, Value, Compare>::set_value(const Index& index,
-                                                         const Value& value)
-{
-    View &v = m_imp.view();
-
-    // Plain call, not `return v.set(...)`: both this function and View::set()
-    // return void, and reducer_min::set_value() is written the same way.
-    v.set(index, value);
-}
-
-} // namespace cilk
-
-#endif // __cplusplus
-
-/* C Interface
- */
-
-__CILKRTS_BEGIN_EXTERN_C
-
-/* REDUCER_MIN */
-
-/* Name of the C min-reducer type for the type-name suffix tn. */
-#define CILK_C_REDUCER_MIN_TYPE(tn) __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn)
-/* Declares a min reducer named obj of the type selected by tn and
- * initializes it with the matching reduce and identity functions, the no-op
- * destroy function, and v as the trailing initializer argument. */
-#define CILK_C_REDUCER_MIN(obj,tn,v)                                      \
-    CILK_C_REDUCER_MIN_TYPE(tn) obj = CILK_C_INIT_REDUCER(                \
-        _Typeof(obj.value),                                               \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_,tn),                 \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_,tn),               \
-        __cilkrts_hyperobject_noop_destroy, v)
-
-/* For numeric type t with name suffix tn: typedef the reducer type
- * cilk_c_reducer_min_<tn> and declare its reduce and identity functions. */
-#define CILK_C_REDUCER_MIN_INSTANCE(t,tn)                                 \
-    typedef CILK_C_DECLARE_REDUCER(t)                                     \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn);                        \
-    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r);          \
-    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn);
-
-/* CILK_C_REDUCER_MIN_CALC(reducer, v) performs the reducer lookup
- * AND calc_min operation, leaving the current view with the min of the
- * previous value and v.
- *
- * v is evaluated exactly once, after the view lookup.  The macro's locals
- * use prefixed names so that an argument expression mentioning a caller
- * variable called `view` is not captured by the macro's own temporary.
- */
-#define CILK_C_REDUCER_MIN_CALC(reducer, v) do {                          \
-    _Typeof((reducer).value)* __cilkrts_min_view =                        \
-        &(REDUCER_VIEW(reducer));                                         \
-    _Typeof(v) __cilkrts_min_value = (v);                                 \
-    if (*__cilkrts_min_view > __cilkrts_min_value) {                      \
-        *__cilkrts_min_view = __cilkrts_min_value;                        \
-    } } while (0)
-
-/* Declare the min reducer type and its reduce/identity functions for each C numeric type */
-CILK_C_REDUCER_MIN_INSTANCE(char,char);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_MIN_INSTANCE(signed char,schar);
-CILK_C_REDUCER_MIN_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_MIN_INSTANCE(short,short);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_MIN_INSTANCE(int,int);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned int,unsigned); /* alternate name for the uint instance */
-CILK_C_REDUCER_MIN_INSTANCE(long,long);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_MIN_INSTANCE(long long,longlong);
-CILK_C_REDUCER_MIN_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_MIN_INSTANCE(float,float);
-CILK_C_REDUCER_MIN_INSTANCE(double,double);
-CILK_C_REDUCER_MIN_INSTANCE(long double,longdouble);
-
-/* Define the reduce and identity functions of the min reducer for numeric
- * type t (name suffix tn).  Reduce overwrites the left view with the right
- * one when the left is greater; identity stores id into the new view. */
-#define CILK_C_REDUCER_MIN_IMP(t,tn,id)                                   \
-    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r)           \
-    {                                                                     \
-        if (*(t*)l > *(t*)r)                                              \
-            *(t*)l = *(t*)r;                                              \
-    }                                                                     \
-    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn)             \
-    {                                                                     \
-        *(t*)v = id;                                                      \
-    }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric types. The contents of reducer_min.c are as follows:
-
-CILK_C_REDUCER_MIN_IMP(char,char,CHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned char,uchar,CHAR_MIN)
-CILK_C_REDUCER_MIN_IMP(signed char,schar,SCHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(wchar_t,wchar_t,WCHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(short,short,SHRT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned short,ushort,USHRT_MAX)
-CILK_C_REDUCER_MIN_IMP(int,int,INT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned int,uint,UINT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned int,unsigned,UINT_MAX) // alternate name
-CILK_C_REDUCER_MIN_IMP(long,long,LONG_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned long,ulong,ULONG_MAX)
-CILK_C_REDUCER_MIN_IMP(long long,longlong,LLONG_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned long long,ulonglong,ULLONG_MAX)
-CILK_C_REDUCER_MIN_IMP(float,float,HUGE_VALF)
-CILK_C_REDUCER_MIN_IMP(double,double,HUGE_VAL)
-CILK_C_REDUCER_MIN_IMP(long double,longdouble,HUGE_VALL)
-
-*/
-
-/* REDUCER_MIN_INDEX */
-
-/* Typedef the view struct cilk_c_reducer_min_index_view_<tn>: a ptrdiff_t
- * index paired with a value of type t. */
-#define CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn)                               \
-    typedef struct {                                                      \
-        __STDNS ptrdiff_t index;                                          \
-        t value;                                                          \
-    } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)
-
-/* Name of the C min-index reducer type for the type-name suffix t. */
-#define CILK_C_REDUCER_MIN_INDEX_TYPE(t) __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,t)
-/* Declares a min-index reducer named obj of the type selected by t and
- * initializes it with the matching reduce and identity functions, the no-op
- * destroy function, and the brace initializer { 0, v } (index 0, value v). */
-#define CILK_C_REDUCER_MIN_INDEX(obj,t,v)                                 \
-    CILK_C_REDUCER_MIN_INDEX_TYPE(t) obj = CILK_C_INIT_REDUCER(           \
-        _Typeof(obj.value),                                               \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_,t),            \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_,t),          \
-        __cilkrts_hyperobject_noop_destroy, { 0, v })
-
-/* For numeric type t with name suffix tn: define the (index, value) view
- * struct, typedef the reducer type cilk_c_reducer_min_index_<tn> over that
- * view, and declare its reduce and identity functions. */
-#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn)                           \
-    CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn);                                  \
-    typedef CILK_C_DECLARE_REDUCER(                                       \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn))             \
-        __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,tn);                  \
-    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r);    \
-    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn);
-
-/* CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) performs the reducer lookup
- * AND calc_min operation, leaving the current view with the min of the
- * previous value and v, and with index i when v replaces the previous value.
- *
- * v is evaluated exactly once, after the view lookup; i is evaluated only
- * when the view is updated.  The macro's locals use prefixed names so that
- * an argument expression mentioning a caller variable called `view` is not
- * captured by the macro's own temporary.
- */
-#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) do {                 \
-    _Typeof((reducer).value)* __cilkrts_min_view =                        \
-        &(REDUCER_VIEW(reducer));                                         \
-    _Typeof(v) __cilkrts_min_value = (v);                                 \
-    if (__cilkrts_min_view->value > __cilkrts_min_value) {                \
-        __cilkrts_min_view->index = (i);                                  \
-        __cilkrts_min_view->value = __cilkrts_min_value;                  \
-    } } while (0)
-
-/* Declare the min-index view struct, reducer type and reduce/identity functions for each C numeric type */
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(char,char);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char,schar);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(short,short);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(int,int);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int,unsigned); /* alternate name for the uint instance */
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(long,long);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long,longlong);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(float,float);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(double,double);
-CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double,longdouble);
-
-/* Define the reduce and identity functions of the min-index reducer for
- * numeric type t (name suffix tn).  Reduce copies the whole right view
- * (index and value) over the left one when the left value is greater;
- * identity stores index 0 and value id into the new view. */
-#define CILK_C_REDUCER_MIN_INDEX_IMP(t,tn,id)                             \
-    __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r)     \
-    {                                                                     \
-        typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)      \
-            view_t;                                                       \
-        if (((view_t*)l)->value > ((view_t*)r)->value)                    \
-            *(view_t*)l = *(view_t*)r;                                    \
-    }                                                                     \
-    __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn)       \
-    {                                                                     \
-        typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)      \
-            view_t;                                                       \
-        ((view_t*)v)->index = 0;                                          \
-        ((view_t*)v)->value = id;                                         \
-    }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric types. The contents of reducer_min_index.c are as follows:
-
-CILK_C_REDUCER_MIN_INDEX_IMP(char,char,CHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned char,uchar,CHAR_MIN)
-CILK_C_REDUCER_MIN_INDEX_IMP(signed char,schar,SCHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(wchar_t,wchar_t,WCHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(short,short,SHRT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned short,ushort,USHRT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(int,int,INT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,uint,UINT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,unsigned,UINT_MAX) // alternate name
-CILK_C_REDUCER_MIN_INDEX_IMP(long,long,LONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long,ulong,ULONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(long long,longlong,LLONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long long,ulonglong,ULLONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(float,float,HUGE_VALF)
-CILK_C_REDUCER_MIN_INDEX_IMP(double,double,HUGE_VAL)
-CILK_C_REDUCER_MIN_INDEX_IMP(long double,longdouble,HUGE_VALL)
-
-*/
-
-
-__CILKRTS_END_EXTERN_C
-
-#endif // defined REDUCER_MIN_H_INCLUDED
+/* reducer_min.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file reducer_min.h
+ *
+ * @brief Defines classes for doing parallel minimum reductions.
+ *
+ * @ingroup ReducersMinMax
+ *
+ * @see ReducersMinMax
+ */
+
+#include "reducer_min_max.h"
diff --git a/libcilkrts/include/cilk/reducer_min_max.h b/libcilkrts/include/cilk/reducer_min_max.h
new file mode 100644
index 00000000000..3348974daf4
--- /dev/null
+++ b/libcilkrts/include/cilk/reducer_min_max.h
@@ -0,0 +1,3601 @@
+/* reducer_min_max.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file reducer_min_max.h
+ *
+ * @brief Defines classes for doing parallel minimum and maximum reductions.
+ *
+ * @ingroup ReducersMinMax
+ *
+ * @see ReducersMinMax
+ */
+
+#ifndef REDUCER_MIN_MAX_H_INCLUDED
+#define REDUCER_MIN_MAX_H_INCLUDED
+
+#include <cilk/reducer.h>
+
+#ifdef __cplusplus
+
+#include <algorithm>
+#include <limits>
+
+/** @defgroup ReducersMinMax Minimum and Maximum Reducers
+ *
+ * Minimum and maximum reducers allow the computation of the minimum or
+ * maximum of a set of values in parallel.
+ *
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redminmax_usage Usage Examples
+ *
+ * cilk::reducer< cilk::op_max<int> > rm;
+ * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
+ * {
+ * rm->calc_max(a[i]); // or *rm = cilk::max_of(*rm, a[i])
+ * }
+ * std::cout << "maximum value is " << rm.get_value() << std::endl;
+ *
+ * and
+ *
+ * cilk::reducer< cilk::op_min_index<int, double> > rmi;
+ * cilk_for (int i = 0; i < ARRAY_SIZE; ++i)
+ * {
+ * rmi->calc_min(i, a[i]); // or *rmi = cilk::min_of(*rmi, i, a[i])
+ * }
+ * std::cout << "minimum value a[" << rmi.get_value().first << "] = "
+ * << rmi.get_value().second << std::endl;
+ *
+ * @section redminmax_monoid The Monoid
+ *
+ * @subsection redminmax_monoid_values Value Set
+ *
+ * The value set of a minimum or maximum reducer is the set of values of
+ * `Type`, possibly augmented with a special identity value which is greater
+ * than (less than) any value of `Type`.
+ *
+ * @subsection redminmax_monoid_operator Operator
+ *
+ * In the most common case, the operator of a minimum reducer is defined as
+ *
+ * x MIN y == (x < y) ? x : y
+ *
+ * Thus, `a1 MIN a2 MIN … an` is the first `ai` which is not greater than any
+ * other `ai`.
+ *
+ * The operator of a maximum reducer is defined as
+ *
+ * x MAX y == (x > y) ? x : y
+ *
+ * Thus, `a1 MAX a2 MAX … an` is the first `ai` which is not less than any
+ * other `ai`.
+ *
+ * @subsection redminmax_monoid_comparators Comparators
+ *
+ * Min/max reducers are not limited to finding the minimum or maximum value
+ * determined by the `<` or `>` operator. In fact, all min/max reducers use a
+ * _comparator_, which is either a function or an object of a function class
+ * that defines a [strict weak ordering]
+ * (http://en.wikipedia.org/wiki/Strict_weak_ordering#Strict_weak_orderings)
+ * on a set of values. (This is exactly the same as the requirement for the
+ * comparison predicate for STL associative containers and sorting
+ * algorithms.)
+ *
+ * Just as with STL algorithms and containers, the comparator type parameter
+ * for min/max reducers is optional. If it is omitted, it defaults to
+ * `std::less`, which gives the behavior described in the previous section.
+ * Using non-default comparators (anything other than `std::less`) with
+ * min/max reducers is just like using them with STL containers and
+ * algorithms.
+ *
+ * Taking comparator objects into account, the reduction operation `MIN` for a
+ * minimum reducer is defined as
+ *
+ * x MIN y == compare(x, y) ? x : y
+ *
+ * where `compare()` is the reducer’s comparator. Similarly, the reduction
+ * operation MAX for a maximum reducer is defined as
+ *
+ * x MAX y == compare(y, x) ? x : y
+ *
+ * (If `compare(x, y) == x < y`, then `compare(y, x) == x > y`.)
+ *
+ * @subsection redminmax_monoid_identity Identity
+ *
+ * The identity value of the reducer is the value which is greater than (less
+ * than) any other value in the value set of the reducer. This is the
+ * [“special identity value”](#redminmax_monoid_values) if the reducer has
+ * one, or the largest (smallest) value in the value set otherwise.
+ *
+ * @section redminmax_index Value and Index Reducers
+ *
+ * Min/max reducers come in two families. The _value_ reducers, using `op_min`
+ * and `op_max` monoids, simply find the smallest or largest value from a set
+ * of values. The _index_ reducers, using `op_min_index` and `op_max_index`
+ * monoids, also record an index value associated with the first occurrence of
+ * the smallest or largest value.
+ *
+ * In the `%op_min_index` usage example [above](#redminmax_usage), the values
+ * are taken from an array, and the index of a value is the index of the array
+ * element it comes from. More generally, though, an index can be any sort of
+ * key which identifies a particular value in a collection of values. For
+ * example, if the values were taken from the nodes of a tree, then the
+ * “index” of a value might be a pointer to the node containing that value.
+ *
+ * A min/max index reducer is essentially the same as a min/max value reducer
+ * whose value type is an (index, value) pair, and whose comparator ignores
+ * the index part of the pair. (index, value) pairs are represented by
+ * `std::pair<Index, Type>` objects. This has the consequence that wherever
+ * the interface of a min/max value reducer has a `Type`, the interface of the
+ * corresponding min/max index reducer has a `std::pair<Index, Type>`. (There
+ * are convenience variants of the `reducer(Type)` constructor and the
+ * `calc_min()`, `calc_max()`, `%min_of()`, and `%max_of()` functions that
+ * take an index argument and a value argument instead of an index/value
+ * pair.)
+ *
+ * @section redminmax_operations Operations
+ *
+ * @subsection redminmax_constructors Constructors
+ *
+ * @subsubsection redminmax_constructors_value Min/Max Value Reducers
+ *
+ * reducer() // identity
+ * reducer(const Compare& compare) // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ * reducer(const Type& value, const Compare& compare)
+ * reducer(move_in(Type& variable), const Compare& compare)
+ *
+ * @subsubsection redminmax_constructors_index Min/Max Index Reducers
+ *
+ * reducer() // identity
+ * reducer(const Compare& compare) // identity
+ * reducer(const std::pair<Index, Type>& pair)
+ * reducer(const Index& index, const Type& value)
+ * reducer(move_in(std::pair<Index, Type>& variable))
+ * reducer(const std::pair<Index, Type>& pair, const Compare& compare)
+ * reducer(const Index& index, const Type& value, const Compare& compare)
+ * reducer(move_in(std::pair<Index, Type>& variable), const Compare& compare)
+ *
+ * @subsection redminmax_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * Type = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
+ *
+ * Note that for an index reducer, the `Type` in these operations is actually a
+ * `std::pair<Index, Type>`. (See @ref redminmax_index.) There is _not_ a
+ * `set_value(value, index)` operation.
+ *
+ * @subsection redminmax_initial Initial Values and is_set()
+ *
+ * A minimum or maximum reducer without a specified initial value, before any
+ * MIN or MAX operation has been performed on it, represents the [identity
+ * value](#redminmax_monoid_identity) of its monoid. For value reducers with a
+ * numeric type and default comparator (`std::less`), this will be a well
+ * defined value. For example,
+ *
+ * reducer< op_max<unsigned> > r1;
+ * // r1.get_value() == 0
+ *
+ * reducer< op_min<float> > r2;
+ * // r2.get_value() == std::numeric_limits<float>::infinity()
+ *
+ * In other cases, though (index reducers, non-numeric types, or non-default
+ * comparators), the actual identity value for the monoid may be unknown, or
+ * it may not even be a value of the reducer’s type. For example, there is no
+ * “largest string” to serve as the initial value for a
+ * `reducer< op_min<std::string> >`. In these cases, the result of calling
+ * `get_value()` is undefined.
+ *
+ * To avoid calling `get_value()` when its result is undefined, you can call
+ * the view’s `is_set()` function, which will return true if the reducer
+ * has a well-defined value — either because a MIN or MAX operation has been
+ * performed, or because it had a well-defined initial value:
+ *
+ * reducer< op_max<unsigned> > r1;
+ * // r1->is_set() == true
+ * // r1.get_value() == 0
+ *
+ * reducer< op_min<std::string> > r2;
+ * // r2->is_set() == false
+ * // r2.get_value() is undefined
+ * r2->calc_min("xyzzy");
+ * // r2->is_set() == true
+ * // r2.get_value() == "xyzzy"
+ *
+ * > Note: For an index reducer without a specified initial value, the
+ * > initial value of the index is the default value of the `Index` type.
+ *
+ * @subsection redminmax_view_ops View Operations
+ *
+ * The basic reduction operation is `x = x MIN a` for a minimum reducer, or
+ * `x = x MAX a` for a maximum reducer. The basic syntax for these operations
+ * uses the `calc_min()` and `calc_max()` member functions of the view class.
+ * An assignment syntax is also provided, using the %cilk::min_of() and
+ * %cilk::max_of() global functions:
+ *
+ * Class | Modifier | Assignment
+ * ---------------|---------------------|-----------
+ * `op_min` | `r->calc_min(x)` | `*r = min_of(*r, x)` or `*r = min_of(x, *r)`
+ * `op_max` | `r->calc_max(x)` | `*r = max_of(*r, x)` or `*r = max_of(x, *r)`
+ * `op_min_index` | `r->calc_min(i, x)` | `*r = min_of(*r, i, x)` or `*r = min_of(i, x, *r)`
+ * `op_max_index` | `r->calc_max(i, x)` | `*r = max_of(*r, i, x)` or `*r = max_of(i, x, *r)`
+ *
+ * Wherever an “`i`, `x`” argument pair is shown in the table above, a single
+ * pair argument may be passed instead. For example:
+ *
+ * Index index;
+ * Type value;
+ * std::pair<Index, Type> ind_val(index, value);
+ * // The following statements are all equivalent.
+ * r->calc_min(index, value);
+ * r->calc_min(ind_val);
+ * *r = min_of(*r, index, value);
+ * *r = min_of(*r, ind_val);
+ *
+ * The `calc_min()` and `calc_max()` member functions return a reference to
+ * the view, so they can be chained:
+ *
+ * r->calc_max(x).calc_max(y).calc_max(z);
+ *
+ * In a `%min_of()` or `%max_of()` assignment, the view on the left-hand side
+ * of the assignment must be the same as the view argument in the call.
+ * Otherwise, the behavior is undefined (but an assertion error will occur if
+ * the code is compiled with debugging enabled).
+ *
+ * *r = max_of(*r, x); // OK
+ * *r1 = max_of(*r2, y); // ERROR
+ *
+ * `%min_of()` and `%max_of()` calls can be nested:
+ *
+ * *r = max_of(max_of(max_of(*r, x), y), z);
+ * *r = min_of(i, a[i], min_of(j, a[j], min_of(k, a[k], *r)));
+ *
+ * @section redminmax_compatibility Compatibility Issues
+ *
+ * Most Cilk library reducers provide
+ * * Binary compatibility between `reducer_KIND` reducers compiled with Cilk
+ * library version 0.9 (distributed with Intel® C++ Composer XE version
+ * 13.0 and earlier) and the same reducers compiled with Cilk library
+ * version 1.0 and later.
+ * * Transparent casting between references to `reducer<op_KIND>` and
+ * `reducer_KIND`.
+ *
+ * This compatibility is not available in all cases for min/max reducers.
+ * There are two areas of incompatibility.
+ *
+ * @subsection redminmax_compatibility_stateful Non-empty Comparators
+ *
+ * There is no way to provide binary compatibility between the 0.9 and 1.0
+ * definitions of min/max reducers that use a non-empty comparator class or a
+ * comparator function. (Empty comparator classes like `std::less` are not a
+ * problem.)
+ *
+ * To avoid run-time surprises, the legacy `reducer_{min|max}[_index]` classes
+ * have been coded in the 1.0 library so that they will not even compile when
+ * instantiated with a non-empty comparator class.
+ *
+ * @subsection redminmax_compatibility_optimized Numeric Optimization
+ *
+ * Min/max reducers with a numeric value type and the default comparator can
+ * be implemented slightly more efficiently than other min/max reducers.
+ * However, the optimization is incompatible with the 0.9 library
+ * implementation of min/max reducers.
+ *
+ * The default min/max reducers implementation in the 1.0 library uses this
+ * numeric optimization. Code using legacy reducers compiled with the 1.0
+ * library can be safely used in the same program as code compiled with the
+ * 0.9 library, but classes compiled with the different Cilk libraries will be
+ * defined in different namespaces.
+ *
+ * The simplest solution is just to recompile the code that was compiled with
+ * the older version of Cilk. However, if this is impossible, you can define
+ * the `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro (on the compiler command line,
+ * or in your source code before including `reducer_min_max.h`) when compiling
+ * with the new library. This will cause it to generate numeric reducers that
+ * will be less efficient, but will be fully compatible with previously
+ * compiled code. (Note that this macro has no effect on [the non-empty
+ * comparator incompatibility](@ref redminmax_compatibility_stateful).)
+ *
+ * @section redminmax_types Type Requirements
+ *
+ * `Type` and `Index` must be `Copy Constructible`, `Default Constructible`,
+ * and `Assignable`.
+ *
+ * `Compare` must be `Copy Constructible` if the reducer is constructed with a
+ * `compare` argument, and `Default Constructible` otherwise.
+ *
+ * The `Compare` function must induce a strict weak ordering on the elements
+ * of `Type`.
+ *
+ * @section redminmax_in_c Minimum and Maximum Reducers in C
+ *
+ * These macros can be used to do minimum and maximum reductions in C:
+ *
+ * Declaration | Type | Operation
+ * -----------------------------|-----------------------------------|----------
+ * @ref CILK_C_REDUCER_MIN |@ref CILK_C_REDUCER_MIN_TYPE |@ref CILK_C_REDUCER_MIN_CALC
+ * @ref CILK_C_REDUCER_MAX |@ref CILK_C_REDUCER_MAX_TYPE |@ref CILK_C_REDUCER_MAX_CALC
+ * @ref CILK_C_REDUCER_MIN_INDEX |@ref CILK_C_REDUCER_MIN_INDEX_TYPE |@ref CILK_C_REDUCER_MIN_INDEX_CALC
+ * @ref CILK_C_REDUCER_MAX_INDEX |@ref CILK_C_REDUCER_MAX_INDEX_TYPE |@ref CILK_C_REDUCER_MAX_INDEX_CALC
+ *
+ * For example:
+ *
+ * CILK_C_REDUCER_MIN(r, int, INT_MAX);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * CILK_C_REDUCER_MIN_CALC(r, a[i]);
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The smallest value in a is %d\n", REDUCER_VIEW(r));
+ *
+ *
+ * CILK_C_REDUCER_MAX_INDEX(r, uint, 0);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * CILK_C_REDUCER_MAX_INDEX_CALC(r, i, a[i]);
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The largest value in a is %u at %d\n",
+ * REDUCER_VIEW (r).value, REDUCER_VIEW(r).index);
+ *
+ * See @ref reducers_c_predefined.
+ */
+
+namespace cilk {
+
+/** @defgroup ReducersMinMaxBinComp Binary compatibility
+ *
+ * If the macro CILK_LIBRARY_0_9_REDUCER_MINMAX is defined, then we generate
+ * reducer code and data structures which are binary-compatible with code that
+ * was compiled with the old min/max wrapper definitions, so we want the
+ * mangled names of the legacy min/max reducer wrapper classes to be the
+ * same as the names produced by the old definitions.
+ *
+ * Conversely, if the macro is not defined, then we generate binary-
+ * incompatible code, so we want different mangled names, to make sure that
+ * the linker does not allow new and old compiled legacy wrappers to be passed
+ * to one another. (Global variables are a different, and probably insoluble,
+ * problem.)
+ *
+ * Similarly, min/max classes compiled with and without
+ * CILK_LIBRARY_0_9_REDUCER_MINMAX are binary-incompatible, and must get
+ * different mangled names.
+ *
+ * The trick is, when compiling in normal (non-compatibility) mode, wrap
+ * everything in an extra namespace, and then `use` it into the top-level cilk
+ * namespace. Then
+ *
+ * * Classes and functions compiled in normal mode will be in
+ * different namespaces from the same classes and functions compiled in
+ * compatibility mode.
+ * * The legacy wrapper classes and functions will be in the same namespace
+ * as the same classes and functions compiled with the 0.9 library if and
+ * only if they are compiled in compatibility mode.
+ *
+ * @ingroup ReducersMinMax
+ */
+
+#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX
+/** Namespace to wrap min/max reducer definitions when not compiling in “binary
+ * compatibility” mode.
+ *
+ * By default, all of the min/max reducer definitions are defined in this
+ * namespace and then imported into namespace ::cilk, so that they do not
+ * clash with the legacy definitions with the same names. However, if the
+ * macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then the min/max
+ * definitions go directly into namespace ::cilk, so that, for example,
+ * cilk::reducer_max defined with the 1.0 library is equivalent (to the
+ * linker) to cilk::reducer_max defined with the 0.9 library.
+ *
+ * @ingroup ReducersMinMaxBinComp
+ * @ingroup ReducersMinMax
+ */
+namespace cilk_lib_1_0 {
+#endif
+
+/** Namespace containing internal implementation classes and functions for
+ * min/max reducers.
+ *
+ * @ingroup ReducersMinMax
+ */
+namespace min_max_internal {
+
+using ::cilk::internal::binary_functor;
+using ::cilk::internal::typed_indirect_binary_function;
+using ::cilk::internal::class_is_empty;
+
+/** @defgroup ReducersMinMaxIsSet The “is_set optimization”
+ *
+ * The obvious definition of the identity value for a max or min reducer is as
+ * the smallest (or largest) value of the value type. However, for an
+ * arbitrary comparator and/or an arbitrary value type, the largest / smallest
+ * value may not be known. It may not even be defined — what is the largest
+ * string?
+ *
+ * Therefore, min/max reducers represent their value internally as a pair
+ * `(value, is_set)`. When `is_set` is true, the pair represents the known
+ * value `value`; when `is_set` is false, the pair represents the identity
+ * value.
+ *
+ * This is an effective solution, but the most common use of min/max reducers
+ * is probably with numeric types and the default definition of minimum or
+ * maximum (using `std::less`), in which case there are well-defined, knowable
+ * smallest and largest values. Testing `is_set` for every comparison is then
+ * unnecessary and wasteful.
+ *
+ * The “is_set optimization” just means generating code that doesn’t use
+ * `is_set` when it isn’t needed. It is implemented using two metaprogramming
+ * classes:
+ *
+ * - do_is_set_optimization tests whether the optimization is applicable.
+ * - identity_value gets the appropriate identity value for a type.
+ *
+ * The is_set optimization is the reason that min/max reducers compiled with
+ * Cilk library 1.0 are binary-incompatible with the same reducers compiled
+ * with library 0.9, and therefore the optimization is suppressed when
+ * compiling in
+ * @ref ReducersMinMaxBinComp "binary compatibility mode".
+ *
+ * @ingroup ReducersMinMax
+ */
+
+/** Test whether the @ref ReducersMinMaxIsSet "is_set optimization" is
+ * applicable.
+ *
+ * The @ref do_is_set_optimization class is used to test whether the is_set
+ * optimization should be applied for a particular reducer. It is instantiated
+ * with a value type and a comparator, and defines a boolean constant,
+ * `value`. Then `%do_is_set_optimization<Type, Comp>::%value` can be used as
+ * a boolean template parameter to control the specialization of another
+ * class.
+ *
+ * In @ref ReducersMinMaxBinComp "binary compatibility mode", when the
+ * `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro is defined, `value` will always
+ * be false.
+ *
+ * @tparam Type The value type for the reducer.
+ * @tparam Compare The comparator type for the reducer.
+ *
+ * @result The `value` data member will be `true` if @a Type is a numeric
+ * type, @a Compare is `std::less<Type>`, and
+ * `CILK_LIBRARY_0_9_REDUCER_MINMAX` is not defined.
+ *
+ * @see ReducersMinMaxIsSet
+ * @see @ref view_content
+ *
+ * @ingroup ReducersMinMaxIsSet
+ */
+template <typename Type, typename Compare>
+struct do_is_set_optimization {
+    /// Whether min/max reducers with this value type and comparator should
+    /// use the is_set optimization. The general case never does.
+    static const bool value = false;
+};
+
+#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX
+/// @cond
+// Only compiled when CILK_LIBRARY_0_9_REDUCER_MINMAX is not defined: with the
+// `std::less<Type>` comparator, the optimization applies exactly when
+// `std::numeric_limits` is specialized for the value type.
+template <typename Type>
+struct do_is_set_optimization< Type, std::less<Type> > {
+    /// Mirrors `std::numeric_limits<Type>::is_specialized`.
+    static const bool value = std::numeric_limits<Type>::is_specialized;
+};
+/// @endcond
+#endif
+
+
+/** Get the identity value when using the @ref ReducersMinMaxIsSet
+ * "is_set optimization".
+ *
+ * This class defines a function which assigns the appropriate identity value
+ * to a variable when the is_set optimization is applicable.
+ *
+ * @tparam Type The value type for the reducer.
+ * @tparam Compare The comparator type for the reducer.
+ * @tparam ForMax `true` to get the identity value for a max reducer (i.e.,
+ * the smallest value of @a Type), `false` to get the identity
+ * value for a min reducer (i.e., the largest value of
+ * @a Type).
+ *
+ * @result If @a Type and @a Compare qualify for the is_set optimization, the
+ * `set_identity()` function will set its argument variable to the
+ * smallest or largest value of @a Type, depending on @a ForMax.
+ * Otherwise, `set_identity()` will be a no-op.
+ *
+ * @see ReducersMinMaxIsSet
+ *
+ * @ingroup ReducersMinMaxIsSet
+ * @see @ref view_content
+ */
+template <typename Type,
+          typename Compare,
+          bool ForMax,
+          bool IsSpecialized = std::numeric_limits<Type>::is_specialized,
+          bool HasInfinity = std::numeric_limits<Type>::has_infinity>
+struct identity_value {
+    /// General case: there is no identity to assign, so the argument is
+    /// left untouched.
+    static void set_identity(Type&) {}
+};
+
+/// @cond
+
+// Max reducer, std::less comparator, type has an infinity.
+template <typename Type>
+struct identity_value<Type, std::less<Type>, true, true, true> {
+    /// The identity for max is negative infinity.
+    static void set_identity(Type& target)
+    {
+        target = -std::numeric_limits<Type>::infinity();
+    }
+};
+
+// Max reducer, std::less comparator, type has no infinity.
+template <typename Type>
+struct identity_value<Type, std::less<Type>, true, true, false> {
+    /// The identity for max is `numeric_limits<Type>::min()`.
+    static void set_identity(Type& target)
+    {
+        target = std::numeric_limits<Type>::min();
+    }
+};
+
+// Min reducer, std::less comparator, type has an infinity.
+template <typename Type>
+struct identity_value<Type, std::less<Type>, false, true, true> {
+    /// The identity for min is positive infinity.
+    static void set_identity(Type& target)
+    {
+        target = std::numeric_limits<Type>::infinity();
+    }
+};
+
+// Min reducer, std::less comparator, type has no infinity.
+template <typename Type>
+struct identity_value<Type, std::less<Type>, false, true, false> {
+    /// The identity for min is `numeric_limits<Type>::max()`.
+    static void set_identity(Type& target)
+    {
+        target = std::numeric_limits<Type>::max();
+    }
+};
+
+/// @endcond
+
+
+/** Adapter class to reverse the arguments of a predicate.
+ *
+ * Observe that:
+ *
+ * (x < y) == (y > x)
+ * max(x, y) == (x < y) ? y : x
+ * min(x, y) == (y < x) ? y : x == (x > y) ? y : x
+ *
+ * More generally, if `c` is a predicate defining a `Strict Weak Ordering`,
+ * and `c*(x, y) == c(y, x)`, then
+ *
+ * max(x, y, c) == c(x, y) ? y : x
+ * min(x, y, c) == c(y, x) ? y : x == c*(x, y) ? y : x == max(x, y, c*)
+ *
+ * For any predicate `C` with argument type `T`, the template class
+ * `%reverse_predicate<C, T>` defines a predicate which is identical to `C`,
+ * except that its arguments are reversed. Thus, for example, we could
+ * implement `%op_min_view<Type, Compare>` as
+ * `%op_max_view<Type, %reverse_predicate<Compare, Type> >`.
+ * (Actually, op_min_view and op_max_view are both implemented as subclasses
+ * of a common base class, view_base.)
+ *
+ * @note If `C` is an empty functor class, then `reverse_predicate<C>` will
+ * also be an empty functor class.
+ *
+ * @tparam Predicate The predicate whose arguments are to be reversed.
+ * @tparam Argument @a Predicate’s argument type.
+ *
+ * @ingroup ReducersMinMax
+ */
+template <typename Predicate,
+          typename Argument = typename Predicate::first_argument_type>
+class reverse_predicate : private binary_functor<Predicate>::type
+{
+    // The base functor type that binary_functor selects for Predicate.
+    typedef typename binary_functor<Predicate>::type functor_type;
+
+public:
+    /// Construct with a default-constructed base functor.
+    reverse_predicate() : functor_type() {}
+
+    /// Construct the base functor from an existing predicate object.
+    reverse_predicate(const Predicate& pred) : functor_type(pred) {}
+
+    /// Apply the base functor with the two arguments swapped.
+    bool operator()(const Argument& lhs, const Argument& rhs) const
+    {
+        return functor_type::operator()(rhs, lhs);
+    }
+};
+
+
+/** Class to represent the comparator for a min/max view class.
+ *
+ * This class is intended to accomplish two objectives in the implementation
+ * of min/max views.
+ *
+ * 1. To minimize data bloat, when we have a reducer with a non-stateless
+ * comparator, we want to keep a single instance of the comparator object
+ * in the monoid, and just call it from the views.
+ * 2. In @ref ReducersMinMaxBinComp "binary compatibility mode", views for
+ * reducers with a stateless comparator must have the same content as in
+ * Cilk library 0.9 — that is, they must contain only `value` and
+ * `is_set` data members.
+ *
+ * To achieve the first objective, we use the
+ * @ref internal::typed_indirect_binary_function class defined in
+ * metaprogramming.h to wrap a pointer to the actual comparator. If no
+ * pointer is needed because the actual comparator is stateless, the
+ * `typed_indirect_binary_function` class will be empty, too.
+ *
+ * To achieve the second objective, we make the
+ * `typed_indirect_binary_function` class a base class of the view rather than
+ * a data member, so the “empty base class” rule will ensure that no
+ * additional space is allocated in the view unless it is needed.
+ *
+ * We could simply use typed_indirect_binary_function as the base class of the
+ * view, but this would mean writing comparisons as `(*this)(x, y)`, which is
+ * just weird. So, instead, we define comparator_base as a subclass of
+ * typed_indirect_binary_function which provides function `compare()`
+ * as a synonym for `operator()`.
+ *
+ * @tparam Type The value type of the comparator class.
+ * @tparam Compare A predicate class.
+ *
+ * @see internal::typed_indirect_binary_function
+ *
+ * @ingroup ReducersMinMax
+ */
+template <typename Type, typename Compare>
+class comparator_base :
+    private typed_indirect_binary_function<Compare, Type, Type, bool>
+{
+    typedef typed_indirect_binary_function<Compare, Type, Type, bool>
+        indirect_function;
+
+protected:
+    /// Construct from a pointer to the comparator object.
+    comparator_base(const Compare* comparator) : indirect_function(comparator) {}
+
+    /// Named synonym for the base class's `operator()`.
+    bool compare(const Type& lhs, const Type& rhs) const
+    {
+        return indirect_function::operator()(lhs, rhs);
+    }
+
+    /// The comparator pointer, as reported by the base class.
+    const Compare* compare_pointer() const
+    {
+        return indirect_function::pointer();
+    }
+};
+
+
+/** @defgroup ReducersMinMaxViewContent Content classes for min/max views
+ *
+ * @ingroup ReducersMinMax
+ *
+ * Minimum and maximum reducer view classes inherit from a “view content”
+ * class. The content class defines the actual data members for the view,
+ * and provides typedefs and member functions for accessing the data members
+ * as needed to support the view functionality.
+ *
+ * There are two content classes, which encapsulate the differences between
+ * simple min/max reducers and min/max with index reducers:
+ *
+ * - view_content
+ * - index_view_content
+ *
+ * @note An obvious, and arguably simpler, encapsulation strategy would be
+ * to just let the `Type` of a min/max view be an (index, value) pair
+ * structure for min_index and max_index reducers. Then all views
+ * would just have a `Type` data member and an `is_set` data member,
+ * and the comparator for min_index and max_index views could be
+ * customized to consider only the value component of the (index,
+ * value) `Type` pair. Unfortunately, this would break binary
+ * compatibility with reducer_max_index and reducer_min_index in
+ * Cilk library 0.9, because the memory layout of an (index, value)
+ * pair followed by a `bool` is different from the memory layout of an
+ * index data member followed by a value data member followed by a
+ * `bool` data member. The content class is designed to exactly
+ * replicate the layout of the views in library 0.9 reducers.
+ *
+ * A content class `C`, and its objects `c`, must define the following:
+ *
+ * Definition | Meaning
+ * ------------------------------------|--------
+ * `C::value_type` | A typedef for `Type` of the view. (A `std::pair<Index, Type>` for min_index and max_index views).
+ * `C::comp_value_type` | A typedef for the type of value compared by the view’s `compare()` function.
+ * `C()` | Constructs the content with the identity value.
+ * `C(const value_type&)` | Constructs the content with a specified value.
+ * `c.is_set()` | Returns true if the content has a known value.
+ * `c.value()` | Returns the content’s value.
+ * `c.set_value(const value_type&)` | Sets the content’s value. (The value becomes known.)
+ * `c.comp_value()` | Returns a const reference to the value or component of the value that is to be compared by the view’s comparator.
+ * `C::comp_value(const value_type&)` | Returns a const reference to a value or component of a value that is to be compared by the view’s comparator.
+ *
+ * @see view_base
+ */
+
+/** Content class for op_min_view and op_max_view.
+ *
+ * @tparam Type The value type of the op_min_view or op_max_view.
+ * @tparam Compare The comparator class specified for the op_min_view or
+ * op_max_view. (_Not_ the derived comparator class actually
+ * used by the view_base. For example, the view_content of an
+ * `op_min_view<int>` will have `Compare = std::less<int>`,
+ * but its comparator_base will have
+ * `Compare = reverse_predicate< std::less<int> >`.)
+ * @tparam ForMax `true` if this is the content class for an op_max_view,
+ * `false` if it is for an op_min_view.
+ *
+ * @note The general implementation of view_content uses an `is_set` data
+ * member. There is also a specialization which implements the
+ * @ref ReducersMinMaxIsSet "is_set optimization". View classes that
+ * inherit from view_content do not need to know anything about the
+ * difference, though; the details are abstracted away in the
+ * view_content interface.
+ *
+ * @see ReducersMinMaxViewContent
+ *
+ * @ingroup ReducersMinMaxViewContent
+ * @ingroup ReducersMinMax
+ */
+template < typename Type
+ , typename Compare
+ , bool ForMax
+ , bool = do_is_set_optimization<Type, Compare>::value
+ >
+class view_content {
+ // General implementation, with an explicit flag recording whether m_value
+ // holds a known value. Neither `Compare` nor `ForMax` is referenced in the
+ // body of this primary template.
+
+ // The stored value; value-initialized by the default constructor.
+ Type m_value;
+ // True once a value has been supplied (by constructor or set_value()).
+ bool m_is_set;
+public:
+ /// The value type of the view.
+ typedef Type value_type;
+
+ /// The type compared by the view’s `compare()` function (which is the same
+ /// as the value type for view_content).
+ typedef Type comp_value_type;
+
+ /// Construct with the identity value: the stored value is
+ /// value-initialized and `is_set()` returns false.
+ view_content() : m_value(), m_is_set(false) {}
+
+ /// Construct with a defined value; `is_set()` returns true afterwards.
+ view_content(const value_type& value) : m_value(value), m_is_set(true) {}
+
+ /// Get a copy of the stored value.
+ value_type value() const { return m_value; }
+
+ /// Set the value and mark the content as set.
+ void set_value(const value_type& value)
+ {
+ m_value = value;
+ m_is_set = true;
+ }
+
+ /// Get the comparison value (which is the same as the value for
+ /// view_content).
+ const comp_value_type& comp_value() const { return m_value; }
+
+ /// Given an arbitrary value, get the corresponding comparison value (which
+ /// is the same as the value for view_content). The argument is returned
+ /// unchanged, by reference.
+ static const comp_value_type& comp_value(const value_type& value)
+ {
+ return value;
+ }
+
+ /// Get a const reference to value part of the value (which is the same as
+ /// the value for view_content).
+ const Type& get_reference() const { return m_value; }
+
+ /// Get a const reference to the index part of the value (which is
+ /// meaningless for non-index reducers, but required for view_base). This
+ /// returns the same member as get_reference().
+ const Type& get_index_reference() const { return m_value; }
+
+ /// Test if the value is defined.
+ bool is_set() const { return m_is_set; }
+};
+
+/// @cond
+
+/* This is the specialization of the view_content class for cases where the
+ * fourth template argument is true. In the primary template that argument
+ * defaults to `do_is_set_optimization<Type, Compare>::value`, i.e., it is
+ * true where the is_set optimization is applicable. This specialization has
+ * no `is_set` flag: it stores only the value.
+ */
+template < typename Type
+ , typename Compare
+ , bool ForMax
+ >
+class view_content<Type, Compare, ForMax, true> {
+ // Supplies the identity value used by the default constructor.
+ typedef identity_value<Type, Compare, ForMax> Identity;
+ // The stored value (the only data member).
+ Type m_value;
+public:
+ /// The value type of the view.
+ typedef Type value_type;
+ /// The type compared by the view's `compare()` function (same as the
+ /// value type here).
+ typedef Type comp_value_type;
+
+ /// Construct with identity value, by passing the stored value to
+ /// `Identity::set_identity()`.
+ view_content() { Identity::set_identity(m_value); }
+
+ /// Construct with a defined value.
+ view_content(const value_type& value) : m_value(value) {}
+
+ /// Get a copy of the stored value.
+ value_type value() const { return m_value; }
+
+ /// Set the value.
+ void set_value(const value_type& value)
+ {
+ m_value = value;
+ }
+
+ /// Get the comparison value (the stored value itself).
+ const comp_value_type& comp_value() const { return m_value; }
+
+ /// Given an arbitrary value, get the corresponding comparison value (the
+ /// argument itself, returned by reference).
+ static const comp_value_type& comp_value(const value_type& value)
+ {
+ return value;
+ }
+
+ /// Get a const reference to the stored value.
+ const Type& get_reference() const { return m_value; }
+
+ /// Returns the same member as get_reference(); there is no separate index.
+ const Type& get_index_reference() const { return m_value; }
+
+ /// Test if the value is defined. Always true in this specialization.
+ bool is_set() const { return true; }
+};
+
+/// @endcond
+
+
+/** Content class for op_min_index_view and op_max_index_view.
+ *
+ * @tparam Index The index type of the op_min_index_view or
+ * op_max_index_view.
+ * @tparam Type The value type of the op_min_index_view or
+ * op_max_index_view. (_Not_
+ * the value type of the view, which will be
+ * `std::pair<Index, Type>`.)
+ * @tparam Compare The comparator class specified for the op_min_index_view or
+ * op_max_index_view. (_Not_ the derived comparator class
+ * actually used by the view_base. For example, the
+ * index_view_content of an `op_min_index_view<int>` will have
+ * `Compare = std::less<int>`, but its comparator_base will
+ * have `Compare = reverse_predicate< std::less<int> >`.)
+ * @tparam ForMax `true` if this is the content class for an
+ * op_max_index_view, `false` if it is for an
+ * op_min_index_view.
+ *
+ * @see ReducersMinMaxViewContent
+ *
+ * @ingroup ReducersMinMaxViewContent
+ * @ingroup ReducersMinMax
+ */
+template < typename Index
+ , typename Type
+ , typename Compare
+ , bool ForMax
+ >
+class index_view_content {
+ // NOTE(review): `Identity` is not referenced anywhere else in this class,
+ // and it is the only use of `Compare` and `ForMax` here.
+ typedef identity_value<Type, Compare, ForMax> Identity;
+
+ // Data members, declared in the order index, value, flag.
+ Index m_index;
+ Type m_value;
+ bool m_is_set;
+public:
+ /// The value type of the view (which is an <index, value> pair for
+ /// index_view_content).
+ typedef std::pair<Index, Type> value_type;
+
+ /// The type compared by the view’s `compare()` function (which is the data
+ /// value type for index_view_content).
+ typedef Type comp_value_type;
+
+ /// Construct with the identity value: index and value are
+ /// value-initialized and `is_set()` returns false.
+ index_view_content() : m_index(), m_value(), m_is_set(false) {}
+
+ /// Construct with an index/value pair; `is_set()` returns true afterwards.
+ index_view_content(const value_type& value) :
+ m_index(value.first), m_value(value.second), m_is_set(true) {}
+
+ /// Construct with an index and a value; `is_set()` returns true afterwards.
+ index_view_content(const Index& index, const Type& value) :
+ m_index(index), m_value(value), m_is_set(true) {}
+
+ /// Construct with just an index. The value is value-initialized and the
+ /// content is still not set (`is_set()` returns false).
+ index_view_content(const Index& index) :
+ m_index(index), m_value(), m_is_set(false) {}
+
+ /// Get the value, as a newly built (index, value) pair.
+ value_type value() const { return value_type(m_index, m_value); }
+
+ /// Set index and value from the pair, and mark the content as set.
+ void set_value(const value_type& value)
+ {
+ m_index = value.first;
+ m_value = value.second;
+ m_is_set = true;
+ }
+
+ /// Get the comparison value (which is the value component of the
+ /// index/value pair for index_view_content).
+ const comp_value_type& comp_value() const { return m_value; }
+
+ /// Given an arbitrary value (i.e., index/value pair), get the
+ /// corresponding comparison value (which is the value component of the
+ /// index/value pair for index_view_content).
+ static const comp_value_type& comp_value(const value_type& value)
+ { return value.second; }
+
+ /// Get a const reference to value part of the value.
+ const Type& get_reference() const { return m_value; }
+
+ /// Get a const reference to the index part of the value.
+ const Index& get_index_reference() const { return m_index; }
+
+ /// Test if the value is defined.
+ bool is_set() const { return m_is_set; }
+};
+
+
+// Forward declaration; the class is defined below.
+template <typename View> class rhs_proxy;
+
+/** Create an rhs_proxy.
+ *
+ * @tparam View The view class the proxy is created from.
+ * @param value The value to be carried by the proxy.
+ * @param view The view the proxy refers to.
+ * @returns An rhs_proxy for @a view carrying @a value.
+ */
+template <typename View>
+inline rhs_proxy<View>
+make_proxy(const typename View::value_type& value, const View& view);
+
+// Forward declaration, needed by the friend declaration inside rhs_proxy.
+template <typename Content, typename Less, typename Compare> class view_base;
+
+
+/** Class to represent the right-hand side of
+ * `*reducer = {min|max}_of(*reducer, value)`.
+ *
+ * The only assignment operator for a min/max view class takes a rhs_proxy as
+ * its operand. This results in the syntactic restriction that the only
+ * expressions that can be assigned to a min/max view are ones which generate
+ * an rhs_proxy — that is, expressions of the form `max_of(view, value)` and
+ * `min_of(view, value)`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same; otherwise,
+ * the behavior will be undefined. (I.e., `*r1 = min_of(*r1, x)` is legal;
+ * `*r1 = min_of(*r2, x)` is illegal.) This condition will be checked with a
+ * runtime assertion when compiled in debug mode.
+ *
+ * @tparam View The view class (op_{min|max}[_index]_view) that this proxy
+ * was created from.
+ *
+ * @see view_base
+ *
+ * @ingroup ReducersMinMax
+ */
+template <typename View>
+class rhs_proxy {
+    typedef typename View::less_type                less_type;
+    typedef typename View::compare_type             compare_type;
+    typedef typename View::value_type               value_type;
+    typedef typename View::content_type             content_type;
+    typedef typename content_type::comp_value_type  comp_value_type;
+
+    // Only the matching view_base and make_proxy() may construct a proxy or
+    // read its value.
+    friend class view_base<content_type, less_type, compare_type>;
+    friend rhs_proxy make_proxy<View>(
+        const typename View::value_type& value,
+        const View& view);
+
+    // Data members. C++ initializes members in this declaration order,
+    // regardless of the order written in a constructor's initializer list.
+    typed_indirect_binary_function<
+        compare_type, comp_value_type, comp_value_type, bool>
+                    m_comp;     // Comparator, held indirectly via a pointer.
+    const View*     m_view;     // The view this proxy was created from.
+    value_type      m_value;    // The candidate value carried by the proxy.
+
+    rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+    rhs_proxy();                            // Disable default constructor
+
+    // Constructor (called from make_proxy() and from calc()).
+    // The initializer list follows the member declaration order, so it
+    // reads in the order the initializations actually happen.
+    rhs_proxy(const View* view,
+              const value_type& value,
+              const compare_type* compare) :
+        m_comp(compare), m_view(view), m_value(value) {}
+
+    // Check matching view, then return value (called from view_base::assign).
+    // The check is the __CILKRTS_ASSERT below.
+    value_type value(const typename View::base* view) const
+    {
+        __CILKRTS_ASSERT(view == m_view);
+        return m_value;
+    }
+
+public:
+
+    /** Support max_of(max_of(view, value), value) and the like.
+     *
+     *  @param x The next candidate value.
+     *  @returns A new proxy for the same view and comparator. It carries
+     *           @a x if the comparator returns true for
+     *           (carried value, @a x), comparing the `comp_value` parts;
+     *           otherwise it carries the currently held value.
+     */
+    rhs_proxy calc(const value_type& x) const
+    {
+        return rhs_proxy(
+            m_view,
+            m_comp( content_type::comp_value(m_value),
+                    content_type::comp_value(x)
+                  ) ? x : m_value,
+            m_comp.pointer());
+    }
+};
+
+
+// Definition of make_proxy(): the proxy is built from the address of the
+// view, the candidate value, and the view's comparator pointer.
+template <typename View>
+inline rhs_proxy<View>
+make_proxy(const typename View::value_type& value, const View& view)
+{
+    const View* const source_view = &view;
+    return rhs_proxy<View>(source_view, value, view.compare_pointer());
+}
+
+//@}
+
+/** Base class for min and max view classes.
+ *
+ * This class accumulates the minimum or maximum of a set of values which have
+ * occurred as arguments to the `calc()` function, as determined by a
+ * comparator. The accumulated value will be the first `calc()` argument value
+ * `x` such that `compare(x, y)` is false for every `calc()` argument value
+ * `y`.
+ *
+ * If the comparator is `std::less`, then the accumulated value is the first
+ * argument value which is not less than any other argument value, i.e., the
+ * maximum. Similarly, if the comparator is `reverse_predicate<std::less>`,
+ * which is equivalent to `std::greater`, then the accumulated value is the
+ * first argument value which is not greater than any other argument value,
+ * i.e., the minimum.
+ *
+ * @note This class provides the definitions that are required for a class
+ * that will be used as the parameter of a
+ * min_max_internal::monoid_base specialization.
+ *
+ * @tparam Content A content class that provides the value types and data
+ * members for the view.
+ * @tparam Less A “less than” binary predicate that defines the min or
+ * max function.
+ * @tparam Compare A binary predicate to be used to compare the values.
+ * (The same as @a Less for max reducers; its reversal for
+ * min reducers.)
+ *
+ * @see ReducersMinMaxViewContent
+ * @see op_max_view
+ * @see op_min_view
+ * @see op_max_index_view
+ * @see op_min_index_view
+ * @see monoid_base
+ *
+ * @ingroup ReducersMinMax
+ */
+template <typename Content, typename Less, typename Compare>
+class view_base :
+ // comparator_base comes first to ensure that it will get empty base class
+ // treatment
+ private comparator_base<typename Content::comp_value_type, Compare>,
+ private Content
+{
+ // Shorthand for the comparator base class.
+ typedef comparator_base<typename Content::comp_value_type, Compare> base;
+ // Bring the privately inherited operations into this class's scope.
+ using base::compare;
+ using Content::value;
+ using Content::set_value;
+ using Content::comp_value;
+ // Private alias for the content class; rhs_proxy (a friend) reads it.
+ typedef Content content_type;
+
+ // rhs_proxy and make_proxy() are friends of this class.
+ template <typename View> friend class rhs_proxy;
+ template <typename View>
+ friend rhs_proxy<View> make_proxy(const typename View::value_type& value, const View& view);
+
+public:
+
+ /** @name Monoid support.
+ */
+ //@{
+
+ /** Value type. Required by @ref monoid_with_view.
+ */
+ typedef typename Content::value_type value_type;
+
+ /** The type of the comparator specified by the user, that defines the
+ * ordering on @a Type. Required by min_max_internal::monoid_base.
+ */
+ typedef Less less_type;
+
+ /** The type of the comparator actually used by the view. Required by
+ * min_max_internal::monoid_base. (This is the same as the @ref less_type
+ * for a max reducer, or `reverse_predicate<less_type>` for a min reducer.)
+ */
+ typedef Compare compare_type;
+
+ /** Reduce operation. Required by @ref monoid_with_view.
+ *
+ * Takes the value of @a other if @a other is set and either this view is
+ * not set or `compare(this value, other value)` is true. Otherwise this
+ * view is left unchanged. @a other is only read.
+ */
+ void reduce(view_base* other)
+ {
+ if ( other->is_set() &&
+ ( !this->is_set() ||
+ compare(this->comp_value(), other->comp_value()) ) )
+ {
+ this->set_value(other->value());
+ }
+ }
+
+ //@}
+
+ /** Default constructor. Initializes to identity value.
+ *
+ * @param compare Pointer to the comparator, passed to the comparator
+ * base class.
+ */
+ explicit view_base(const compare_type* compare) :
+ base(compare), Content() {}
+
+ /** Value constructor. Forwards one argument to the content class
+ * constructor.
+ */
+ template <typename T1>
+ view_base(const T1& x1, const compare_type* compare) :
+ base(compare), Content(x1) {}
+
+ /** Value constructor. Forwards two arguments to the content class
+ * constructor.
+ */
+ template <typename T1, typename T2>
+ view_base(const T1& x1, const T2& x2, const compare_type* compare) :
+ base(compare), Content(x1, x2) {}
+
+
+ /** Move-in constructor. Constructs the content from the value held by
+ * the wrapper.
+ */
+ explicit view_base(move_in_wrapper<value_type> w, const compare_type* compare) :
+ base(compare), Content(w.value()) {}
+
+ /** @name Reducer support.
+ */
+ //@{
+
+ // These four functions forward to the content's set_value() and value().
+ void view_move_in(value_type& v) { set_value(v); }
+ void view_move_out(value_type& v) { v = value(); }
+ void view_set_value(const value_type& v) { set_value(v); }
+ value_type view_get_value() const { return value(); }
+ // view_get_reference() NOT SUPPORTED
+
+ //@}
+
+ /** Is the value defined?
+ */
+ using Content::is_set;
+
+ /** Reference to contained value data member.
+ * @deprecated For legacy reducers only.
+ */
+ using Content::get_reference;
+
+ /** Reference to contained index data member.
+ * (Meaningless for non-index reducers.)
+ * @deprecated For legacy reducers only.
+ */
+ using Content::get_index_reference;
+
+protected:
+
+ /** Update the min/max value.
+ *
+ * Stores @a x if no value is set yet, or if
+ * `compare(current value, x)` is true; otherwise does nothing.
+ */
+ void calc(const value_type& x)
+ {
+ if (!is_set() || compare(comp_value(), comp_value(x))) set_value(x);
+ }
+
+ /** Assign the result of a `{min|max}_of(view, value)` expression to the
+ * view.
+ *
+ * Passes `this` to the proxy's `value()` function, which asserts that
+ * the proxy was created from this view, and then applies calc() to the
+ * returned value.
+ *
+ * @see rhs_proxy
+ */
+ template <typename View>
+ void assign(const rhs_proxy<View>& rhs)
+ {
+ calc(rhs.value(this));
+ }
+
+};
+
+
+/** Base class for min and max monoid classes.
+ *
+ * The unique characteristic of minimum and maximum reducers is that they
+ * incorporate a comparator functor that defines what “minimum” or “maximum”
+ * means. The monoid for a reducer contains the comparator that will be used
+ * for the reduction. If the comparator is a function or a class with state,
+ * then each view will have a pointer to the comparator.
+ *
+ * This means that the `construct()` functions first construct the monoid
+ * (possibly with an explicit comparator argument), and then construct the
+ * view with a pointer to the monoid’s comparator.
+ *
+ * @tparam View The view class.
+ * @tparam Align If true, reducers instantiated on this monoid will be
+ * aligned. By default, library reducers (unlike legacy
+ * library reducer _wrappers_) are unaligned.
+ *
+ * @see view_base
+ *
+ * @ingroup ReducersMinMax
+ */
+template <typename View, bool Align = false>
+class monoid_base : public monoid_with_view<View, Align>
+{
+ typedef typename View::compare_type compare_type;
+ typedef typename View::less_type less_type;
+ const compare_type m_compare;
+
+ const compare_type* compare_pointer() const { return &m_compare; }
+
+ using cilk::monoid_base<typename View::value_type, View>::provisional;
+
+public:
+
+ /** Default constructor. The monoid’s comparator is value-initialized.
+ */
+ monoid_base() : m_compare() {}
+
+ /** Constructor.
+ *
+ * @param compare The comparator to use. It is copied into the monoid.
+ */
+ monoid_base(const compare_type& compare) : m_compare(compare) {}
+
+ /** Create an identity view.
+ *
+ * The view is constructed in place with a pointer to this monoid’s
+ * comparator as its only argument.
+ *
+ * @param v The address of the uninitialized memory in which the view
+ * will be constructed.
+ */
+ void identity(View *v) const { ::new((void*) v) View(compare_pointer()); }
+
+ /** @name construct functions
+ *
+ * Min/max monoid `construct()` functions optionally take one or two value
+ * arguments, a @ref move_in argument, and/or a comparator argument.
+ *
+ * Each overload placement-constructs the monoid at @a monoid (from the
+ * comparator argument, if there is one), and then placement-constructs
+ * the view at @a view from the value arguments, if any, followed by a
+ * pointer to the new monoid’s comparator. The two constructions are
+ * combined with `provisional(...).confirm_if(...)`.
+ *
+ * NOTE(review): `provisional` comes from cilk::monoid_base; presumably it
+ * undoes the monoid construction if the view construction fails —
+ * confirm against its definition.
+ */
+ //@{
+
+ template <typename Monoid>
+ static void construct(Monoid* monoid, View* view)
+ { provisional( new ((void*)monoid) Monoid() ).confirm_if(
+ new ((void*)view) View(monoid->compare_pointer()) ); }
+
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, const T1& x1)
+ { provisional( new ((void*)monoid) Monoid() ).confirm_if(
+ new ((void*)view) View(x1, monoid->compare_pointer()) ); }
+
+ template <typename Monoid, typename T1, typename T2>
+ static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2)
+ { provisional( new ((void*)monoid) Monoid() ).confirm_if(
+ new ((void*)view) View(x1, x2, monoid->compare_pointer()) ); }
+
+ template <typename Monoid>
+ static void construct(Monoid* monoid, View* view, const less_type& compare)
+ { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if(
+ new ((void*)view) View(monoid->compare_pointer()) ); }
+
+ template <typename Monoid, typename T1>
+ static void construct(Monoid* monoid, View* view, const T1& x1, const less_type& compare)
+ { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if(
+ new ((void*)view) View(x1, monoid->compare_pointer()) ); }
+
+ template <typename Monoid, typename T1, typename T2>
+ static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2, const less_type& compare)
+ { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if(
+ new ((void*)view) View(x1, x2, monoid->compare_pointer()) ); }
+
+ //@}
+};
+
+} //namespace min_max_internal
+
+
+/** @defgroup ReducersMinMaxMaxValue Maximum reducers (value only)
+ *
+ * These reducers will find the largest value from a set of values.
+ *
+ * @ingroup ReducersMinMax
+ */
+//@{
+
+/** The maximum reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_max<Type, Compare> >`. It accumulates the maximum,
+ * as determined by a comparator, of a set of values which have occurred as
+ * arguments to the `calc_max()` function. The accumulated value will be the
+ * first argument `x` such that `compare(x, y)` is false for every argument
+ * `y`.
+ *
+ * If the comparator is `std::less`, then the accumulated value is the first
+ * argument value which is not less than any other argument value, i.e., the
+ * maximum.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `calc_max()` function would be used in an expression like
+ * `r->calc_max(a)` where `r` is an op_max reducer variable.
+ *
+ * @tparam Type The type of the values compared by the reducer. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It
+ * defines the “less than” relation used to compute the
+ * maximum.
+ *
+ * @see ReducersMinMax
+ * @see op_max
+ */
+template <typename Type, typename Compare>
+class op_max_view : public min_max_internal::view_base<
+ min_max_internal::view_content<Type, Compare, true>,
+ Compare,
+ Compare>
+{
+ typedef min_max_internal::view_base<
+ min_max_internal::view_content<Type, Compare, true>,
+ Compare,
+ Compare> base;
+ using base::calc;
+ using base::assign;
+ friend class min_max_internal::rhs_proxy<op_max_view>;
+
+public:
+
+ /** @name Constructors.
+ *
+ * All op_max_view constructors simply pass their arguments on to the
+ * @ref view_base base class.
+ *
+ * NOTE(review): every view_base constructor takes a comparator pointer
+ * as its last argument, so the zero-argument overload looks as if it
+ * could not be instantiated — confirm.
+ */
+ //@{
+
+ op_max_view() : base() {}
+
+ template <typename T1>
+ op_max_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_max_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ //@}
+
+ /** @name View modifier operations.
+ */
+ //@{
+
+ /** Maximize with a value.
+ *
+ * If @a x is greater than the current value of the view (as defined by
+ * the reducer’s comparator), or if the view was created without an
+ * initial value and its value has never been updated (with `calc_max()`
+ * or `= max_of()`), then the value of the view is set to @a x.
+ *
+ * @param x The value to maximize the view’s value with.
+ *
+ * @return A reference to the view. (Allows chaining
+ * `view.calc_max(a).calc_max(b)…`.)
+ */
+ op_max_view& calc_max(const Type& x) { calc(x); return *this; }
+
+ /** Assign the result of a `max_of(view, value)` expression to the view.
+ *
+ * @param rhs An rhs_proxy value created by a `max_of(view, value)`
+ * expression.
+ *
+ * @return A reference to the view.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+ op_max_view& operator=(const min_max_internal::rhs_proxy<op_max_view>& rhs)
+ { assign(rhs); return *this; }
+
+ //@}
+};
+
+
+/** Compute the maximum of the value in an op_max_view and another value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or used in another max_of() call. For example,
+ *
+ * *reducer = max_of(*reducer, x);
+ * *reducer = max_of(x, *reducer);
+ *
+ * The view and the value may be given in either order.
+ *
+ * @param view The view whose current value takes part in the maximum.
+ * @param value The other value.
+ *
+ * @return The rhs_proxy created by
+ * `min_max_internal::make_proxy(value, view)`.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const op_max_view<Type, Compare>& view, const Type& value)
+{
+ return min_max_internal::make_proxy(value, view);
+}
+
+/// @copydoc max_of(const op_max_view<Type, Compare>&, const Type&)
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const Type& value, const op_max_view<Type, Compare>& view)
+{
+ return min_max_internal::make_proxy(value, view);
+}
+
+/** Nested maximum computation.
+ *
+ * Compute the maximum of the result of a max_of() call and another value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or wrapper, or used in another max_of() call. For example,
+ *
+ * *reducer = max_of(x, max_of(y, *reducer));
+ * wrapper = max_of(max_of(wrapper, x), y);
+ *
+ * The proxy and the value may be given in either order.
+ *
+ * @param proxy The result of an inner max_of() call.
+ * @param value The other value.
+ *
+ * @return The result of `proxy.calc(value)`.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >& proxy,
+ const Type& value)
+{
+ return proxy.calc(value);
+}
+
+/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >&, const Type&)
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const Type& value,
+ const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >& proxy)
+{
+ return proxy.calc(value);
+}
+
+
+/** Monoid class for maximum reductions. Instantiate the cilk::reducer template
+ * class with an op_max monoid to create a maximum reducer class. For example,
+ * to compute the maximum of a set of `int` values:
+ *
+ * cilk::reducer< cilk::op_max<int> > r;
+ *
+ * @see ReducersMinMax
+ * @see op_max_view
+ */
+template <typename Type, typename Compare=std::less<Type>, bool Align = false>
+class op_max :
+ public min_max_internal::monoid_base<op_max_view<Type, Compare>, Align>
+{
+ typedef min_max_internal::monoid_base<op_max_view<Type, Compare>, Align>
+ base;
+public:
+ /// Default constructor: the base monoid is default-constructed.
+ op_max() {}
+ /// Construct with the specified comparator, which is passed on to the base monoid.
+ op_max(const Compare& compare) : base(compare) {}
+};
+
+//@}
+
+
+/** @defgroup ReducersMinMaxMinValue Minimum reducers (value only)
+ *
+ * These reducers will find the smallest value from a set of values.
+ *
+ * @ingroup ReducersMinMax
+ */
+//@{
+
+/** The minimum reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_min<Type, Compare> >`. It accumulates the minimum,
+ * as determined by a comparator, of a set of values which have occurred as
+ * arguments to the `calc_min()` function. The accumulated value will be the
+ * first argument `x` such that `compare(y, x)` is false for every argument
+ * `y`.
+ *
+ * If the comparator is `std::less`, then the accumulated value is the first
+ * argument value which no other argument value is less than, i.e., the
+ * minimum.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `calc_min()` function would be used in an expression like
+ * `r->calc_min(a)` where `r` is an op_min reducer variable.
+ *
+ * @tparam Type The type of the values compared by the reducer. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It
+ * defines the “less than” relation used to compute the
+ * minimum.
+ *
+ * @see ReducersMinMax
+ * @see op_min
+ */
+template <typename Type, typename Compare>
+class op_min_view : public min_max_internal::view_base<
+ min_max_internal::view_content<Type, Compare, false>,
+ Compare,
+ min_max_internal::reverse_predicate<Compare, Type> >
+{
+ typedef min_max_internal::view_base<
+ min_max_internal::view_content<Type, Compare, false>,
+ Compare,
+ min_max_internal::reverse_predicate<Compare, Type> > base;
+ using base::calc;
+ using base::assign;
+ friend class min_max_internal::rhs_proxy<op_min_view>;
+
+public:
+ /** @name Constructors.
+ *
+ * All op_min_view constructors simply pass their arguments on to the
+ * @ref view_base base class.
+ *
+ * NOTE(review): every view_base constructor takes a comparator pointer
+ * as its last argument, so the zero-argument overload looks as if it
+ * could not be instantiated — confirm.
+ */
+ //@{
+
+ op_min_view() : base() {}
+
+ template <typename T1>
+ op_min_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_min_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ //@}
+
+ /** @name View modifier operations.
+ */
+ //@{
+
+ /** Minimize with a value.
+ *
+ * If @a x is less than the current value of the view (as defined by the
+ * reducer’s comparator), or if the view was created without an initial
+ * value and its value has never been updated (with `calc_min()` or
+ * `= min_of()`), then the value of the view is set to @a x.
+ *
+ * @param x The value to minimize the view’s value with.
+ *
+ * @return A reference to the view. (Allows chaining
+ * `view.calc_min(a).calc_min(b)…`.)
+ */
+ op_min_view& calc_min(const Type& x) { calc(x); return *this; }
+
+ /** Assign the result of a `min_of(view, value)` expression to the view.
+ *
+ * @param rhs An rhs_proxy value created by a `min_of(view, value)`
+ * expression.
+ *
+ * @return A reference to the view.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+ op_min_view& operator=(const min_max_internal::rhs_proxy<op_min_view>& rhs)
+ { assign(rhs); return *this; }
+};
+
+
+/** Compute the minimum of the value in an op_min_view and another value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or used in another min_of() call. For example,
+ *
+ * *reducer = min_of(*reducer, x);
+ * *reducer = min_of(x, *reducer);
+ *
+ * The view and the value may be given in either order.
+ *
+ * @param view The view whose current value takes part in the minimum.
+ * @param value The other value.
+ *
+ * @return The rhs_proxy created by
+ * `min_max_internal::make_proxy(value, view)`.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const op_min_view<Type, Compare>& view, const Type& value)
+{
+ return min_max_internal::make_proxy(value, view);
+}
+
+/// @copydoc min_of(const op_min_view<Type, Compare>&, const Type&)
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const Type& value, const op_min_view<Type, Compare>& view)
+{
+ return min_max_internal::make_proxy(value, view);
+}
+
+/** Nested minimum computation.
+ *
+ * Compute the minimum of the result of a min_of() call and another value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or wrapper, or used in another min_of() call. For example,
+ *
+ * *reducer = min_of(x, min_of(y, *reducer));
+ * wrapper = min_of(min_of(wrapper, x), y);
+ *
+ * The proxy and the value may be given in either order.
+ *
+ * @param proxy The result of an inner min_of() call.
+ * @param value The other value.
+ *
+ * @return The result of `proxy.calc(value)`.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >& proxy,
+ const Type& value)
+{
+ return proxy.calc(value);
+}
+
+/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >&, const Type&)
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const Type& value,
+ const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >& proxy)
+{
+ return proxy.calc(value);
+}
+
+
+/** Monoid class for minimum reductions. Instantiate the cilk::reducer template
+ * class with an op_min monoid to create a minimum reducer class. For example,
+ * to compute the minimum of a set of `int` values:
+ *
+ * cilk::reducer< cilk::op_min<int> > r;
+ *
+ * @see ReducersMinMax
+ * @see op_min_view
+ */
+template <typename Type, typename Compare=std::less<Type>, bool Align = false>
+class op_min : public min_max_internal::monoid_base<op_min_view<Type, Compare>, Align> {
+ typedef min_max_internal::monoid_base<op_min_view<Type, Compare>, Align> base;
+public:
+ /// Default constructor: the base monoid is default-constructed.
+ op_min() {}
+ /// Construct with the specified comparator, which is passed on to the base monoid.
+ op_min(const Compare& compare) : base(compare) {}
+};
+
+//@}
+
+
+/** @defgroup ReducersMinMaxMaxIndex Maximum reducers (value and index)
+ *
+ * These reducers will find the largest value from a set of values, and its
+ * index in the set.
+ *
+ * @ingroup ReducersMinMax
+ */
+//@{
+
+/** The maximum index reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_max_index<Index, Type, Compare> >`. It accumulates
+ * the maximum, as determined by a comparator, of a set of values which have
+ * occurred as arguments to the `calc_max()` function, and records the index
+ * of the maximum value. The accumulated value will be the first argument `x`
+ * such that `compare(x, y)` is false for every argument `y`.
+ *
+ * If the comparator is `std::less`, then the accumulated value is the first
+ * argument value which is not less than any other argument value, i.e., the
+ * maximum.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `calc_max()` function would be used in an expression like
+ * `r->calc_max(i, a)` where `r` is an op_max_index reducer
+ * variable.
+ *
+ * @note The word “index” suggests an integer index into an array, but there
+ * is no restriction on the index type or how it should be used. In
+ * general, it may be convenient to use it for any kind of key that
+ * can be used to locate the maximum value in the collection that it
+ * came from — for example:
+ * - An index into an array.
+ * - A key into an STL map.
+ * - An iterator into any STL container.
+ *
+ * @note A max_index reducer is essentially a max reducer whose value type
+ * is a `std::pair<Index, Type>`. This fact is camouflaged in the view
+ * `calc_max` function, the global `max_of` functions, and the reducer
+ * value constructor, which can all take an index argument and a value
+ * argument as an alternative to a single `std::pair` argument.
+ * However, the reducer `set_value()`, `get_value()`, `move_in()`, and
+ * `move_out()` functions work only with pairs, not with individual
+ * value and/or index arguments.
+ *
+ * @tparam Index The type of the indices associated with the values.
+ * @tparam Type The type of the values compared by the reducer. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ * @tparam Compare Used to compare the values. It must be a binary predicate.
+ * If it is omitted, then the view computes the conventional
+ * arithmetic maximum.
+ *
+ * @see ReducersMinMax
+ * @see op_max_index
+ */
+template <typename Index, typename Type, typename Compare>
+class op_max_index_view : public min_max_internal::view_base<
+ min_max_internal::index_view_content<Index, Type, Compare, true>,
+ Compare,
+ Compare>
+{
+ typedef min_max_internal::view_base<
+ min_max_internal::index_view_content<Index, Type, Compare, true>,
+ Compare,
+ Compare> base;
+ using base::calc;
+ using base::assign;
+ typedef std::pair<Index, Type> pair_type;
+ friend class min_max_internal::rhs_proxy<op_max_index_view>;
+
+public:
+ /** @name Constructors.
+ *
+ * All op_max_index_view constructors simply pass their arguments on to the
+ * @ref view_base base class, except for the `(index, value [, compare])`
+ * constructors, which create a `std::pair` containing the index and value.
+ *
+ * NOTE(review): every view_base constructor takes a comparator pointer
+ * as its last argument, so the zero-argument overload and the
+ * `(index, value)` overload without a comparator look as if they could
+ * not be instantiated — confirm.
+ */
+ //@{
+
+ op_max_index_view() : base() {}
+
+ template <typename T1>
+ op_max_index_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_max_index_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ template <typename T1, typename T2, typename T3>
+ op_max_index_view(const T1& x1, const T2& x2, const T3& x3) : base(x1, x2, x3) {}
+
+ op_max_index_view(const Index& i, const Type& v) : base(pair_type(i, v)) {}
+
+ op_max_index_view(const Index& i, const Type& v, const typename base::compare_type* c) :
+ base(pair_type(i, v), c) {}
+
+ //@}
+
+ /** Maximize with a value and index.
+ *
+ * If @a x is greater than the current value of the view (as defined by
+ * the reducer’s comparator), or if the view was created without an
+ * initial value and its value has never been updated (with `calc_max()`
+ * or `= max_of()`), then the value of the view is set to @a x, and the
+ * index is set to @a i.
+ *
+ * @param i The index of the value @a x.
+ * @param x The value to maximize the view’s value with.
+ *
+ * @return A reference to the view. (Allows
+ * `view.calc_max(i, a).calc_max(j, b)…`.)
+ */
+ op_max_index_view& calc_max(const Index& i, const Type& x)
+ { calc(pair_type(i, x)); return *this; }
+
+ /** Maximize with an index/value pair.
+ *
+ * If @a pair.second is greater than the current value of the view (as
+ * defined by the reducer’s comparator), or if the view was created
+ * without an initial value and its value has never been updated (with
+ * `calc_max()` or `= max_of()`), then the value of the view is set to
+ * @a pair.second, and the index is set to @a pair.first.
+ *
+ * @param pair A pair containing a value to maximize the view’s value
+ * with and its associated index.
+ *
+ * @return A reference to the view. (Allows
+ * `view.calc_max(p1).calc_max(p2)…`.)
+ */
+ op_max_index_view& calc_max(const pair_type& pair)
+ { calc(pair); return *this; }
+
+ /** Assign the result of a `max_of(view, index, value)` expression to the
+ * view.
+ *
+ * @param rhs An rhs_proxy value created by a `max_of(view, index, value)`
+ * expression.
+ *
+ * @return A reference to the view.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+ op_max_index_view& operator=(const min_max_internal::rhs_proxy<op_max_index_view>& rhs)
+ { assign(rhs); return *this; }
+};
+
+
+/** Compute the maximum of the value in an op_max_index_view and another
+ * indexed value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or used in another max_of() call. For example,
+ *
+ * *reducer = max_of(*reducer, i, x);
+ * *reducer = max_of(i, x, *reducer);
+ *
+ * The other value may be given either as separate index and value arguments
+ * or as a single `std::pair<Index, Type>`, and may come before or after the
+ * view. The separate arguments are combined into a pair before the proxy is
+ * created.
+ *
+ * @param view The view whose current value takes part in the maximum.
+ * @param index The index associated with @a value.
+ * @param value The other value.
+ *
+ * @return The rhs_proxy created by `min_max_internal::make_proxy()` from
+ * the index/value pair and the view.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const op_max_index_view<Index, Type, Compare>& view,
+ const Index& index, const Type& value)
+{
+ return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view);
+}
+
+/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const Index& index, const Type& value,
+ const op_max_index_view<Index, Type, Compare>& view)
+{
+ return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view);
+}
+
+/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const op_max_index_view<Index, Type, Compare>& view,
+ const std::pair<Index, Type>& pair)
+{
+ return min_max_internal::make_proxy(pair, view);
+}
+
+/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const std::pair<Index, Type>& pair,
+ const op_max_index_view<Index, Type, Compare>& view)
+{
+ return min_max_internal::make_proxy(pair, view);
+}
+
+/** Nested computation of the maximum of the value in a view and other values.
+ *
+ * Compute the maximum of the result of a max_of() call and another value.
+ *
+ * The result of this computation can only be assigned back to the original
+ * view or used in another max_of() call. For example (with `x` and `y` being
+ * `std::pair<Index, Type>` values),
+ *
+ * *reducer = max_of(x, max_of(y, *reducer));
+ * *reducer = max_of(max_of(*reducer, x), y);
+ *
+ * The other value may be given either as separate index and value arguments
+ * or as a single `std::pair<Index, Type>`, and may come before or after the
+ * proxy. The separate arguments are combined into a pair before being
+ * passed to the proxy.
+ *
+ * @param proxy The result of an inner max_of() call.
+ * @param index The index associated with @a value.
+ * @param value The other value.
+ *
+ * @return The result of calling `proxy.calc()` with the index/value pair.
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy,
+ const Index& index, const Type& value)
+{
+ return proxy.calc(std::pair<Index, Type>(index, value));
+}
+
+/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const Index& index, const Type& value,
+ const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy)
+{
+ return proxy.calc(std::pair<Index, Type>(index, value));
+}
+
+/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy,
+ const std::pair<Index, Type>& pair)
+{
+ return proxy.calc(pair);
+}
+
+/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >
+max_of(const std::pair<Index, Type>& pair,
+ const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy)
+{
+ return proxy.calc(pair);
+}
+
+
+/** Monoid class for maximum reductions with index. Instantiate the
+ * cilk::reducer template class with an op_max_index monoid to create a
+ * max_index reducer class. For example, to compute the maximum of an array of
+ * `double` values and the array index of the max value:
+ *
+ * cilk::reducer< cilk::op_max_index<unsigned, double> > r;
+ *
+ * @see ReducersMinMax
+ * @see op_max_index_view
+ */
+template < typename Index
+ , typename Type
+ , typename Compare=std::less<Type>
+ , bool Align = false
+ >
+class op_max_index : public min_max_internal::monoid_base<op_max_index_view<Index, Type, Compare>, Align>
+{
+ typedef min_max_internal::monoid_base<
+ op_max_index_view<Index, Type, Compare>, Align> base;
+public:
+ /// Default constructor: the base monoid is default-constructed.
+ op_max_index() {}
+ /// Construct with the specified comparator, which is passed on to the base monoid.
+ op_max_index(const Compare& compare) : base(compare) {}
+};
+
+//@}
+
+
+
+/** @defgroup ReducersMinMaxMinIndex Minimum reducers (value and index)
+ *
+ * These reducers will find the smallest value from a set of values, and its
+ * index in the set.
+ *
+ * @ingroup ReducersMinMax
+ */
+//@{
+
+/** The minimum index reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer<cilk::op_min_index<Index, Type, Compare> >`. It accumulates
+ * the minimum, as determined by a comparator, of a set of values which have
+ * occurred as arguments to the `calc_min()` function, and records the index
+ * of the minimum value. The accumulated value will be the first argument `x`
+ * such that `compare(y, x)` is false for every argument `y`.
+ *
+ * If the comparator is `std::less`, then the accumulated value is the first
+ * argument value which no other argument value is less than, i.e., the
+ * minimum.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `calc_min()` function would be
+ * used in an expression like `r->calc_min(i, a)` where `r` is an
+ * op_min_index reducer variable.
+ *
+ * @note The word “index” suggests an integer index into an array, but there
+ * is no restriction on the index type or how it should be used. In
+ * general, it may be convenient to use it for any kind of key that
+ * can be used to locate the minimum value in the collection that it
+ * came from — for example:
+ * - An index into an array.
+ * - A key into an STL map.
+ * - An iterator into any STL container.
+ *
+ * @note A min_index reducer is essentially a min reducer whose value type
+ * is a `std::pair<Index, Type>`. This fact is camouflaged in the view
+ * `calc_min` function, the global `min_of` functions, and the reducer
+ * value constructor, which can all take an index argument and a value
+ * argument as an alternative to a single `std::pair` argument.
+ * However, the reducer `set_value()`, `get_value()`, `move_in()`, and
+ * `move_out()` functions work only with pairs, not with individual
+ * value and/or index arguments.
+ *
+ * @tparam Index The type of the indices associated with the values.
+ * @tparam Type The type of the values compared by the reducer. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ * @tparam Compare Used to compare the values. It must be a binary predicate.
+ * If it is omitted, then the view computes the conventional
+ * arithmetic minimum.
+ *
+ * @see ReducersMinMax
+ * @see op_min_index
+ */
+template <typename Index, typename Type, typename Compare>
+class op_min_index_view : public min_max_internal::view_base<
+ min_max_internal::index_view_content<Index, Type, Compare, false>,
+ Compare,
+ min_max_internal::reverse_predicate<Compare, Type> >
+{
+ typedef min_max_internal::view_base<
+ min_max_internal::index_view_content<Index, Type, Compare, false>,
+ Compare,
+ min_max_internal::reverse_predicate<Compare, Type> > base;
+ using base::calc;
+ using base::assign;
+ typedef std::pair<Index, Type> pair_type;
+ friend class min_max_internal::rhs_proxy<op_min_index_view>;
+
+public:
+ /** @name Constructors.
+ *
+ * All op_min_index_view constructors simply pass their arguments on to the
+ * @ref view_base base class, except for the `(index, value [, compare])`
+ * constructors, which create a `std::pair` containing the index and value.
+ */
+ //@{
+
+ op_min_index_view() : base() {}
+
+ template <typename T1>
+ op_min_index_view(const T1& x1) : base(x1) {}
+
+ template <typename T1, typename T2>
+ op_min_index_view(const T1& x1, const T2& x2) : base(x1, x2) {}
+
+ template <typename T1, typename T2, typename T3>
+ op_min_index_view(const T1& x1, const T2& x2, const T3& x3) : base(x1, x2, x3) {}
+
+ op_min_index_view(const Index& i, const Type& v) : base(pair_type(i, v)) {}
+
+ op_min_index_view(const Index& i, const Type& v, const typename base::compare_type* c) :
+ base(pair_type(i, v), c) {}
+
+ //@}
+
+ /** Minimize with a value and index.
+ *
+ * If @a x is greater than the current value of the view (as defined by
+ * the reducer’s comparator), or if the view was created without an
+ * initial value and its value has never been updated (with `calc_min()`
+ * or `= min_of()`), then the value of the view is set to @a x, and the
+ * index is set to @a i..
+ *
+ * @param i The index of the value @a x.
+ * @param x The value to minimize the view’s value with.
+ *
+ * @return A reference to the view. (Allows
+ * `view.comp_min(i, a).comp_min(j, b)…`.)
+ */
+ op_min_index_view& calc_min(const Index& i, const Type& x)
+ { calc(pair_type(i, x)); return *this; }
+
+ /** Minimize with an index/value pair.
+ *
+ * If @a pair.second is less than the current value of the view (as
+ * defined by the reducer’s comparator), or if the view was created
+ * without an initial value and its value has never been updated (with
+ * `calc_min()` or `= min_of()`), then the value of the view is set to
+ * @a pair.second, and the index is set to @a pair.first.
+ *
+ * @param pair A pair containing a value to minimize the view’s value
+ * with and its associated index.
+ *
+ * @return A reference to the view. (Allows
+ * `view.calc_min(p1).calc_min(p2)…`.)
+ */
+ op_min_index_view& calc_min(const pair_type& pair)
+ { calc(pair); return *this; }
+
+ /** Assign the result of a `min_of(view, index, value)` expression to the
+ * view.
+ *
+ * @param rhs An rhs_proxy value created by a `min_of(view, index, value)`
+ * expression.
+ *
+ * @return A reference to the view.
+ *
+ * @see min_max_internal::view_base::rhs_proxy
+ */
+ op_min_index_view& operator=(const min_max_internal::rhs_proxy<op_min_index_view>& rhs)
+ { assign(rhs); return *this; }
+};
+
+
+/** Compute the minimum of a view's current value and an index/value pair.
+ *
+ * The proxy returned here is only meaningful when it is assigned back to the
+ * view it was built from, or passed on to a further min_of() call:
+ *
+ *     *reducer = min_of(*reducer, i, x);
+ *     *reducer = min_of(i, x, *reducer);
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const op_min_index_view<Index, Type, Compare>& view,
+       const Index& index, const Type& value)
+{
+    typedef std::pair<Index, Type> pair_type;
+    return min_max_internal::make_proxy(pair_type(index, value), view);
+}
+
+/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&)
+// Same operation with the index and value written before the view.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const Index& index, const Type& value,
+       const op_min_index_view<Index, Type, Compare>& view)
+{
+    const std::pair<Index, Type> candidate(index, value);
+    return min_max_internal::make_proxy(candidate, view);
+}
+
+/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&)
+// Same operation with the index and value already packed into a std::pair.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const op_min_index_view<Index, Type, Compare>& view,
+       const std::pair<Index, Type>& pair)
+{
+    typedef min_max_internal::rhs_proxy<
+        op_min_index_view<Index, Type, Compare> > proxy_type;
+    return proxy_type(min_max_internal::make_proxy(pair, view));
+}
+
+/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&)
+// Pair-first spelling of the pair/view overload.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const std::pair<Index, Type>& pair,
+       const op_min_index_view<Index, Type, Compare>& view)
+{
+    typedef op_min_index_view<Index, Type, Compare> view_type;
+    typedef min_max_internal::rhs_proxy<view_type> proxy_type;
+    return proxy_type(min_max_internal::make_proxy(pair, view));
+}
+
+/** Chain a further index/value pair onto an existing min_of() result.
+ *
+ * Takes the proxy produced by an inner min_of() call and minimizes it with
+ * one more index/value pair.
+ *
+ * As with the non-nested form, the result may only be assigned back to the
+ * original view or handed to yet another min_of() call:
+ *
+ *     *reducer = min_of(x, min_of(y, *reducer));
+ *     *reducer = min_of(min_of(*reducer, x), y);
+ *
+ * @see min_max_internal::rhs_proxy
+ */
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy,
+       const Index& index, const Type& value)
+{
+    typedef std::pair<Index, Type> pair_type;
+    return proxy.calc(pair_type(index, value));
+}
+
+/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+// Same operation with the index and value written before the proxy.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const Index& index, const Type& value,
+       const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy)
+{
+    const std::pair<Index, Type> candidate(index, value);
+    return proxy.calc(candidate);
+}
+
+/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+// Same operation with the index and value already packed into a std::pair.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy,
+       const std::pair<Index, Type>& pair)
+{
+    typedef min_max_internal::rhs_proxy<
+        op_min_index_view<Index, Type, Compare> > proxy_type;
+    return proxy_type(proxy.calc(pair));
+}
+
+/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&)
+// Pair-first spelling of the pair/proxy overload.
+template <typename Index, typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >
+min_of(const std::pair<Index, Type>& pair,
+       const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy)
+{
+    typedef op_min_index_view<Index, Type, Compare> view_type;
+    typedef min_max_internal::rhs_proxy<view_type> proxy_type;
+    return proxy_type(proxy.calc(pair));
+}
+
+
+/** Monoid for "minimum with index" reductions.
+ *
+ * Pass op_min_index as the monoid argument of the cilk::reducer template to
+ * obtain a min_index reducer class. For example, to find the smallest element
+ * of an array of `double` values together with its array index:
+ *
+ *     cilk::reducer< cilk::op_min_index<unsigned, double> > r;
+ *
+ * @see ReducersMinMax
+ * @see op_min_index_view
+ */
+template <typename Index,
+          typename Type,
+          typename Compare = std::less<Type>,
+          bool Align = false>
+class op_min_index :
+    public min_max_internal::monoid_base<op_min_index_view<Index, Type, Compare>, Align>
+{
+    // Short private name for the monoid base class.
+    typedef min_max_internal::monoid_base<
+        op_min_index_view<Index, Type, Compare>, Align> parent_type;
+
+public:
+    /// Default constructor: the comparator is left to the base class default.
+    op_min_index() {}
+
+    /// Constructor that hands a caller-supplied comparator to the base class.
+    op_min_index(const Compare& compare) : parent_type(compare) {}
+};
+
+//@}
+
+
+/** Deprecated maximum reducer wrapper class.
+ *
+ * reducer_max is the same as @ref reducer<@ref op_max>, except that
+ * reducer_max is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is maximized with a `reducer<%op_max>` with
+ * `r->calc_max(a)`, but a value can be maximized with a `%reducer_max` with
+ * `r.calc_max(a)`.
+ *
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_max.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_max`
+ * and `reducer<%op_max>`. This allows incremental code
+ * conversion: old code that used `%reducer_max` can pass a
+ * `%reducer_max` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_max>`, and vice
+ * versa. **But see @ref redminmax_compatibility.**
+ *
+ * @tparam Type The value type of the reducer.
+ * @tparam Compare The “less than” comparator type for the reducer.
+ *
+ * @see op_max
+ * @see op_max_view
+ * @see reducer
+ * @see ReducersMinMax
+ * @ingroup ReducersMinMaxMaxValue
+ */
+template <typename Type, typename Compare=std::less<Type> >
+class reducer_max : public reducer< op_max<Type, Compare, true> >
+{
+ __CILKRTS_STATIC_ASSERT( // Compare (wrapped by binary_functor) must be an empty class
+ ::cilk::internal::class_is_empty<
+ typename ::cilk::internal::binary_functor<Compare>::type >::value,
+ "cilk::reducer_max<Type, Compare> only works with "
+ "an empty Compare class");
+ typedef reducer< op_max<Type, Compare, true> > base; // the aligned (Align == true) reducer base
+public:
+
+ /// Type of data in a reducer_max.
+ typedef Type basic_value_type;
+
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view type for the reducer (alternate name for view_type).
+ typedef typename base::view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type monoid_type;
+
+ /// The monoid type for the reducer (alternate name for monoid_type).
+ typedef typename base::monoid_type Monoid;
+
+ /// The view’s rhs proxy type.
+ typedef min_max_internal::rhs_proxy<View> rhs_proxy;
+
+ using base::view; // lets the members below call view() unqualified
+
+ /** @name Constructors
+ *
+ * Every constructor simply forwards its arguments to the reducer base
+ * class.
+ */
+ //@{
+
+ /// Construct the wrapper in its identity state (either `!is_set()`, or
+ /// `value() == identity value`).
+ reducer_max() : base() {}
+
+ /// Construct the wrapper with a specified initial value.
+ explicit reducer_max(const Type& initial_value) : base(initial_value) {}
+
+ /// Construct the wrapper in its identity state with a specified
+ /// comparator.
+ explicit reducer_max(const Compare& comp) : base(comp) {}
+
+ /// Construct the wrapper with a specified initial value and a specified
+ /// comparator.
+ reducer_max(const Type& initial_value, const Compare& comp)
+ : base(initial_value, comp) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_max_view. */
+ //@{
+
+ /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const
+ bool is_set() const { return view().is_set(); }
+
+ /// @copydoc op_max_view::calc_max(const Type&)
+ reducer_max& calc_max(const Type& x)
+ { view().calc_max(x); return *this; } // returns the wrapper, not the view
+
+ /// @copydoc op_max_view::operator=(const min_max_internal::rhs_proxy<op_max_view>&)
+ reducer_max& operator=(const rhs_proxy& rhs)
+ { view() = rhs; return *this; } // assignment is performed on the current view
+
+ //@}
+
+ /** Allow read-only access to the value within the current view.
+ *
+ * @returns A const reference to the value within the current view.
+ */
+ const Type& get_reference() const { return view().get_reference(); }
+
+ /// @name Dereference
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_max<int> > r;
+ * r->calc_max(a); // *r returns the view
+ * // calc_max is a view member function
+ *
+ * reducer_max<int> w;
+ * w->calc_max(a); // *w returns the wrapper
+ * // calc_max is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_max& operator*() { return *this; }
+ reducer_max const& operator*() const { return *this; }
+
+ reducer_max* operator->() { return this; }
+ reducer_max const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ *
+ * The unaligned reducer is not a base class of the wrapper, so the
+ * operators reinterpret the wrapper's address rather than performing
+ * a true upcast.
+ */
+ //@{
+ operator reducer< op_max<Type, Compare, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_max<Type, Compare, false> >* >(this);
+ }
+
+ operator const reducer< op_max<Type, Compare, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_max<Type, Compare, false> >* >(this);
+ }
+ //@}
+};
+
+
+/// @cond internal
+// The legacy definition of max_of(reducer_max, value) has different
+// behavior and a different return type than this definition. We add an
+// unused third argument to this version of the function to give it a different
+// signature, so that they won’t end up sharing a single object file entry.
+struct max_of_1_0_t {}; // empty tag type; carries no data
+const max_of_1_0_t max_of_1_0 = {}; // the tag value used as the default third argument of max_of()
+/// @endcond
+
+/** Compute the maximum of a reducer_max's current value and another value.
+ *
+ * @deprecated Because reducer_max is deprecated.
+ *
+ * The proxy returned here is only meaningful when it is assigned back to the
+ * reducer it was built from, or passed on to a further max_of() call:
+ *
+ *     reducer = max_of(reducer, x);
+ *     reducer = max_of(x, reducer);
+ *
+ * The unnamed trailing parameter is never read; callers omit it.
+ *
+ * @see min_max_internal::rhs_proxy
+ *
+ * @ingroup ReducersMinMaxMaxValue
+ */
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const reducer_max<Type, Compare>& r, const Type& value,
+       const max_of_1_0_t& = max_of_1_0)
+{
+    const typename reducer_max<Type, Compare>::view_type& current_view = r.view();
+    return min_max_internal::make_proxy(value, current_view);
+}
+
+/// @copydoc max_of(const reducer_max<Type, Compare>&, const Type&, const max_of_1_0_t&)
+/// @ingroup ReducersMinMaxMaxValue
+// Value-first spelling of the overload above.
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> >
+max_of(const Type& value, const reducer_max<Type, Compare>& r,
+       const max_of_1_0_t& = max_of_1_0)
+{
+    const typename reducer_max<Type, Compare>::view_type& current_view = r.view();
+    return min_max_internal::make_proxy(value, current_view);
+}
+
+
+/** Deprecated minimum reducer wrapper class.
+ *
+ * reducer_min is the same as @ref reducer<@ref op_min>, except that
+ * reducer_min is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is minimized with a `reducer<%op_min>` with
+ * `r->calc_min(a)`, but a value can be minimized with a `%reducer_min` with
+ * `r.calc_min(a)`.
+ *
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_min.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_min`
+ * and `reducer<%op_min>`. This allows incremental code
+ * conversion: old code that used `%reducer_min` can pass a
+ * `%reducer_min` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_min>`, and vice
+ * versa. **But see @ref redminmax_compatibility.**
+ *
+ * @tparam Type The value type of the reducer.
+ * @tparam Compare The “less than” comparator type for the reducer.
+ *
+ * @see op_min
+ * @see op_min_view
+ * @see reducer
+ * @see ReducersMinMax
+ * @ingroup ReducersMinMaxMinValue
+ */
+template <typename Type, typename Compare=std::less<Type> >
+class reducer_min : public reducer< op_min<Type, Compare, true> >
+{
+ __CILKRTS_STATIC_ASSERT( // Compare (wrapped by binary_functor) must be an empty class
+ ::cilk::internal::class_is_empty<
+ typename ::cilk::internal::binary_functor<Compare>::type >::value,
+ "cilk::reducer_min<Type, Compare> only works with "
+ "an empty Compare class");
+ typedef reducer< op_min<Type, Compare, true> > base; // the aligned (Align == true) reducer base
+public:
+
+ /// Type of data in a reducer_min.
+ typedef Type basic_value_type;
+
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view type for the reducer (alternate name for view_type).
+ typedef typename base::view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type monoid_type;
+
+ /// The monoid type for the reducer (alternate name for monoid_type).
+ typedef typename base::monoid_type Monoid;
+
+ /// The view’s rhs proxy type.
+ typedef min_max_internal::rhs_proxy<View> rhs_proxy;
+
+ using base::view; // lets the members below call view() unqualified
+
+ /** @name Constructors
+ *
+ * Every constructor simply forwards its arguments to the reducer base
+ * class.
+ */
+ //@{
+
+ /// Construct the wrapper in its identity state (either `!is_set()`, or
+ /// `value() == identity value`).
+ reducer_min() : base() {}
+
+ /// Construct the wrapper with a specified initial value.
+ explicit reducer_min(const Type& initial_value) : base(initial_value) {}
+
+ /// Construct the wrapper in its identity state with a specified
+ /// comparator.
+ explicit reducer_min(const Compare& comp) : base(comp) {}
+
+ /// Construct the wrapper with a specified initial value and a specified
+ /// comparator.
+ reducer_min(const Type& initial_value, const Compare& comp)
+ : base(initial_value, comp) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_min_view. */
+ //@{
+
+ /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const
+ bool is_set() const { return view().is_set(); }
+
+ /// @copydoc op_min_view::calc_min(const Type&)
+ reducer_min& calc_min(const Type& x)
+ { view().calc_min(x); return *this; } // returns the wrapper, not the view
+
+ /// @copydoc op_min_view::operator=(const min_max_internal::rhs_proxy<op_min_view>&)
+ reducer_min& operator=(const rhs_proxy& rhs)
+ { view() = rhs; return *this; } // assignment is performed on the current view
+
+ //@}
+
+ /** Allow read-only access to the value within the current view.
+ *
+ * @returns A const reference to the value within the current view.
+ */
+ const Type& get_reference() const { return view().get_reference(); }
+
+ /// @name Dereference
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_min<int> > r;
+ * r->calc_min(a); // *r returns the view
+ * // calc_min is a view member function
+ *
+ * reducer_min<int> w;
+ * w->calc_min(a); // *w returns the wrapper
+ * // calc_min is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_min& operator*() { return *this; }
+ reducer_min const& operator*() const { return *this; }
+
+ reducer_min* operator->() { return this; }
+ reducer_min const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ *
+ * The unaligned reducer is not a base class of the wrapper, so the
+ * operators reinterpret the wrapper's address rather than performing
+ * a true upcast.
+ */
+ //@{
+ operator reducer< op_min<Type, Compare, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_min<Type, Compare, false> >* >(this);
+ }
+
+ operator const reducer< op_min<Type, Compare, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_min<Type, Compare, false> >* >(this);
+ }
+ //@}
+};
+
+
+/** Compute the minimum of a reducer and a value.
+ *
+ * @deprecated Because reducer_min is deprecated.
+ */
+//@{
+// The legacy definition of min_of(reducer_min, value) has different
+// behavior and a different return type than this definition. We add an
+// unused third argument to this version of the function to give it a different
+// signature, so that they won’t end up sharing a single object file entry.
+struct min_of_1_0_t {}; // empty tag type; carries no data
+const min_of_1_0_t min_of_1_0 = {}; // the tag value used as the default third argument of min_of()
+
+// min_of(reducer, value): builds an rhs proxy from `value` and the reducer's
+// current view. The unnamed trailing parameter is never read; callers omit it.
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const reducer_min<Type, Compare>& r, const Type& value,
+       const min_of_1_0_t& = min_of_1_0)
+{
+    const typename reducer_min<Type, Compare>::view_type& current_view = r.view();
+    return min_max_internal::make_proxy(value, current_view);
+}
+
+// min_of(value, reducer): value-first spelling; builds the same rhs proxy from
+// `value` and the reducer's current view. The trailing parameter is never read.
+template <typename Type, typename Compare>
+inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> >
+min_of(const Type& value, const reducer_min<Type, Compare>& r,
+       const min_of_1_0_t& = min_of_1_0)
+{
+    const typename reducer_min<Type, Compare>::view_type& current_view = r.view();
+    return min_max_internal::make_proxy(value, current_view);
+}
+//@}
+
+
+/** Deprecated maximum with index reducer wrapper class.
+ *
+ * reducer_max_index is the same as @ref reducer<@ref op_max_index>, except
+ * that reducer_max_index is a proxy for the contained view, so that
+ * accumulator variable update operations can be applied directly to the
+ * reducer. For example, a value is maximized with a `reducer<%op_max_index>`
+ * with `r->calc_max(i, a)`, but a value can be maximized with a
+ * `%reducer_max_index` with `r.calc_max(i, a)`.
+ *
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_max.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_max_index`
+ * and `reducer<%op_max_index>`. This allows incremental code
+ * conversion: old code that used `%reducer_max_index` can pass a
+ * `%reducer_max_index` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_max_index>`, and vice
+ * versa. **But see @ref redminmax_compatibility.**
+ *
+ * @tparam Index The index type of the reducer.
+ * @tparam Type The value type of the reducer.
+ * @tparam Compare The “less than” comparator type for the reducer.
+ *
+ * @see op_max_index
+ * @see op_max_index_view
+ * @see reducer
+ * @see ReducersMinMax
+ * @ingroup ReducersMinMaxMaxIndex
+ */
+template < typename Index
+ , typename Type
+ , typename Compare = std::less<Type>
+ >
+class reducer_max_index :
+ public reducer< op_max_index<Index, Type, Compare, true> >
+{
+ __CILKRTS_STATIC_ASSERT( // Compare (wrapped by binary_functor) must be an empty class
+ ::cilk::internal::class_is_empty<
+ typename ::cilk::internal::binary_functor<Compare>::type >::value,
+ "cilk::reducer_max_index<Type, Compare> only works with "
+ "an empty Compare class");
+ typedef reducer< op_max_index<Index, Type, Compare, true> > base; // the aligned (Align == true) reducer base
+public:
+
+ /// Type of data in a reducer_max_index.
+ typedef Type basic_value_type;
+
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view type for the reducer (alternate name for view_type).
+ typedef typename base::view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type monoid_type;
+
+ /// The monoid type for the reducer (alternate name for monoid_type).
+ typedef typename base::monoid_type Monoid;
+
+ /// The view’s rhs proxy type.
+ typedef min_max_internal::rhs_proxy<View> rhs_proxy;
+
+ using base::view; // lets the members below call view() unqualified
+
+ /** @name Constructors
+ *
+ * Every constructor simply forwards its arguments to the reducer base
+ * class.
+ */
+ //@{
+
+ /// Construct the wrapper in its identity state (`!is_set()`).
+ reducer_max_index() : base() {}
+
+ /// Construct with a specified initial index and value.
+ reducer_max_index(const Index& initial_index,
+ const Type& initial_value)
+ : base(initial_index, initial_value) {}
+
+ /// Construct the wrapper with a specified comparator.
+ explicit reducer_max_index(const Compare& comp) : base(comp) {}
+
+ /// Construct the wrapper with a specified initial index, value,
+ /// and comparator.
+ reducer_max_index(const Index& initial_index,
+ const Type& initial_value,
+ const Compare& comp)
+ : base(initial_index, initial_value, comp) {}
+
+ //@}
+
+ /** @name Set / Get
+ *
+ * get_value() and get_reference() return the same reference, as do
+ * get_index() and get_index_reference().
+ */
+ //@{
+
+ /// Set the index and value of this object.
+ void set_value(const Index& index, const Type& value)
+ { base::set_value(std::make_pair(index, value)); } // base takes an (index, value) pair
+
+ /// Return the maximum value.
+ const Type& get_value() const
+ { return view().get_reference(); }
+
+ /// Return the index associated with the maximum value.
+ const Index& get_index() const
+ { return view().get_index_reference(); }
+
+ /// Return a const reference to value data member in the view.
+ const Type& get_reference() const
+ { return view().get_reference(); }
+
+ /// Return a const reference to index data member in the view.
+ const Index& get_index_reference() const
+ { return view().get_index_reference(); }
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_max_index_view. */
+ //@{
+
+ /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const
+ bool is_set() const { return view().is_set(); }
+
+ /// @copydoc op_max_index_view::calc_max(const Index&, const Type&)
+ reducer_max_index& calc_max(const Index& i, const Type& x)
+ { view().calc_max(i, x); return *this; } // returns the wrapper, not the view
+
+ /// @copydoc op_max_index_view::operator=(const min_max_internal::rhs_proxy<op_max_index_view>&)
+ reducer_max_index& operator=(const rhs_proxy& rhs)
+ { view() = rhs; return *this; } // assignment is performed on the current view
+
+ //@}
+
+ /// @name Dereference
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_max_index<int, int> > r;
+ * r->calc_max(i, a); // *r returns the view
+ * // calc_max is a view member function
+ *
+ * reducer_max_index<int, int> w;
+ * w->calc_max(i, a); // *w returns the wrapper
+ * // calc_max is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_max_index& operator*() { return *this; }
+ reducer_max_index const& operator*() const { return *this; }
+
+ reducer_max_index* operator->() { return this; }
+ reducer_max_index const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ *
+ * The unaligned reducer is not a base class of the wrapper, so the
+ * operators reinterpret the wrapper's address rather than performing
+ * a true upcast.
+ */
+ //@{
+ operator reducer< op_max_index<Index, Type, Compare, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_max_index<Index, Type, Compare, false> >* >(this);
+ }
+
+ operator const reducer< op_max_index<Index, Type, Compare, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_max_index<Index, Type, Compare, false> >* >(this);
+ }
+ //@}
+
+};
+
+
+/** Deprecated minimum with index reducer wrapper class.
+ *
+ * reducer_min_index is the same as @ref reducer<@ref op_min_index>, except
+ * that reducer_min_index is a proxy for the contained view, so that
+ * accumulator variable update operations can be applied directly to the
+ * reducer. For example, a value is minimized with a `reducer<%op_min_index>`
+ * with `r->calc_min(i, a)`, but a value can be minimized with a
+ * `%reducer_min_index` with `r.calc_min(i, a)`.
+ *
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_min.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_min_index`
+ * and `reducer<%op_min_index>`. This allows incremental code
+ * conversion: old code that used `%reducer_min_index` can pass a
+ * `%reducer_min_index` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_min_index>`, and vice
+ * versa. **But see @ref redminmax_compatibility.**
+ *
+ * @tparam Index The index type of the reducer.
+ * @tparam Type The value type of the reducer.
+ * @tparam Compare The “less than” comparator type for the reducer.
+ *
+ * @see op_min_index
+ * @see op_min_index_view
+ * @see reducer
+ * @see ReducersMinMax
+ * @ingroup ReducersMinMaxMinIndex
+ */
+template < typename Index
+ , typename Type
+ , typename Compare = std::less<Type>
+ >
+class reducer_min_index :
+ public reducer< op_min_index<Index, Type, Compare, true> >
+{
+ __CILKRTS_STATIC_ASSERT( // Compare (wrapped by binary_functor) must be an empty class
+ ::cilk::internal::class_is_empty<
+ typename ::cilk::internal::binary_functor<Compare>::type >::value,
+ "cilk::reducer_min_index<Type, Compare> only works with "
+ "an empty Compare class");
+ typedef reducer< op_min_index<Index, Type, Compare, true> > base; // the aligned (Align == true) reducer base
+public:
+
+ /// Type of data in a reducer_min_index.
+ typedef Type basic_value_type;
+
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view type for the reducer (alternate name for view_type).
+ typedef typename base::view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type monoid_type;
+
+ /// The monoid type for the reducer (alternate name for monoid_type).
+ typedef typename base::monoid_type Monoid;
+
+ /// The view’s rhs proxy type.
+ typedef min_max_internal::rhs_proxy<View> rhs_proxy;
+
+ using base::view; // lets the members below call view() unqualified
+
+ /** @name Constructors
+ *
+ * Every constructor simply forwards its arguments to the reducer base
+ * class.
+ */
+ //@{
+
+ /// Construct the wrapper in its identity state (`!is_set()`).
+ reducer_min_index() : base() {}
+
+ /// Construct with a specified initial index and value.
+ reducer_min_index(const Index& initial_index,
+ const Type& initial_value)
+ : base(initial_index, initial_value) {}
+
+ /// Construct the wrapper with a specified comparator.
+ explicit reducer_min_index(const Compare& comp) : base(comp) {}
+
+ /// Construct the wrapper with a specified initial index, value,
+ /// and comparator.
+ reducer_min_index(const Index& initial_index,
+ const Type& initial_value,
+ const Compare& comp)
+ : base(initial_index, initial_value, comp) {}
+
+ //@}
+
+ /** @name Set / Get
+ *
+ * get_value() and get_reference() return the same reference, as do
+ * get_index() and get_index_reference().
+ */
+ //@{
+
+ /// Set the index and value of this object.
+ void set_value(const Index& index, const Type& value)
+ { base::set_value(std::make_pair(index, value)); } // base takes an (index, value) pair
+
+ /// Return the minimum value.
+ const Type& get_value() const
+ { return view().get_reference(); }
+
+ /// Return the index associated with the minimum value.
+ const Index& get_index() const
+ { return view().get_index_reference(); }
+
+ /// Return a const reference to value data member in the view.
+ const Type& get_reference() const
+ { return view().get_reference(); }
+
+ /// Return a const reference to index data member in the view.
+ const Index& get_index_reference() const
+ { return view().get_index_reference(); }
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_min_index_view. */
+ //@{
+
+ /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const
+ bool is_set() const { return view().is_set(); }
+
+ /// @copydoc op_min_index_view::calc_min(const Index&, const Type&)
+ reducer_min_index& calc_min(const Index& i, const Type& x)
+ { view().calc_min(i, x); return *this; } // returns the wrapper, not the view
+
+ /// @copydoc op_min_index_view::operator=(const min_max_internal::rhs_proxy<op_min_index_view>&)
+ reducer_min_index& operator=(const rhs_proxy& rhs)
+ { view() = rhs; return *this; } // assignment is performed on the current view
+
+ //@}
+
+ /// @name Dereference
+ /** Dereferencing a wrapper is a no-op. It simply returns the wrapper.
+ * Combined with the rule that a wrapper forwards view operations to the
+ * view, this means that view operations can be written the same way on
+ * reducers and wrappers, which is convenient for incrementally
+ * converting code using wrappers to code using reducers. That is:
+ *
+ * reducer< op_min_index<int, int> > r;
+ * r->calc_min(i, a); // *r returns the view
+ * // calc_min is a view member function
+ *
+ * reducer_min_index<int, int> w;
+ * w->calc_min(i, a); // *w returns the wrapper
+ * // calc_min is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
+ reducer_min_index& operator*() { return *this; }
+ reducer_min_index const& operator*() const { return *this; }
+
+ reducer_min_index* operator->() { return this; }
+ reducer_min_index const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ *
+ * The unaligned reducer is not a base class of the wrapper, so the
+ * operators reinterpret the wrapper's address rather than performing
+ * a true upcast.
+ */
+ //@{
+ operator reducer< op_min_index<Index, Type, Compare, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_min_index<Index, Type, Compare, false> >* >(this);
+ }
+
+ operator const reducer< op_min_index<Index, Type, Compare, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_min_index<Index, Type, Compare, false> >* >(this);
+ }
+ //@}
+
+};
+
+
+#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX
+} // namespace cilk_lib_1_0
+using namespace cilk_lib_1_0;
+#endif
+
+
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
+ *
+ * These specializations of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h cause each `reducer< op_xxxx<Type> >` class to have
+ * an `operator reducer_xxxx<Type>& ()` conversion operator that statically
+ * downcasts the `reducer<op_xxxx>` to the corresponding `reducer_xxxx` type.
+ * (The reverse conversion, from `reducer_xxxx` to `reducer<op_xxxx>`, is just
+ * an upcast, which is provided for free by the language.)
+ */
+// Maps reducer< op_max<Type, Compare, Align> > to its legacy wrapper type.
+// NOTE(review): Compare is not forwarded to reducer_max here, so a reducer
+// with a non-default comparator presumably maps to the default-comparator
+// wrapper -- confirm against the reducer_max template parameters.
+template <typename Type, typename Compare, bool Align>
+struct legacy_reducer_downcast< reducer< op_max<Type, Compare, Align> > >
+{
+ typedef reducer_max<Type> type;
+};
+
+// Maps reducer< op_min<Type, Compare, Align> > to its legacy wrapper type.
+// NOTE(review): Compare is not forwarded to reducer_min here, so a reducer
+// with a non-default comparator presumably maps to the default-comparator
+// wrapper -- confirm against the reducer_min template parameters.
+template <typename Type, typename Compare, bool Align>
+struct legacy_reducer_downcast< reducer< op_min<Type, Compare, Align> > >
+{
+ typedef reducer_min<Type> type;
+};
+
+// Maps reducer< op_max_index<Index, Type, Compare, Align> > to its legacy
+// wrapper type.
+// NOTE(review): Compare is not forwarded to reducer_max_index here, so a
+// reducer with a non-default comparator presumably maps to the
+// default-comparator wrapper -- confirm against the reducer_max_index
+// template parameters.
+template <typename Index, typename Type, typename Compare, bool Align>
+struct legacy_reducer_downcast< reducer< op_max_index<Index, Type, Compare, Align> > >
+{
+ typedef reducer_max_index<Index, Type> type;
+};
+
+// Maps reducer< op_min_index<Index, Type, Compare, Align> > to its legacy
+// wrapper type.
+// NOTE(review): Compare is not forwarded to reducer_min_index here, so a
+// reducer with a non-default comparator presumably maps to the
+// default-comparator wrapper -- confirm against the reducer_min_index
+// template parameters.
+template <typename Index, typename Type, typename Compare, bool Align>
+struct legacy_reducer_downcast< reducer< op_min_index<Index, Type, Compare, Align> > >
+{
+ typedef reducer_min_index<Index, Type> type;
+};
+/// @endcond
+
+} // namespace cilk
+
+#endif // __cplusplus
+
+
+/** @name C language reducer macros
+ *
+ * These macros are used to declare and work with numeric minimum and maximum reducers in C
+ * code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
+
+#ifdef CILK_C_DEFINE_REDUCERS
+
+/* Integer min/max constants */
+#include <limits.h>
+
+/* Wchar_t min/max constants */
+#if defined(_MSC_VER) || defined(ANDROID)
+# include <wchar.h>
+#else
+# include <stdint.h>
+#endif
+
+/* Floating-point min/max constants */
+#include <math.h>
+
+/* Fallbacks for C libraries whose <math.h> lacks the float and long double
+ * variants of HUGE_VAL. They are derived from the standard HUGE_VAL by
+ * conversion rather than by reinterpreting a hard-coded bit pattern: the
+ * representation of long double differs between platforms, and reading an
+ * unsigned int array through a floating-point pointer violates the aliasing
+ * rules. This also avoids defining static objects in a header.
+ */
+#ifndef HUGE_VALF
+# define HUGE_VALF ((float)HUGE_VAL)
+#endif
+
+#ifndef HUGE_VALL
+# define HUGE_VALL ((long double)HUGE_VAL)
+#endif
+
+#endif
+
+/** Max reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the max reducer
+ * type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MAX_TYPE(tn) \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn)
+
+/** Declare a max reducer object.
+ *
+ * This macro expands into a declaration of a max reducer object for a specified numeric
+ * type. For example:
+ *
+ * CILK_C_REDUCER_MAX(my_reducer, double, -DBL_MAX);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ * @param v The initial value for the reducer. (A value which can be assigned to the
+ * numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MAX(obj,tn,v) \
+ CILK_C_REDUCER_MAX_TYPE(tn) obj = \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_,tn), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_,tn), \
+ __cilkrts_hyperobject_noop_destroy, v)
+
+/** Maximize with a value.
+ *
+ * `CILK_C_REDUCER_MAX_CALC(reducer, v)` sets the current view of the
+ * reducer to the max of its previous value and a specified new value.
+ * This is equivalent to
+ *
+ * REDUCER_VIEW(reducer) = max(REDUCER_VIEW(reducer), v)
+ *
+ * The value expression @a v is evaluated once. The macro's temporaries use
+ * reserved (double-underscore) names so that a caller's reducer or value
+ * expression mentioning an identifier such as `view` is not captured.
+ *
+ * @param reducer The reducer whose contained value is to be updated.
+ * @param v The value that it is to be maximized with.
+ */
+#define CILK_C_REDUCER_MAX_CALC(reducer, v) do { \
+ _Typeof((reducer).value)* __view = &(REDUCER_VIEW(reducer)); \
+ _Typeof(v) __value = (v); \
+ if (*__view < __value) { \
+ *__view = __value; \
+ } } while (0)
+
+/// @cond internal
+
+/** Declare the max reducer functions for a numeric type.
+ *
+ * This macro expands into a typedef for the max reducer type and external function
+ * declarations for the functions which implement the reducer functionality for the
+ * max reducer type for a specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value for the reducer. Not used by this macro; it is accepted
+ * so that the argument list matches CILK_C_REDUCER_MAX_DEFINITION.
+ */
+#define CILK_C_REDUCER_MAX_DECLARATION(t,tn,id) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r); \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn);
+
+/** Define the max reducer functions for a numeric type.
+ *
+ * This macro expands into a typedef for the max reducer type and function definitions
+ * for the functions which implement the reducer functionality for the max reducer type
+ * for a specified numeric type. The reduce function stores the right value into the
+ * left one when the left value is smaller; the identity function stores @a id.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value stored by the generated identity function.
+ */
+#define CILK_C_REDUCER_MAX_DEFINITION(t,tn,id) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r) \
+ { if (*(t*)l < *(t*)r) *(t*)l = *(t*)r; } \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn) \
+ { *(t*)v = id; }
+
+//@{
+/** @def CILK_C_REDUCER_MAX_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and
+ * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS`
+ * will be undefined, and this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_MAX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MAX_DEFINITION(t,tn,id)
+#else
+# define CILK_C_REDUCER_MAX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MAX_DECLARATION(t,tn,id)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+__CILKRTS_BEGIN_EXTERN_C
+CILK_C_REDUCER_MAX_INSTANCE(char, char, CHAR_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned char, uchar, 0)
+CILK_C_REDUCER_MAX_INSTANCE(signed char, schar, SCHAR_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(short, short, SHRT_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned short, ushort, 0)
+CILK_C_REDUCER_MAX_INSTANCE(int, int, INT_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned int, uint, 0)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned int, unsigned, 0) // alternate name
+CILK_C_REDUCER_MAX_INSTANCE(long, long, LONG_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned long, ulong, 0)
+CILK_C_REDUCER_MAX_INSTANCE(long long, longlong, LLONG_MIN)
+CILK_C_REDUCER_MAX_INSTANCE(unsigned long long, ulonglong, 0)
+CILK_C_REDUCER_MAX_INSTANCE(float, float, -HUGE_VALF)
+CILK_C_REDUCER_MAX_INSTANCE(double, double, -HUGE_VAL)
+CILK_C_REDUCER_MAX_INSTANCE(long double, longdouble, -HUGE_VALL)
+__CILKRTS_END_EXTERN_C
+
+/// @endcond
+
+/** Max_index reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the max_index reducer
+ * type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,tn)
+
+/** Declare an op_max_index reducer object.
+ *
+ * This macro expands into a declaration of a max_index reducer object for a specified
+ * numeric type. For example:
+ *
+ * CILK_C_REDUCER_MAX_INDEX(my_reducer, double, -DBL_MAX);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ * @param v The initial value for the reducer. (A value which can be assigned to the
+ * numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MAX_INDEX(obj,tn,v) \
+ CILK_C_REDUCER_MAX_INDEX_TYPE(tn) obj = \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_,tn), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_,tn), \
+ __cilkrts_hyperobject_noop_destroy, {0, v})
+
+/** Maximize with a value.
+ *
+ * `CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v)` sets the current view of the
+ * reducer to the max of its previous value and a specified new value.
+ * This is equivalent to
+ *
+ * REDUCER_VIEW(reducer) = max_index(REDUCER_VIEW(reducer), v)
+ *
+ * If the value of the reducer is changed to @a v, then the index of the reducer is
+ * changed to @a i.
+ *
+ * The value expression @a v is evaluated once; @a i is evaluated only when the
+ * view is updated. The macro's temporaries use reserved (double-underscore)
+ * names so that a caller's expression mentioning an identifier such as `view`
+ * is not captured.
+ *
+ * @param reducer The reducer whose contained value and index are to be updated.
+ * @param i The index associated with the new value.
+ * @param v The value that it is to be maximized with.
+ */
+#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) do { \
+ _Typeof((reducer).value)* __view = &(REDUCER_VIEW(reducer)); \
+ _Typeof(v) __value = (v); \
+ if (__view->value < __value) { \
+ __view->index = (i); \
+ __view->value = __value; \
+ } } while (0)
+
+/// @cond internal
+
+/** Declare the max_index view type.
+ *
+ * The view of a max_index reducer is a structure containing both the
+ * maximum value for the reducer and the index that was associated with
+ * that value in the sequence of input values.
+ */
+#define CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn) \
+ typedef struct { \
+ __STDNS ptrdiff_t index; \
+ t value; \
+ } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)
+
+/** Declare the max_index reducer functions for a numeric type.
+ *
+ * This macro expands into the view struct typedef, a typedef for the max_index reducer
+ * type, and external function declarations for the functions which implement the reducer
+ * functionality for the max_index reducer type for a specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value for the reducer. Not used by this macro; it is accepted
+ * so that the argument list matches CILK_C_REDUCER_MAX_INDEX_DEFINITION.
+ */
+#define CILK_C_REDUCER_MAX_INDEX_DECLARATION(t,tn,id) \
+ CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \
+ typedef CILK_C_DECLARE_REDUCER( \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \
+ CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r); \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn);
+
+/** Define the max_index reducer functions for a numeric type.
+ *
+ * This macro expands into the view struct typedef, a typedef for the max_index reducer
+ * type, and function definitions for the functions which implement the reducer
+ * functionality for the max_index reducer type for a specified numeric type. The reduce
+ * function copies the whole right view (index and value) over the left view when the
+ * left value is smaller; the identity function sets the index to 0 and the value to
+ * @a id.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value stored by the generated identity function.
+ */
+#define CILK_C_REDUCER_MAX_INDEX_DEFINITION(t,tn,id) \
+ CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \
+ typedef CILK_C_DECLARE_REDUCER( \
+ __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \
+ CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r) \
+ { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \
+ if (((view_t*)l)->value < ((view_t*)r)->value) \
+ *(view_t*)l = *(view_t*)r; } \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn) \
+ { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \
+ ((view_t*)v)->index = 0; ((view_t*)v)->value = id; }
+
+//@{
+/** @def CILK_C_REDUCER_MAX_INDEX_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and
+ * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS`
+ * will be undefined, and this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MAX_INDEX_DEFINITION(t,tn,id)
+#else
+# define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MAX_INDEX_DECLARATION(t,tn,id)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+__CILKRTS_BEGIN_EXTERN_C
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(char, char, CHAR_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char, uchar, 0)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char, schar, SCHAR_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(short, short, SHRT_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short, ushort, 0)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(int, int, INT_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, uint, 0)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, unsigned, 0) // alternate name
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(long, long, LONG_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long, ulong, 0)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long, longlong, LLONG_MIN)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long, ulonglong, 0)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(float, float, -HUGE_VALF)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(double, double, -HUGE_VAL)
+CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double, longdouble, -HUGE_VALL)
+__CILKRTS_END_EXTERN_C
+
+/// @endcond
+
+/** Min reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the min reducer
+ * type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MIN_TYPE(tn) \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn)
+
+/** Declare a min reducer object.
+ *
+ * This macro expands into a declaration of a min reducer object for a specified numeric
+ * type. For example:
+ *
+ * CILK_C_REDUCER_MIN(my_reducer, double, DBL_MAX);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ * @param v The initial value for the reducer. (A value which can be assigned to the
+ * numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MIN(obj,tn,v) \
+ CILK_C_REDUCER_MIN_TYPE(tn) obj = \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_,tn), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_,tn), \
+ __cilkrts_hyperobject_noop_destroy, v)
+
+/** Minimize with a value.
+ *
+ * `CILK_C_REDUCER_MIN_CALC(reducer, v)` sets the current view of the
+ * reducer to the min of its previous value and a specified new value.
+ * This is equivalent to
+ *
+ * REDUCER_VIEW(reducer) = min(REDUCER_VIEW(reducer), v)
+ *
+ * The value expression @a v is evaluated once. The macro's temporaries use
+ * reserved (double-underscore) names so that a caller's reducer or value
+ * expression mentioning an identifier such as `view` is not captured.
+ *
+ * @param reducer The reducer whose contained value is to be updated.
+ * @param v The value that it is to be minimized with.
+ */
+#define CILK_C_REDUCER_MIN_CALC(reducer, v) do { \
+ _Typeof((reducer).value)* __view = &(REDUCER_VIEW(reducer)); \
+ _Typeof(v) __value = (v); \
+ if (*__view > __value) { \
+ *__view = __value; \
+ } } while (0)
+
+/// @cond internal
+
+/** Declare the min reducer functions for a numeric type.
+ *
+ * This macro expands into a typedef for the min reducer type and external function
+ * declarations for the functions which implement the reducer functionality for the
+ * min reducer type for a specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value for the reducer. Not used by this macro; it is accepted
+ * so that the argument list matches CILK_C_REDUCER_MIN_DEFINITION.
+ */
+#define CILK_C_REDUCER_MIN_DECLARATION(t,tn,id) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r); \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn);
+
+/** Define the min reducer functions for a numeric type.
+ *
+ * This macro expands into a typedef for the min reducer type and function definitions
+ * for the functions which implement the reducer functionality for the min reducer type
+ * for a specified numeric type. The reduce function stores the right value into the
+ * left one when the left value is larger; the identity function stores @a id.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value stored by the generated identity function.
+ */
+#define CILK_C_REDUCER_MIN_DEFINITION(t,tn,id) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r) \
+ { if (*(t*)l > *(t*)r) *(t*)l = *(t*)r; } \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn) \
+ { *(t*)v = id; }
+
+//@{
+/** @def CILK_C_REDUCER_MIN_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and
+ * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS`
+ * will be undefined, and this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_MIN_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MIN_DEFINITION(t,tn,id)
+#else
+# define CILK_C_REDUCER_MIN_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MIN_DECLARATION(t,tn,id)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type. The third argument is the identity for min: the largest value
+ * of the type (UCHAR_MAX, not CHAR_MAX, for unsigned char, since CHAR_MAX is
+ * only 127 where plain char is signed).
+ */
+__CILKRTS_BEGIN_EXTERN_C
+CILK_C_REDUCER_MIN_INSTANCE(char, char, CHAR_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned char, uchar, UCHAR_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(signed char, schar, SCHAR_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(wchar_t, wchar_t, WCHAR_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(short, short, SHRT_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned short, ushort, USHRT_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(int, int, INT_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned int, uint, UINT_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name
+CILK_C_REDUCER_MIN_INSTANCE(long, long, LONG_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned long, ulong, ULONG_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(long long, longlong, LLONG_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX)
+CILK_C_REDUCER_MIN_INSTANCE(float, float, HUGE_VALF)
+CILK_C_REDUCER_MIN_INSTANCE(double, double, HUGE_VAL)
+CILK_C_REDUCER_MIN_INSTANCE(long double, longdouble, HUGE_VALL)
+__CILKRTS_END_EXTERN_C
+
+/// @endcond
+
+/** Min_index reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the min_index reducer
+ * type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,tn)
+
+/** Declare an op_min_index reducer object.
+ *
+ * This macro expands into a declaration of a min_index reducer object for a specified
+ * numeric type. For example:
+ *
+ * CILK_C_REDUCER_MIN_INDEX(my_reducer, double, DBL_MAX);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the
+ * reducer.
+ * @param v The initial value for the reducer. (A value which can be assigned to the
+ * numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ */
+#define CILK_C_REDUCER_MIN_INDEX(obj,tn,v) \
+ CILK_C_REDUCER_MIN_INDEX_TYPE(tn) obj = \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_,tn), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_,tn), \
+ __cilkrts_hyperobject_noop_destroy, {0, v})
+
+/** Minimize with a value.
+ *
+ * `CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v)` sets the current view of the
+ * reducer to the min of its previous value and a specified new value.
+ * This is equivalent to
+ *
+ * REDUCER_VIEW(reducer) = min_index(REDUCER_VIEW(reducer), v)
+ *
+ * If the value of the reducer is changed to @a v, then the index of the reducer is
+ * changed to @a i.
+ *
+ * The value expression @a v is evaluated once; @a i is evaluated only when the
+ * view is updated. The macro's temporaries use reserved (double-underscore)
+ * names so that a caller's expression mentioning an identifier such as `view`
+ * is not captured.
+ *
+ * @param reducer The reducer whose contained value and index are to be updated.
+ * @param i The index associated with the new value.
+ * @param v The value that it is to be minimized with.
+ */
+#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) do { \
+ _Typeof((reducer).value)* __view = &(REDUCER_VIEW(reducer)); \
+ _Typeof(v) __value = (v); \
+ if (__view->value > __value) { \
+ __view->index = (i); \
+ __view->value = __value; \
+ } } while (0)
+
+/// @cond internal
+
+/** Declare the min_index view type.
+ *
+ * The view of a min_index reducer is a structure containing both the
+ * minimum value for the reducer and the index that was associated with
+ * that value in the sequence of input values.
+ */
+#define CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn) \
+ typedef struct { \
+ __STDNS ptrdiff_t index; \
+ t value; \
+ } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)
+
+/** Declare the min_index reducer functions for a numeric type.
+ *
+ * This macro expands into the view struct typedef, a typedef for the min_index reducer
+ * type, and external function declarations for the functions which implement the reducer
+ * functionality for the min_index reducer type for a specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value for the reducer. Not used by this macro; it is accepted
+ * so that the argument list matches CILK_C_REDUCER_MIN_INDEX_DEFINITION.
+ */
+#define CILK_C_REDUCER_MIN_INDEX_DECLARATION(t,tn,id) \
+ CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn); \
+ typedef CILK_C_DECLARE_REDUCER( \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)) \
+ CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r); \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn);
+
+/** Define the min_index reducer functions for a numeric type.
+ *
+ * This macro expands into the view struct typedef, a typedef for the min_index reducer
+ * type, and function definitions for the functions which implement the reducer
+ * functionality for the min_index reducer type for a specified numeric type. The reduce
+ * function copies the whole right view (index and value) over the left view when the
+ * left value is larger; the identity function sets the index to 0 and the value to
+ * @a id.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer type name,
+ * function names, etc.
+ * @param id The identity value stored by the generated identity function.
+ */
+#define CILK_C_REDUCER_MIN_INDEX_DEFINITION(t,tn,id) \
+ CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn); \
+ typedef CILK_C_DECLARE_REDUCER( \
+ __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)) \
+ CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r) \
+ { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \
+ if (((view_t*)l)->value > ((view_t*)r)->value) \
+ *(view_t*)l = *(view_t*)r; } \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn) \
+ { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \
+ ((view_t*)v)->index = 0; ((view_t*)v)->value = id; }
+
+//@{
+/** @def CILK_C_REDUCER_MIN_INDEX_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and
+ * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS`
+ * will be undefined, and this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MIN_INDEX_DEFINITION(t,tn,id)
+#else
+# define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn,id) \
+ CILK_C_REDUCER_MIN_INDEX_DECLARATION(t,tn,id)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type. The third argument is the identity for min: the largest value
+ * of the type (UCHAR_MAX, not CHAR_MAX, for unsigned char, since CHAR_MAX is
+ * only 127 where plain char is signed).
+ */
+__CILKRTS_BEGIN_EXTERN_C
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(char, char, CHAR_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char, uchar, UCHAR_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char, schar, SCHAR_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(short, short, SHRT_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short, ushort, USHRT_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(int, int, INT_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, uint, UINT_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(long, long, LONG_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long, ulong, ULONG_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long, longlong, LLONG_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(float, float, HUGE_VALF)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(double, double, HUGE_VAL)
+CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double, longdouble, HUGE_VALL)
+__CILKRTS_END_EXTERN_C
+
+/// @endcond
+
+//@}
+
+#endif // defined REDUCER_MAX_H_INCLUDED
diff --git a/libcilkrts/include/cilk/reducer_opadd.h b/libcilkrts/include/cilk/reducer_opadd.h
index 9380a0ac122..9c2a97c17e9 100644
--- a/libcilkrts/include/cilk/reducer_opadd.h
+++ b/libcilkrts/include/cilk/reducer_opadd.h
@@ -1,33 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer_opadd.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_opadd.h
+/** @file reducer_opadd.h
*
- * Purpose: Reducer hyperobject to sum values
+ * @brief Defines classes for doing parallel addition reductions.
+ *
+ * @ingroup ReducersAdd
+ *
+ * @see ReducersAdd
*/
#ifndef REDUCER_OPADD_H_INCLUDED
@@ -35,408 +43,558 @@
#include <cilk/reducer.h>
-#ifdef __cplusplus
-
-/* C++ Interface
- *
- * Classes: reducer_opadd<Type>
- *
- * Description:
- * ============
- * This component provides a reducer-type hyperobject representation
- * that allows adding values to a non-local variable using the +=, -=,
- * ++, --, +, and - operators. A common operation when traversing a data
- * structure is to sum values into a non-local numeric variable. When
- * Cilk parallelism is introduced, however, a data race will occur on
- * the variable holding the sum. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing an operation on
- * each object and accumulating the result of the operation into an integer
- * variable.
- *..
- * int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * int result = 0;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result += compute(myArray[i]);
+/** @defgroup ReducersAdd Addition Reducers
+ *
+ * Addition reducers allow the computation of the sum of a set of values in
+ * parallel.
+ *
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redopadd_usage Usage Example
+ *
+ * cilk::reducer< cilk::op_add<int> > r;
+ * cilk_for (int i = 0; i != N; ++i) {
+ * *r += a[i];
* }
+ * return r.get_value();
+ *
+ * @section redopadd_monoid The Monoid
+ *
+ * @subsection redopadd_monoid_values Value Set
+ *
+ * The value set of an addition reducer is the set of values of `Type`, which
+ * is expected to be a builtin numeric type (or something like it, such as
+ * `std::complex`).
+ *
+ * @subsection redopadd_monoid_operator Operator
+ *
+ * The operator of an addition reducer is the addition operator, defined by
+ * the “`+`” binary operator on `Type`.
+ *
+ * @subsection redopadd_monoid_identity Identity
+ *
+ * The identity value of the reducer is the numeric value “`0`”. This is
+ * expected to be the value of the default constructor `Type()`.
+ *
+ * @section redopadd_operations Operations
+ *
+ * @subsection redopadd_constructors Constructors
+ *
+ * reducer() // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ *
+ * @subsection redopadd_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * const Type& = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
*
- * std::cout << "The result is: " << result << std::endl;
- *
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' variable.
- * The race is solved by changing 'result' to a 'reducer_opadd' hyperobject:
- *..
- * int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_opadd<int> result;
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * *result += compute(myArray[i]);
+ * @subsection redopadd_initial Initial Values
+ *
+ * If an addition reducer is constructed without an explicit initial value,
+ * then its initial value will be its identity value, as long as `Type`
+ * satisfies the requirements of @ref redopadd_types.
+ *
+ * @subsection redopadd_view_ops View Operations
+ *
+ * *r += a
+ * *r -= a
+ * ++*r
+ * --*r
+ * (*r)++
+ * (*r)--
+ * *r = *r + a
+ * *r = *r - a
+ * *r = *r ± a1 ± a2 … ± an
+ *
+ * The post-increment and post-decrement operations do not return a value. (If
+ * they did, they would expose the value contained in the view, which is
+ * non-deterministic in the middle of a reduction.)
+ *
+ * Note that subtraction operations are allowed on an addition reducer because
+ * subtraction is equivalent to addition with a negated operand. It is true
+ * that `(x - y) - z` is not equivalent to `x - (y - z)`, but
+ * `(x + (-y)) + (-z)` _is_ equivalent to `x + ((-y) + (-z))`.
+ *
+ * @section redopadd_floating_point Issues with Floating-Point Types
+ *
+ * Because of precision and round-off issues, floating-point addition is not
+ * really associative. For example, `(1e30 + -1e30) + 1 == 1`, but
+ * `1e30 + (-1e30 + 1) == 0`.
+ *
+ * In many cases, this won’t matter, but computations which have been
+ * carefully ordered to control round-off errors may not deal well with
+ * being reassociated. In general, you should be sure to understand the
+ * floating-point behavior of your program before doing any transformation
+ * that will reassociate its computations.
+ *
+ * @section redopadd_types Type and Operator Requirements
+ *
+ * `Type` must be `Copy Constructible`, `Default Constructible`, and
+ * `Assignable`.
+ *
+ * The operator “`+=`” must be defined on `Type`, with `x += a` having the
+ * same meaning as `x = x + a`. In addition, if the code uses the “`-=`”,
+ * pre-increment, post-increment, pre-decrement, or post-decrement operators,
+ * then the corresponding operators must be defined on `Type`.
+ *
+ * The expression `Type()` must be a valid expression which yields the
+ * identity value (the value of `Type` whose numeric value is zero).
+ *
+ * @section redopadd_in_c Addition Reducers in C
+ *
+ * The @ref CILK_C_REDUCER_OPADD and @ref CILK_C_REDUCER_OPADD_TYPE macros can
+ * be used to do addition reductions in C. For example:
+ *
+ * CILK_C_REDUCER_OPADD(r, double, 0);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(r) += a[i];
* }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The sum of the elements of a is %f\n", REDUCER_VIEW(r));
*
- * std::cout << "The result is: " << result.get_value() << std::endl;
- *
- * return 0;
- * }
- *..
- *
- * Operations provided:
- * ====================
- * Given 'reducer_opadd' objects, x and y, the following are
- * valid statements:
- *..
- * *x += 5;
- * *x = *x + 5;
- * *x -= 5;
- * *y = *y - 5;
- * ++*x;
- * --*x;
- * (*x)++;
- * (*x)--;
- *..
- * The following are not valid expressions and will result in a run-time error
- * in a debug build:
- *..
- * x = y; // Cannot assign one reducer to another
- * *x = *y + 5; // Mixed reducers
- * *x = 5 + *x; // operator+ is not necessarily commutative
- * *x = 5 - *x; // Violates associativity
- *..
- * The the current value of the reducer can be get and set using the
- * 'get_value' and 'set_value' methods, respectively. As with most reducers,
- * 'set_value' and 'get_value' methods produce deterministic results only if
- * called before the first spawn after creating a 'hyperobject' or when all
- * strands spawned since creating the 'hyperobject' have been synced. However,
- * the difference two values of the same reducer read twice in the same Cilk
- * strand *is* typically deterministic (assuming the usual relationship between
- * operator '+' and operator '-' for the specified 'Type'):
- *..
- * cilk::reducer_opadd<int> x;
- * cilk_spawn func();
- * int a = x.get_value();
- * *x += 5;
- * int b = x.get_value();
- * assert(b - a == 5);
- *..
- *
- * Requirements on the 'Type' parameter
- * ====================================
- * The 'Type' parameter used to instantiate the 'reducer_opadd' class must
- * provide a += operator that meets the requirements for an
- * *associative* *mutating* *operator* as defined in the Cilk++ user manual.
- * The default constructor for 'Type' must yield an additive identity, i.e.,
- * a value (such as integer zero) that, when added to any other value, yields
- * the other value. If 'Type' also provides a -= operator, then subtraction
- * is also supported by this reducer. C++ integral types satisfy these
- * requirements.
- *
- * Note that C++ floating-point types do not support truly
- * associative addition in that (a + b) + c will exhibit different
- * round-off error than a + (b + c). However, for numbers of similar
- * magnitude, a floating-point 'reducer_opadd' may still be useful.
+ * See @ref reducers_c_predefined.
*/
-namespace cilk
-{
+#ifdef __cplusplus
+
+namespace cilk {
-/**
- * @brief A reducer-type hyperobject representation that allows adding values
- * to a non-local variable using the +=, -=, ++, --, +, and - operators.
- *
- * A common operation when traversing a data structure is to sum values into a
- * non-local numeric variable. When Cilk parallelism is introduced, however,
- * a data race will occur on the variable holding the sum. By replacing the
- * variable with the hyperobject defined in this component, the data race is
- * eliminated.
+/** The addition reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_add<Type> >`. It holds the accumulator variable
+ * for the reduction, and allows only addition and subtraction operations to
+ * be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `+=` operation would be used in an expression like `*r += a`, where
+ * `r` is an op_add reducer variable.
+ *
+ * @tparam Type The type of the contained accumulator variable. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ *
+ * @see ReducersAdd
+ * @see op_add
+ *
+ * @ingroup ReducersAdd
*/
template <typename Type>
-class reducer_opadd
+class op_add_view : public scalar_view<Type>
{
+ typedef scalar_view<Type> base;
+
public:
- /// Definition of data view, operation, and identity for reducer_opadd
- class Monoid : public monoid_base<Type>
- {
- public:
- static void reduce(Type* left, Type* right);
+ /** Class to represent the right-hand side of
+ * `*reducer = *reducer ± value`.
+ *
+ * The only assignment operator for the op_add_view class takes an
+ * rhs_proxy as its operand. This results in the syntactic restriction
+ * that the only expressions that can be assigned to an op_add_view are
+ * ones which generate an rhs_proxy — that is, expressions of the form
+ * `op_add_view ± value ... ± value`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same;
+ * otherwise, the behavior will be undefined. (I.e., `v1 = v1 + x` is
+ * legal; `v1 = v2 + x` is illegal.) This condition will be checked with a
+ * runtime assertion when compiled in debug mode.
+ *
+ * @see op_add_view
+ */
+ class rhs_proxy {
+ friend class op_add_view;
+
+ const op_add_view* m_view;
+ Type m_value;
+
+ // Constructor is invoked only from op_add_view::operator+() and
+ // op_add_view::operator-().
+ //
+ rhs_proxy(const op_add_view* view, const Type& value) :
+ m_view(view), m_value(value) {}
+
+ rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+ rhs_proxy(); // Disable default constructor
+
+ public:
+ //@{
+ /** Add or subtract an additional rhs value. If `v` is an op_add_view
+ * and `a1` is a value, then the expression `v + a1` invokes the view’s
+ * `operator+()` to create an rhs_proxy for `(v, a1)`; then
+ * `v + a1 + a2` invokes the rhs_proxy’s `operator+()` to create a new
+ * rhs_proxy for `(v, a1+a2)`. This allows the right-hand side of an
+ * assignment to be not just `view ± value`, but
+ * `view ± value ± value ... ± value`. The effect is that
+ *
+ * v = v ± a1 ± a2 ... ± an;
+ *
+ * is evaluated as
+ *
+ * v = v ± (±a1 ± a2 ... ± an);
+ */
+ rhs_proxy& operator+(const Type& x) { m_value += x; return *this; }
+ rhs_proxy& operator-(const Type& x) { m_value -= x; return *this; }
+ //@}
};
- /// "PRIVATE" HELPER CLASS
- class temp_sum {
- friend class reducer_opadd;
-
- Type* valuePtr_;
+
+ /** Default/identity constructor. This constructor initializes the
+ * contained value to `Type()`, which is expected to be the identity value
+ * for addition on `Type`.
+ */
+ op_add_view() : base() {}
+
+ /** Construct with a specified initial value.
+ */
+ explicit op_add_view(const Type& v) : base(v) {}
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_add monoid to combine the views
+ * of two strands when the right strand merges with the left one. It adds
+ * the value contained in the right-strand view to the value contained in
+ * the left-strand view, and leaves the value in the right-strand view
+ * undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_add monoid to implement the monoid
+ * reduce operation.
+ */
+ void reduce(op_add_view* right) { this->m_value += right->m_value; }
+
+ /** @name Accumulator variable updates.
+ *
+ * These functions support the various syntaxes for incrementing or
+ * decrementing the accumulator variable contained in the view.
+ */
+ //@{
+
+ /** Increment the accumulator variable by @a x.
+ */
+ op_add_view& operator+=(const Type& x) { this->m_value += x; return *this; }
+
+ /** Decrement the accumulator variable by @a x.
+ */
+ op_add_view& operator-=(const Type& x) { this->m_value -= x; return *this; }
+
+ /** Pre-increment.
+ */
+ op_add_view& operator++() { ++this->m_value; return *this; }
+
+ /** Post-increment.
+ *
+ * @note Conventionally, post-increment operators return the old value
+ * of the incremented variable. However, reducer views do not
+ * expose their contained values, so `view++` does not have a
+ * return value.
+ */
+ void operator++(int) { this->m_value++; }
+
+ /** Pre-decrement.
+ */
+ op_add_view& operator--() { --this->m_value; return *this; }
+
+ /** Post-decrement.
+ *
+ * @note Conventionally, post-decrement operators return the old value
+ * of the decremented variable. However, reducer views do not
+ * expose their contained values, so `view--` does not have a
+ * return value.
+ */
+ void operator--(int) { this->m_value--; }
+
+ /** Create an object representing `*this + x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator+(const Type& x) const { return rhs_proxy(this, x); }
+
+ /** Create an object representing `*this - x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator-(const Type& x) const { return rhs_proxy(this, -x); }
+
+ /** Assign the result of a `view ± value` expression to the view. Note that
+ * this is the only assignment operator for this class.
+ *
+ * @see rhs_proxy
+ */
+ op_add_view& operator=(const rhs_proxy& rhs) {
+ __CILKRTS_ASSERT(this == rhs.m_view);
+ this->m_value += rhs.m_value;
+ return *this;
+ }
+
+ //@}
+};
- // Default copy constructor, no assignment operator
- temp_sum& operator=(const temp_sum&);
- explicit temp_sum(Type* valuePtr);
+/** Monoid class for addition reductions. Instantiate the cilk::reducer
+ * template class with an op_add monoid to create an addition reducer class.
+ * For example, to compute
+ * the sum of a set of `int` values:
+ *
+ * cilk::reducer< cilk::op_add<int> > r;
+ *
+ * @tparam Type The reducer value type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersAdd
+ * @see op_add_view
+ *
+ * @ingroup ReducersAdd
+ */
+template <typename Type, bool Align = false>
+struct op_add : public monoid_with_view<op_add_view<Type>, Align> {};
- public:
- temp_sum& operator+(const Type& x);
- temp_sum& operator-(const Type& x);
- };
+/** **Deprecated** addition reducer wrapper class.
+ *
+ * reducer_opadd is the same as @ref reducer<@ref op_add>, except that
+ * reducer_opadd is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is added to a `reducer<%op_add>` with `*r += a`, but a
+ * value can be added to a `%reducer_opadd` with `r += a`.
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_opadd.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_opadd`
+ * and `reducer<%op_add>`. This allows incremental code
+ * conversion: old code that used `%reducer_opadd` can pass a
+ * `%reducer_opadd` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_add>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the reducer.
+ *
+ * @see op_add
+ * @see reducer
+ * @see ReducersAdd
+ *
+ * @ingroup ReducersAdd
+ */
+template <typename Type>
+class reducer_opadd : public reducer< op_add<Type, true> >
+{
+ typedef reducer< op_add<Type, true> > base;
+ using base::view;
public:
-
- /// Construct an 'reducer_opadd' object with a value of 'Type()'.
- reducer_opadd();
-
- /// Construct an 'reducer_opadd' object with the specified initial value.
- explicit reducer_opadd(const Type& initial_value);
-
- /// Return a const reference to the current value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Set the value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// Add 'x' to the value of this reducer and produce a temporary sum object.
- /// The temporary sum can be used for additional arithmetic or assigned back
- /// to this reducer.
- temp_sum operator+(const Type& x) const;
-
- /// Subtract 'x' from the value of this reducer and produce a temporary sum
- /// object. The temporary sum can be used for additional arithmetic or
- /// assigned back to this reducer.
- temp_sum operator-(const Type& x) const;
-
- /// Add 'x' to the value of this object.
- reducer_opadd& operator+=(const Type& x);
-
- /// Subtract 'x' from the value of this object.
- reducer_opadd& operator-=(const Type& x);
-
- /// Increment the value of this object using pre-increment syntax.
- reducer_opadd& operator++();
-
- /// Increment the value of this object using post-increment syntax.
- /// Because the reducer is not copy-constructible, it is not possible to
- /// return the previous value.
- void operator++(int);
-
- /// Decrement the value of this object using pre-decrement syntax.
- reducer_opadd& operator--();
-
- /// Decrement the value of this object using post-decrement syntax.
- /// Because the reducer is not copy-constructible, it is not possible to
- /// return the previous value.
- void operator--(int);
-
- /// Merge the result of an addition into this object. The addition
- /// must involve this reducer, i.e., x = x + 5; not x = y + 5;
- reducer_opadd& operator=(const temp_sum& temp);
-
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view’s rhs proxy type.
+ typedef typename view_type::rhs_proxy rhs_proxy;
+
+ /// The view type for the reducer (an alias for view_type).
+ typedef view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Default (identity) constructor.
+ *
+ * Constructs the wrapper with the default initial value of `Type()`.
+ */
+ reducer_opadd() {}
+
+ /** Value constructor.
+ *
+ * Constructs the wrapper with a specified initial value.
+ */
+ explicit reducer_opadd(const Type& initial_value) : base(initial_value) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_add_view. */
+ //@{
+
+ /// @copydoc op_add_view::operator+=(const Type&)
+ reducer_opadd& operator+=(const Type& x) { view() += x; return *this; }
+
+ /// @copydoc op_add_view::operator-=(const Type&)
+ reducer_opadd& operator-=(const Type& x) { view() -= x; return *this; }
+
+ /// @copydoc op_add_view::operator++()
+ reducer_opadd& operator++() { ++view(); return *this; }
+
+ /// @copydoc op_add_view::operator++(int)
+ void operator++(int) { view()++; }
+
+ /// @copydoc op_add_view::operator-\-()
+ reducer_opadd& operator--() { --view(); return *this; }
+
+ /// @copydoc op_add_view::operator-\-(int)
+ void operator--(int) { view()--; }
+
+ // The legacy definitions of reducer_opadd::operator+() and
+ // reducer_opadd::operator-() have different behavior and a different
+ // return type than this definition. The legacy version is defined as a
+ // member function, so this new version is defined as a free function to
+ // give it a different signature, so that they won’t end up sharing a
+ // single object file entry.
+
+ /// @copydoc op_add_view::operator+(const Type&) const
+ friend rhs_proxy operator+(const reducer_opadd& r, const Type& x)
+ {
+ return r.view() + x;
+ }
+ /// @copydoc op_add_view::operator-(const Type&) const
+ friend rhs_proxy operator-(const reducer_opadd& r, const Type& x)
+ {
+ return r.view() - x;
+ }
+ /// @copydoc op_add_view::operator=(const rhs_proxy&)
+ reducer_opadd& operator=(const rhs_proxy& temp)
+ {
+ view() = temp;
+ return *this;
+ }
+ //@}
+
+ /** @name Dereference
+ * @details Dereferencing a wrapper is a no-op. It simply returns the
+ * wrapper. Combined with the rule that the wrapper forwards view
+ * operations to its contained view, this means that view operations can
+ * be written the same way on reducers and wrappers, which is convenient
+ * for incrementally converting old code using wrappers to use reducers
+ * instead. That is:
+ *
+ * reducer< op_add<int> > r;
+ * *r += a; // *r returns the view
+ * // operator += is a view member function
+ *
+ * reducer_opadd<int> w;
+ * *w += a; // *w returns the wrapper
+ * // operator += is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
reducer_opadd& operator*() { return *this; }
reducer_opadd const& operator*() const { return *this; }
reducer_opadd* operator->() { return this; }
reducer_opadd const* operator->() const { return this; }
-
- private:
- friend class temp_sum;
-
- // Hyperobject to serve up views
- reducer<Monoid> imp_;
-
- // Not copyable
- reducer_opadd(const reducer_opadd&);
- reducer_opadd& operator=(const reducer_opadd&);
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_add<Type, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_add<Type, false> >* >(this);
+ }
+ operator const reducer< op_add<Type, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_add<Type, false> >* >(this);
+ }
+ //@}
};
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// ------------------------------------
-// template class reducer_opadd::Monoid
-// ------------------------------------
-
-/**
- * Combines two views of the data.
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
+ *
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_add<Type> >` class to have an
+ * `operator reducer_opadd<Type>& ()` conversion operator that statically
+ * downcasts the `reducer<op_add>` to the corresponding `reducer_opadd` type.
+ * (The reverse conversion, from `reducer_opadd` to `reducer<op_add>`, is just
+ * an upcast, which is provided for free by the language.)
+ *
+ * @ingroup ReducersAdd
*/
-template <typename Type>
-void
-reducer_opadd<Type>::Monoid::reduce(Type* left, Type* right)
-{
- *left += *right;
-}
-
-// ----------------------------
-// template class reducer_opadd
-// ----------------------------
-
-template <typename Type>
-inline
-reducer_opadd<Type>::reducer_opadd()
- : imp_(Type())
-{
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>::reducer_opadd(const Type& initial_value)
- : imp_(initial_value)
-{
-}
-
-template <typename Type>
-inline
-const Type& reducer_opadd<Type>::get_value() const
-{
- return imp_.view();
-}
-
-template <typename Type>
-inline
-void reducer_opadd<Type>::set_value(const Type& value)
-{
- imp_.view() = value;
-}
-
-template <typename Type>
-inline
-typename reducer_opadd<Type>::temp_sum
-reducer_opadd<Type>::operator+(const Type& x) const
-{
- Type* valuePtr = const_cast<Type*>(&imp_.view());
- *valuePtr = *valuePtr + x;
- return temp_sum(valuePtr);
-}
-
-template <typename Type>
-inline
-typename reducer_opadd<Type>::temp_sum
-reducer_opadd<Type>::operator-(const Type& x) const
-{
- Type* valuePtr = const_cast<Type*>(&imp_.view());
- *valuePtr = *valuePtr - x;
- return temp_sum(valuePtr);
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>& reducer_opadd<Type>::operator+=(const Type& x)
-{
- imp_.view() += x;
- return *this;
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>& reducer_opadd<Type>::operator-=(const Type& x)
-{
- imp_.view() -= x;
- return *this;
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>& reducer_opadd<Type>::operator++()
-{
- imp_.view() += 1;
- return *this;
-}
-
-template <typename Type>
-inline
-void reducer_opadd<Type>::operator++(int)
-{
- imp_.view() += 1;
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>& reducer_opadd<Type>::operator--()
-{
- imp_.view() -= 1;
- return *this;
-}
-
-template <typename Type>
-inline
-void reducer_opadd<Type>::operator--(int)
+template <typename Type, bool Align>
+struct legacy_reducer_downcast<reducer<op_add<Type, Align> > >
{
- imp_.view() -= 1;
-}
-
-template <typename Type>
-inline
-reducer_opadd<Type>&
-reducer_opadd<Type>::operator=(
- const typename reducer_opadd<Type>::temp_sum& temp)
-{
- // No-op. Just test that temp was constructed from this.
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_);
- return *this;
-}
-
-// --------------------------------------
-// template class reducer_opadd::temp_sum
-// --------------------------------------
-
-template <typename Type>
-inline
-reducer_opadd<Type>::temp_sum::temp_sum(Type *valuePtr)
- : valuePtr_(valuePtr)
-{
-}
-
-template <typename Type>
-inline
-typename reducer_opadd<Type>::temp_sum&
-reducer_opadd<Type>::temp_sum::operator+(const Type& x)
-{
- *valuePtr_ = *valuePtr_ + x;
- return *this;
-}
-
-template <typename Type>
-inline
-typename reducer_opadd<Type>::temp_sum&
-reducer_opadd<Type>::temp_sum::operator-(const Type& x)
-{
- *valuePtr_ = *valuePtr_ - x;
- return *this;
-}
+ typedef reducer_opadd<Type> type;
+};
+/// @endcond
} // namespace cilk
#endif // __cplusplus
-/* C Interface
+
+/** @ingroup ReducersAdd
*/
+//@{
+/** @name C Language Reducer Macros
+ *
+ * These macros are used to declare and work with numeric op_add reducers in
+ * C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
__CILKRTS_BEGIN_EXTERN_C
+/** Opadd reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the op_add
+ * reducer type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersAdd
+ */
#define CILK_C_REDUCER_OPADD_TYPE(tn) \
__CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn)
+
+/** Declare an op_add reducer object.
+ *
+ * This macro expands into a declaration of an op_add reducer object for a
+ * specified numeric type. For example:
+ *
+ * CILK_C_REDUCER_OPADD(my_reducer, double, 0.0);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ * @param v The initial value for the reducer. (A value which can be
+ * assigned to the numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersAdd
+ */
#define CILK_C_REDUCER_OPADD(obj,tn,v) \
CILK_C_REDUCER_OPADD_TYPE(tn) obj = \
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
@@ -444,60 +602,84 @@ __CILKRTS_BEGIN_EXTERN_C
__CILKRTS_MKIDENT(cilk_c_reducer_opadd_identity_,tn), \
__cilkrts_hyperobject_noop_destroy, v)
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \
- typedef CILK_C_DECLARE_REDUCER(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn); \
+/// @cond internal
+
+/** Declare the op_add reducer functions for a numeric type.
+ *
+ * This macro expands into external function declarations for functions which
+ * implement the reducer functionality for the op_add reducer type for a
+ * specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPADD_DECLARATION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn);
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_OPADD_INSTANCE(char,char);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_OPADD_INSTANCE(signed char,schar);
-CILK_C_REDUCER_OPADD_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_OPADD_INSTANCE(short,short);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_OPADD_INSTANCE(int,int);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_OPADD_INSTANCE(long,long);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_OPADD_INSTANCE(long long,longlong);
-CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_OPADD_INSTANCE(float,float);
-CILK_C_REDUCER_OPADD_INSTANCE(double,double);
-CILK_C_REDUCER_OPADD_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPADD_IMP(t,tn) \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn);
+
+/** Define the op_add reducer functions for a numeric type.
+ *
+ * This macro expands into function definitions for functions which implement
+ * the reducer functionality for the op_add reducer type for a specified
+ * numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPADD_DEFINITION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r) \
{ *(t*)l += *(t*)r; } \
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn) \
{ *(t*)v = 0; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric tyeps. The contents of reducer_opadd.c are as follows:
-
-CILK_C_REDUCER_OPADD_IMP(char,char)
-CILK_C_REDUCER_OPADD_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPADD_IMP(signed char,schar)
-CILK_C_REDUCER_OPADD_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPADD_IMP(short,short)
-CILK_C_REDUCER_OPADD_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPADD_IMP(int,int)
-CILK_C_REDUCER_OPADD_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPADD_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPADD_IMP(long,long)
-CILK_C_REDUCER_OPADD_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPADD_IMP(long long,longlong)
-CILK_C_REDUCER_OPADD_IMP(unsigned long long,ulonglong)
-CILK_C_REDUCER_OPADD_IMP(float,float)
-CILK_C_REDUCER_OPADD_IMP(double,double)
-CILK_C_REDUCER_OPADD_IMP(long double,longdouble)
-
-*/
+
+//@{
+/** @def CILK_C_REDUCER_OPADD_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
+ * will be defined, and this macro will generate reducer implementation
+ * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined,
+ * and this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPADD_DEFINITION(t,tn)
+#else
+# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPADD_DECLARATION(t,tn)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+CILK_C_REDUCER_OPADD_INSTANCE(char, char)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned char, uchar)
+CILK_C_REDUCER_OPADD_INSTANCE(signed char, schar)
+CILK_C_REDUCER_OPADD_INSTANCE(wchar_t, wchar_t)
+CILK_C_REDUCER_OPADD_INSTANCE(short, short)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned short, ushort)
+CILK_C_REDUCER_OPADD_INSTANCE(int, int)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, uint)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, unsigned) /* alternate name */
+CILK_C_REDUCER_OPADD_INSTANCE(long, long)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned long, ulong)
+CILK_C_REDUCER_OPADD_INSTANCE(long long, longlong)
+CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long, ulonglong)
+CILK_C_REDUCER_OPADD_INSTANCE(float, float)
+CILK_C_REDUCER_OPADD_INSTANCE(double, double)
+CILK_C_REDUCER_OPADD_INSTANCE(long double, longdouble)
+
+/// @endcond
__CILKRTS_END_EXTERN_C
+//@}
+
+//@}
+
#endif /* REDUCER_OPADD_H_INCLUDED */
diff --git a/libcilkrts/include/cilk/reducer_opand.h b/libcilkrts/include/cilk/reducer_opand.h
index 0ee7b9faf57..3b27246d0ea 100644
--- a/libcilkrts/include/cilk/reducer_opand.h
+++ b/libcilkrts/include/cilk/reducer_opand.h
@@ -1,33 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer_opand.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_opand.h
+/** @file reducer_opand.h
+ *
+ * @brief Defines classes for doing parallel bitwise and reductions.
*
- * Purpose: Reducer hyperobject to compute bitwise AND of values
+ * @ingroup ReducersAnd
+ *
+ * @see ReducersAnd
*/
#ifndef REDUCER_OPAND_H_INCLUDED
@@ -35,317 +43,475 @@
#include <cilk/reducer.h>
-#ifdef __cplusplus
-
-/* C++ Interface
- *
- * Purpose: Reducer hyperobject to compute bitwise AND values
- * When bool is passed as 'Type', it computes logical AND
- * operation.
- *
- * Classes: reducer_opand<Type>
- *
- * Description:
- * ============
- * This component provides a reducer-type hyperobject representation
- * that allows conducting bitwise AND operation to a non-local variable
- * using the &=, & operators. A common operation
- * when traversing a data structure is to bit-wise AND values
- * into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise AND result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
- *
- * When bool is passed as the 'Type', this reducer conducts logic AND
- * operation.
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing a bit-wise AND
- * operation on each object and accumulating the result of the operation
- * into an integer variable.
- *..
- * unsigned int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * unsigned int result = 1;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result &= compute(myArray[i]);
- * }
+/** @defgroup ReducersAnd Bitwise And Reducers
*
- * std::cout << "The result is: " << result << std::endl;
+ * Bitwise and reducers allow the computation of the bitwise and of a set of
+ * values in parallel.
*
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' variable.
- * The race is solved by changing 'result' to a 'reducer_opand' hyperobject:
- *..
- * unsigned int compute(const X& v);
- *
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_opand<unsigned int> result(1);
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * *result &= compute(myArray[i]);
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redopand_usage Usage Example
+ *
+ * cilk::reducer< cilk::op_and<unsigned> > r;
+ * cilk_for (int i = 0; i != N; ++i) {
+ * *r &= a[i];
* }
+ * unsigned result;
+ * r.move_out(result);
*
- * std::cout << "The result is: "
- * << result.get_value() << std::endl;
+ * @section redopand_monoid The Monoid
*
- * return 0;
- * }
- *
+ * @subsection redopand_monoid_values Value Set
+ *
+ * The value set of a bitwise and reducer is the set of values of `Type`,
+ * which is expected to be a builtin integer type which has a representation
+ * as a sequence of bits (or something like it, such as `bool` or
+ * `std::bitset`).
+ *
+ * @subsection redopand_monoid_operator Operator
+ *
+ * The operator of a bitwise and reducer is the bitwise and operator, defined
+ * by the “`&`” binary operator on `Type`.
+ *
+ * @subsection redopand_monoid_identity Identity
*
- * Operations provided:
- * ====================
- * Given 'reducer_opand' objects, x and y, the following are
- * valid statements:
- *..
- * *x &= 5;
- * *x = *x & 5;
- *..
- * The following are not valid expressions and will result in a run-time error
- * in a debug build:
- *..
- * x = y; // Cannot assign one reducer to another
- * *x = *y & 5; // Mixed reducers
- * *x = 5 & *x; // operator& is not necessarily commutative
- *..
- *..
- *
- * Requirements on the 'Type' parameter
- * ====================================
- * The 'Type' parameter used to instantiate the 'reducer_opand' class must
- * provide a &= operator that meets the requirements for an
- * *associative* *mutating* *operator* as defined in the Cilk++ user manual.
- * The identity function of 'Type' in class Monoid must yield a bit-wise
- * AND identity, i.e.,
- * a value (such as true in bool) that, when AND with any other value, yields
- * the other value.
- *
- * When unsigned int or bool is passed as 'Type', the identity function of
- * Monoid returns AND identity.
+ * The identity value of the reducer is the value whose representation
+ * contains all 1-bits. This is expected to be the value of the expression
+ * `~Type()` (i.e., the bitwise negation operator applied to the default value
+ * of the value type).
+ *
+ * @section redopand_operations Operations
+ *
+ * @subsection redopand_constructors Constructors
+ *
+ * reducer() // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ *
+ * @subsection redopand_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * const Type& = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
+ *
+ * @subsection redopand_initial Initial Values
+ *
+ * If a bitwise and reducer is constructed without an explicit initial value,
+ * then its initial value will be its identity value, as long as `Type`
+ * satisfies the requirements of @ref redopand_types.
+ *
+ * @subsection redopand_view_ops View Operations
+ *
+ * *r &= a
+ * *r = *r & a
+ * *r = *r & a1 & a2 … & an
+ *
+ * @section redopand_types Type and Operator Requirements
+ *
+ * `Type` must be `Copy Constructible`, `Default Constructible`, and
+ * `Assignable`.
+ *
+ * The operator “`&=`” must be defined on `Type`, with `x &= a` having the
+ * same meaning as `x = x & a`.
+ *
+ * The expression `~ Type()` must be a valid expression which yields the
+ * identity value (the value of `Type` whose representation consists of all
+ * 1-bits).
+ *
+ * @section redopand_in_c Bitwise And Reducers in C
+ *
+ * The @ref CILK_C_REDUCER_OPAND and @ref CILK_C_REDUCER_OPAND_TYPE macros can
+ * be used to do bitwise and reductions in C. For example:
+ *
+ * CILK_C_REDUCER_OPAND(r, uint, ~0);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(r) &= a[i];
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The bitwise AND of the elements of a is %x\n", REDUCER_VIEW(r));
+ *
+ * See @ref reducers_c_predefined.
*/
-#include <new>
+#ifdef __cplusplus
namespace cilk {
-/**
- * @brief A reducer-type hyperobject representation that allows conducting
- * bitwise AND operation to a non-local variable using the &=, & operators.
+/** The bitwise and reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_and<Type> >`. It holds the accumulator variable
+ * for the reduction, and allows only `and` operations to be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `&=` operation would be used in an expression like `*r &= a`, where
+ * `r` is an op_and reducer variable.
+ *
+ * @tparam Type The type of the contained accumulator variable. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
*
- * A common operation when traversing a data structure is to bit-wise AND
- * values into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise AND result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
+ * @see ReducersAnd
+ * @see op_and
+ *
+ * @ingroup ReducersAnd
*/
-
template <typename Type>
-class reducer_opand
+class op_and_view : public scalar_view<Type>
{
- public:
- /// Definition of data view, operation, and identity for reducer_opand
- class Monoid : public monoid_base<Type>
- {
- public:
- static void reduce(Type* left, Type* right);
-
- /// identity function must provide a value that,
- /// when AND with any other values, yields the other value
- void identity(Type* p) const { new ((void*) p) Type(~0); }
+ typedef scalar_view<Type> base;
+
+public:
+ /** Class to represent the right-hand side of `*reducer = *reducer & value`.
+ *
+ * The only assignment operator for the op_and_view class takes an
+ * rhs_proxy as its operand. This results in the syntactic restriction
+ * that the only expressions that can be assigned to an op_and_view are
+ * ones which generate an rhs_proxy — that is, expressions of the form
+ * `op_and_view & value ... & value`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same;
+ * otherwise, the behavior will be undefined. (I.e., `v1 = v1 & x` is
+ * legal; `v1 = v2 & x` is illegal.) This condition will be checked with
+ * a runtime assertion when compiled in debug mode.
+ *
+ * @see op_and_view
+ */
+ class rhs_proxy {
+ private:
+ friend class op_and_view;
+
+ const op_and_view* m_view;
+ Type m_value;
+
+ // Constructor is invoked only from op_and_view::operator&().
+ //
+ rhs_proxy(const op_and_view* view, const Type& value) : m_view(view), m_value(value) {}
+
+ rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+ rhs_proxy(); // Disable default constructor
+
+ public:
+ /** Bitwise and with an additional rhs value. If `v` is an op_and_view
+ * and `a1` is a value, then the expression `v & a1` invokes the
+ * view’s `operator&()` to create an rhs_proxy for `(v, a1)`; then
+ * `v & a1 & a2` invokes the rhs_proxy’s `operator&()` to create a new
+ * rhs_proxy for `(v, a1&a2)`. This allows the right-hand side of an
+ * assignment to be not just `view & value`, but
+ * `view & value & value ... & value`. The effect is that
+ *
+ * v = v & a1 & a2 ... & an;
+ *
+ * is evaluated as
+ *
+ * v = v & (a1 & a2 ... & an);
+ */
+ rhs_proxy& operator&(const Type& x) { m_value &= x; return *this; }
};
- /// "PRIVATE" HELPER CLASS
- class temp_and {
- friend class reducer_opand;
-
- Type* valuePtr_;
-
- // Default copy constructor, no assignment operator
- temp_and& operator=(const temp_and&);
-
- explicit temp_and(Type* valuePtr);
-
- public:
- temp_and& operator&(const Type& x);
- };
-
- public:
-
- /// Construct an 'reducer_opand' object with a value of 'Type()'.
- reducer_opand();
-
- /// Construct an 'reducer_opand' object with the specified initial value.
- explicit reducer_opand(const Type& initial_value);
- /// Return a const reference to the current value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Set the value of this object.
- ///
- /// @warning Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// AND 'x' to the value of this reducer and produce a temporary and object.
- /// The temporary and can be used for additional bit-wise operations
- /// or assigned back to this reducer.
- temp_and operator&(const Type& x) const;
-
- /// AND 'x' to the value of this object.
- reducer_opand& operator&=(const Type& x);
+ /** Default/identity constructor. This constructor initializes the
+ * contained value to `~ Type()`.
+ */
+ op_and_view() : base(~Type()) {}
+
+ /** Construct with a specified initial value.
+ */
+ explicit op_and_view(const Type& v) : base(v) {}
+
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_and monoid to combine the views
+ * of two strands when the right strand merges with the left one. It
+ * “ands” the value contained in the left-strand view with the value
+ * contained in the right-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_and monoid to implement the monoid
+ * reduce operation.
+ */
+ void reduce(op_and_view* right) { this->m_value &= right->m_value; }
+
+ /** @name Accumulator variable updates.
+ *
+ * These functions support the various syntaxes for “anding” the
+ * accumulator variable contained in the view with some value.
+ */
+ //@{
+
+ /** And the accumulator variable with @a x.
+ */
+ op_and_view& operator&=(const Type& x) { this->m_value &= x; return *this; }
+
+ /** Create an object representing `*this & x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator&(const Type& x) const { return rhs_proxy(this, x); }
+
+ /** Assign the result of a `view & value` expression to the view. Note that
+ * this is the only assignment operator for this class.
+ *
+ * @see rhs_proxy
+ */
+ op_and_view& operator=(const rhs_proxy& rhs) {
+ __CILKRTS_ASSERT(this == rhs.m_view);
+ this->m_value &= rhs.m_value;
+ return *this;
+ }
+
+ //@}
+};
- /// Merge the result of AND operation into this object. The AND operation
- /// must involve this reducer, i.e., x = x + 5; not x = y + 5;
- reducer_opand& operator=(const temp_and& temp);
+/** Monoid class for bitwise and reductions. Instantiate the cilk::reducer
+ * template class with an op_and monoid to create a bitwise and reducer
+ * class. For example, to compute the bitwise and of a set of `unsigned long`
+ * values:
+ *
+ * cilk::reducer< cilk::op_and<unsigned long> > r;
+ *
+ * @tparam Type The reducer value type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersAnd
+ * @see op_and_view
+ *
+ * @ingroup ReducersAnd
+ */
+template <typename Type, bool Align = false>
+struct op_and : public monoid_with_view<op_and_view<Type>, Align> {};
+/** Deprecated bitwise and reducer class.
+ *
+ * reducer_opand is the same as @ref reducer<@ref op_and>, except that
+ * reducer_opand is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is anded with a `reducer<%op_and>` with `*r &= a`, but a
+ * value can be anded with a `%reducer_opand` with `r &= a`.
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_opand.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_opand`
+ * and `reducer<%op_and>`. This allows incremental code
+ * conversion: old code that used `%reducer_opand` can pass a
+ * `%reducer_opand` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_and>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the reducer.
+ *
+ * @see op_and
+ * @see reducer
+ * @see ReducersAnd
+ *
+ * @ingroup ReducersAnd
+ */
+template <typename Type>
+class reducer_opand : public reducer< op_and<Type, true> >
+{
+ typedef reducer< op_and<Type, true> > base;
+ using base::view;
+
+public:
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view’s rhs proxy type.
+ typedef typename view_type::rhs_proxy rhs_proxy;
+
+ /// The view type for the reducer.
+ typedef view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Default constructor.
+ *
+ * Constructs the wrapper with the default initial value of `Type()`
+ * (not the identity value).
+ */
+ reducer_opand() : base(Type()) {}
+
+ /** Value constructor.
+ *
+ * Constructs the wrapper with a specified initial value.
+ */
+ explicit reducer_opand(const Type& initial_value) : base(initial_value) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_and_view. */
+ //@{
+
+ /// @copydoc op_and_view::operator&=(const Type&)
+ reducer_opand& operator&=(const Type& x)
+ {
+ view() &= x;
+ return *this;
+ }
+
+ // The legacy definition of reducer_opand::operator&() has different
+ // behavior and a different return type than this definition. The legacy
+ // version is defined as a member function, so this new version is defined
+ // as a free function to give it a different signature, so that they won’t
+ // end up sharing a single object file entry.
+
+ /// @copydoc op_and_view::operator&(const Type&) const
+ friend rhs_proxy operator&(const reducer_opand& r, const Type& x)
+ {
+ return r.view() & x;
+ }
+
+ /// @copydoc op_and_view::operator=(const rhs_proxy&)
+ reducer_opand& operator=(const rhs_proxy& temp)
+ {
+ view() = temp;
+ return *this;
+ }
+ //@}
+
+ /** @name Dereference
+ * @details Dereferencing a wrapper is a no-op. It simply returns the
+ * wrapper. Combined with the rule that the wrapper forwards view
+ * operations to its contained view, this means that view operations can
+ * be written the same way on reducers and wrappers, which is convenient
+ * for incrementally converting old code using wrappers to use reducers
+ * instead. That is:
+ *
+ * reducer< op_and<int> > r;
+ * *r &= a; // *r returns the view
+ * // operator &= is a view member function
+ *
+ * reducer_opand<int> w;
+ * *w &= a; // *w returns the wrapper
+ * // operator &= is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
reducer_opand& operator*() { return *this; }
reducer_opand const& operator*() const { return *this; }
reducer_opand* operator->() { return this; }
reducer_opand const* operator->() const { return this; }
-
- private:
- friend class temp_and;
-
- // Hyperobject to serve up views
- reducer<Monoid> imp_;
-
- // Not copyable
- reducer_opand(const reducer_opand&);
- reducer_opand& operator=(const reducer_opand&);
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_and<Type, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_and<Type, false> >* >(this);
+ }
+ operator const reducer< op_and<Type, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_and<Type, false> >* >(this);
+ }
+ //@}
};
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// ------------------------------------
-// template class reducer_opand::Monoid
-// ------------------------------------
-
-/**
- * Combines two views of the data.
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
+ *
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_and<Type> >` class to have an
+ * `operator reducer_opand<Type>& ()` conversion operator that statically
+ * downcasts the `reducer<op_and>` to the corresponding `reducer_opand` type.
+ * (The reverse conversion, from `reducer_opand` to `reducer<op_and>`, is just
+ * an upcast, which is provided for free by the language.)
+ *
+ * @ingroup ReducersAnd
*/
-template <typename Type>
-void
-reducer_opand<Type>::Monoid::reduce(Type* left, Type* right)
-{
- *left &= *right;
-}
-
-// ----------------------------
-// template class reducer_opand
-// ----------------------------
-
-template <typename Type>
-inline
-reducer_opand<Type>::reducer_opand()
- : imp_(Type())
-{
-}
-
-template <typename Type>
-inline
-reducer_opand<Type>::reducer_opand(const Type& initial_value)
- : imp_(initial_value)
-{
-}
-
-template <typename Type>
-inline
-const Type& reducer_opand<Type>::get_value() const
-{
- return imp_.view();
-}
-
-template <typename Type>
-inline
-void reducer_opand<Type>::set_value(const Type& value)
-{
- imp_.view() = value;
-}
-
-template <typename Type>
-inline
-typename reducer_opand<Type>::temp_and
-reducer_opand<Type>::operator&(const Type& x) const
-{
- Type* valuePtr = const_cast<Type*>(&imp_.view());
- *valuePtr = *valuePtr & x;
- return temp_and(valuePtr);
-}
-
-template <typename Type>
-inline
-reducer_opand<Type>& reducer_opand<Type>::operator&=(const Type& x)
-{
- imp_.view() &= x;
- return *this;
-}
-
-template <typename Type>
-inline
-reducer_opand<Type>&
-reducer_opand<Type>::operator=(
- const typename reducer_opand<Type>::temp_and& temp)
+template <typename Type, bool Align>
+struct legacy_reducer_downcast<reducer<op_and<Type, Align> > >
{
- // No-op. Just test that temp was constructed from this.
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_);
- return *this;
-}
-
-// --------------------------------------
-// template class reducer_opand::temp_and
-// --------------------------------------
-
-template <typename Type>
-inline
-reducer_opand<Type>::temp_and::temp_and(Type *valuePtr)
- : valuePtr_(valuePtr)
-{
-}
-
-template <typename Type>
-inline
-typename reducer_opand<Type>::temp_and&
-reducer_opand<Type>::temp_and::operator&(const Type& x)
-{
- *valuePtr_ = *valuePtr_ & x;
- return *this;
-}
+ typedef reducer_opand<Type> type;
+};
+/// @endcond
} // namespace cilk
-#endif /* __cplusplus */
+#endif // __cplusplus
+
-/* C Interface
+/** @ingroup ReducersAnd
*/
+//@{
+/** @name C language reducer macros
+ *
+ * These macros are used to declare and work with op_and reducers in C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
__CILKRTS_BEGIN_EXTERN_C
+/** Opand reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the op_and
+ * reducer type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersAnd
+ */
#define CILK_C_REDUCER_OPAND_TYPE(tn) \
__CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn)
+
+/** Declare an op_and reducer object.
+ *
+ * This macro expands into a declaration of an op_and reducer object for a
+ * specified numeric type. For example:
+ *
+ * CILK_C_REDUCER_OPAND(my_reducer, ulong, ~0UL);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ * @param v The initial value for the reducer. (A value which can be
+ * assigned to the numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersAnd
+ */
#define CILK_C_REDUCER_OPAND(obj,tn,v) \
CILK_C_REDUCER_OPAND_TYPE(tn) obj = \
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
@@ -353,57 +519,81 @@ __CILKRTS_BEGIN_EXTERN_C
__CILKRTS_MKIDENT(cilk_c_reducer_opand_identity_,tn), \
__cilkrts_hyperobject_noop_destroy, v)
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \
- typedef CILK_C_DECLARE_REDUCER(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn); \
+/// @cond internal
+
+/** Declare the op_and reducer functions for a numeric type.
+ *
+ * This macro expands into external function declarations for functions which
+ * implement the reducer functionality for the op_and reducer type for a
+ * specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPAND_DECLARATION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn);
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_OPAND_INSTANCE(char,char);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_OPAND_INSTANCE(signed char,schar);
-CILK_C_REDUCER_OPAND_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_OPAND_INSTANCE(short,short);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_OPAND_INSTANCE(int,int);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_OPAND_INSTANCE(long,long);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_OPAND_INSTANCE(long long,longlong);
-CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_OPAND_INSTANCE(float,float);
-CILK_C_REDUCER_OPAND_INSTANCE(double,double);
-CILK_C_REDUCER_OPAND_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPAND_IMP(t,tn) \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn);
+
+/** Define the op_and reducer functions for a numeric type.
+ *
+ * This macro expands into function definitions for functions which implement
+ * the reducer functionality for the op_and reducer type for a specified
+ * numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPAND_DEFINITION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r) \
{ *(t*)l &= *(t*)r; } \
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn) \
- { *(t*)v = (t)-1; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric tyeps. The contents of reducer_opand.c are as follows:
-
-CILK_C_REDUCER_OPAND_IMP(char,char)
-CILK_C_REDUCER_OPAND_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPAND_IMP(signed char,schar)
-CILK_C_REDUCER_OPAND_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPAND_IMP(short,short)
-CILK_C_REDUCER_OPAND_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPAND_IMP(int,int)
-CILK_C_REDUCER_OPAND_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPAND_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPAND_IMP(long,long)
-CILK_C_REDUCER_OPAND_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPAND_IMP(long long,longlong)
-CILK_C_REDUCER_OPAND_IMP(unsigned long long,ulonglong)
-
-*/
+ { *(t*)v = ~((t)0); }
+
+//@{
+/** @def CILK_C_REDUCER_OPAND_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
+ * will be defined, and this macro will generate reducer implementation
+ * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
+ * this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPAND_DEFINITION(t,tn)
+#else
+# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPAND_DECLARATION(t,tn)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for
+ * each numeric type.
+ */
+CILK_C_REDUCER_OPAND_INSTANCE(char, char)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned char, uchar)
+CILK_C_REDUCER_OPAND_INSTANCE(signed char, schar)
+CILK_C_REDUCER_OPAND_INSTANCE(wchar_t, wchar_t)
+CILK_C_REDUCER_OPAND_INSTANCE(short, short)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned short, ushort)
+CILK_C_REDUCER_OPAND_INSTANCE(int, int)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, uint)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, unsigned) /* alternate name */
+CILK_C_REDUCER_OPAND_INSTANCE(long, long)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned long, ulong)
+CILK_C_REDUCER_OPAND_INSTANCE(long long, longlong)
+CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long, ulonglong)
+
+//@endcond
__CILKRTS_END_EXTERN_C
-#endif /* REDUCER_OPAND_H_INCLUDED */
+//@}
+
+//@}
+
+#endif /* REDUCER_OPAND_H_INCLUDED */
diff --git a/libcilkrts/include/cilk/reducer_opmul.h b/libcilkrts/include/cilk/reducer_opmul.h
new file mode 100644
index 00000000000..47dce121ad9
--- /dev/null
+++ b/libcilkrts/include/cilk/reducer_opmul.h
@@ -0,0 +1,437 @@
+/* reducer_opmul.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2012-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ */
+
+/** @file reducer_opmul.h
+ *
+ * @brief Defines classes for doing parallel multiplication reductions.
+ *
+ * @ingroup ReducersMul
+ *
+ * @see ReducersMul
+ */
+
+#ifndef REDUCER_OPMUL_H_INCLUDED
+#define REDUCER_OPMUL_H_INCLUDED
+
+#include <cilk/reducer.h>
+
+/** @defgroup ReducersMul Multiplication Reducers
+ *
+ * Multiplication reducers allow the computation of the product of a set of
+ * values in parallel.
+ *
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redopmul_usage Usage Example
+ *
+ * cilk::reducer< cilk::op_mul<double> > r;
+ * cilk_for (int i = 0; i != N; ++i) {
+ * *r *= a[i];
+ * }
+ * double product;
+ * r.move_out(product);
+ *
+ * @section redopmul_monoid The Monoid
+ *
+ * @subsection redopmul_monoid_values Value Set
+ *
+ * The value set of a multiplication reducer is the set of values of `Type`,
+ * which is expected to be a builtin numeric type (or something like it, such
+ * as `std::complex`).
+ *
+ * @subsection redopmul_monoid_operator Operator
+ *
+ * The operator of a multiplication reducer is the multiplication operation,
+ * defined by the “`*`” binary operator on `Type`.
+ *
+ * @subsection redopmul_monoid_identity Identity
+ *
+ * The identity value of the reducer is the numeric value “`1`”. This is
+ * expected to be the value of the expression `Type(1)`.
+ *
+ * @section redopmul_operations Operations
+ *
+ * @subsection redopmul_constructors Constructors
+ *
+ * reducer() // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ *
+ * @subsection redopmul_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * const Type& = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
+ *
+ * @subsection redopmul_initial Initial Values
+ *
+ * If a multiplication reducer is constructed without an explicit initial
+ * value, then its initial value will be its identity value, as long as `Type`
+ * satisfies the requirements of @ref redopmul_types.
+ *
+ * @subsection redopmul_view_ops View Operations
+ *
+ * *r *= a
+ * *r = *r * a
+ * *r = *r * a1 * a2 … * an
+ *
+ * @section redopmul_floating_point Issues with Floating-Point Types
+ *
+ * Because of overflow and underflow issues, floating-point multiplication is
+ * not really associative. For example, `(1e200 * 1e-200) * 1e-200 == 1e-200`,
+ * but `1e200 * (1e-200 * 1e-200) == 0`.
+ *
+ * In many cases, this won’t matter, but computations which have been
+ * carefully ordered to control overflow and underflow may not deal well with
+ * being reassociated. In general, you should be sure to understand the
+ * floating-point behavior of your program before doing any transformation
+ * that will reassociate its computations.
+ *
+ * @section redopmul_types Type and Operator Requirements
+ *
+ * `Type` must be `Copy Constructible`, `Default Constructible`, and
+ * `Assignable`.
+ *
+ * The operator “`*=`” must be defined on `Type`, with `x *= a` having the same
+ * meaning as `x = x * a`.
+ *
+ * The expression `Type(1)` must be a valid expression which yields the
+ * identity value (the value of `Type` whose numeric value is `1`).
+ *
+ * @section redopmul_in_c Multiplication Reducers in C
+ *
+ * The @ref CILK_C_REDUCER_OPMUL and @ref CILK_C_REDUCER_OPMUL_TYPE macros can
+ * be used to do multiplication reductions in C. For example:
+ *
+ * CILK_C_REDUCER_OPMUL(r, double, 1);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(r) *= a[i];
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The product of the elements of a is %f\n", REDUCER_VIEW(r));
+ *
+ * See @ref reducers_c_predefined.
+ */
+
+#ifdef __cplusplus
+
+namespace cilk {
+
+/** The multiplication reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_mul<Type> >`. It holds the accumulator variable
+ * for the reduction, and allows only multiplication operations to be
+ * performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `*=` operation would be used in an expression like `*r *= a`, where
+ * `r` is an op_mul reducer variable.
+ *
+ * @tparam Type The type of the contained accumulator variable. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
+ *
+ * @see ReducersMul
+ * @see op_mul
+ *
+ * @ingroup ReducersMul
+ */
+template <typename Type>
+class op_mul_view : public scalar_view<Type>
+{
+ typedef scalar_view<Type> base;
+
+public:
+ /** Class to represent the right-hand side of `*reducer = *reducer * value`.
+ *
+ * The only assignment operator for the op_mul_view class takes an
+ * rhs_proxy as its operand. This results in the syntactic restriction
+ * that the only expressions that can be assigned to an op_mul_view are
+ * ones which generate an rhs_proxy — that is, expressions of the form
+ * `op_mul_view * value ... * value`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same;
+ * otherwise, the behavior will be undefined. (I.e., `v1 = v1 * x` is
+ * legal; `v1 = v2 * x` is illegal.) This condition will be checked with a
+ * runtime assertion when compiled in debug mode.
+ *
+ * @see op_mul_view
+ */
+ class rhs_proxy {
+ friend class op_mul_view;
+
+ const op_mul_view* m_view;
+ Type m_value;
+
+ // Constructor is invoked only from op_mul_view::operator*().
+ //
+ rhs_proxy(const op_mul_view* view, const Type& value) : m_view(view), m_value(value) {}
+
+ rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+ rhs_proxy(); // Disable default constructor
+
+ public:
+ /** Multiply by an additional rhs value. If `v` is an op_mul_view and
+ * `a1` is a value, then the expression `v * a1` invokes the view’s
+ * `operator*()` to create an rhs_proxy for `(v, a1)`; then
+ * `v * a1 * a2` invokes the rhs_proxy’s `operator*()` to create a
+ * new rhs_proxy for `(v, a1*a2)`. This allows the right-hand side of
+ * an assignment to be not just `view * value`, but
+ * `view * value * value ... * value`. The effect is that
+ *
+ * v = v * a1 * a2 ... * an;
+ *
+ * is evaluated as
+ *
+ * v = v * (a1 * a2 ... * an);
+ */
+ rhs_proxy& operator*(const Type& x) { m_value *= x; return *this; }
+ };
+
+
+ /** Default/identity constructor. This constructor initializes the
+ * contained value to `Type(1)`, which is expected to be the identity
+ * value for multiplication on `Type`.
+ */
+ op_mul_view() : base(Type(1)) {}
+
+ /** Construct with a specified initial value.
+ */
+ explicit op_mul_view(const Type& v) : base(v) {}
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_mul monoid to combine the views
+ * of two strands when the right strand merges with the left one. It
+ * multiplies the value contained in the left-strand view by the value
+ * contained in the right-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_mul monoid to implement the monoid
+ * reduce operation.
+ */
+ void reduce(op_mul_view* right) { this->m_value *= right->m_value; }
+
+ /** @name Accumulator variable updates.
+ *
+ * These functions support the various syntaxes for multiplying the
+ * accumulator variable contained in the view by some value.
+ */
+ //@{
+
+ /** Multiply the accumulator variable by @a x.
+ */
+ op_mul_view& operator*=(const Type& x) { this->m_value *= x; return *this; }
+
+ /** Create an object representing `*this * x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator*(const Type& x) const { return rhs_proxy(this, x); }
+
+ /** Assign the result of a `view * value` expression to the view. Note that
+ * this is the only assignment operator for this class.
+ *
+ * @see rhs_proxy
+ */
+ op_mul_view& operator=(const rhs_proxy& rhs) {
+ __CILKRTS_ASSERT(this == rhs.m_view);
+ this->m_value *= rhs.m_value;
+ return *this;
+ }
+
+ //@}
+};
+
+/** Monoid class for multiplication reductions. Instantiate the cilk::reducer
+ * template class with an op_mul monoid to create a multiplication reducer
+ * class. For example, to compute the product of a set of `double` values:
+ *
+ * cilk::reducer< cilk::op_mul<double> > r;
+ *
+ * @see ReducersMul
+ * @see op_mul_view
+ *
+ * @ingroup ReducersMul
+ */
+template <typename Type>
+struct op_mul : public monoid_with_view< op_mul_view<Type> > {};
+
+} // namespace cilk
+
+#endif // __cplusplus
+
+
+/** @ingroup ReducersMul
+ */
+//@{
+
+/** @name C language reducer macros
+ *
+ * These macros are used to declare and work with numeric op_mul reducers in
+ * C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/** Opmul reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the op_mul
+ * reducer type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersMul
+ */
+#define CILK_C_REDUCER_OPMUL_TYPE(tn) \
+ __CILKRTS_MKIDENT(cilk_c_reducer_opmul_,tn)
+
+/** Declare an op_mul reducer object.
+ *
+ * This macro expands into a declaration of an op_mul reducer object for a
+ * specified numeric type. For example:
+ *
+ * CILK_C_REDUCER_OPMUL(my_reducer, double, 1.0);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ * @param v The initial value for the reducer. (A value which can be
+ * assigned to the numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersMul
+ */
+#define CILK_C_REDUCER_OPMUL(obj,tn,v) \
+ CILK_C_REDUCER_OPMUL_TYPE(tn) obj = \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_opmul_reduce_,tn), \
+ __CILKRTS_MKIDENT(cilk_c_reducer_opmul_identity_,tn), \
+ __cilkrts_hyperobject_noop_destroy, v)
+
+/// @cond internal
+
+/** Declare the op_mul reducer functions for a numeric type.
+ *
+ * This macro expands into external function declarations for functions which
+ * implement the reducer functionality for the op_mul reducer type for a
+ * specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r); \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn);
+
+/** Define the op_mul reducer functions for a numeric type.
+ *
+ * This macro expands into function definitions for functions which implement
+ * the reducer functionality for the op_mul reducer type for a specified
+ * numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \
+ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r) \
+ { *(t*)l *= *(t*)r; } \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn) \
+ { *(t*)v = 1; }
+
+//@{
+/** @def CILK_C_REDUCER_OPMUL_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
+ * will be defined, and this macro will generate reducer implementation
+ * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
+ * this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPMUL_DEFINITION(t,tn)
+#else
+# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPMUL_DECLARATION(t,tn)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+CILK_C_REDUCER_OPMUL_INSTANCE(char, char)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned char, uchar)
+CILK_C_REDUCER_OPMUL_INSTANCE(signed char, schar)
+CILK_C_REDUCER_OPMUL_INSTANCE(wchar_t, wchar_t)
+CILK_C_REDUCER_OPMUL_INSTANCE(short, short)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned short, ushort)
+CILK_C_REDUCER_OPMUL_INSTANCE(int, int)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, uint)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, unsigned) /* alternate name */
+CILK_C_REDUCER_OPMUL_INSTANCE(long, long)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long, ulong)
+CILK_C_REDUCER_OPMUL_INSTANCE(long long, longlong)
+CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long long, ulonglong)
+CILK_C_REDUCER_OPMUL_INSTANCE(float, float)
+CILK_C_REDUCER_OPMUL_INSTANCE(double, double)
+CILK_C_REDUCER_OPMUL_INSTANCE(long double, longdouble)
+
+//@endcond
+
+__CILKRTS_END_EXTERN_C
+
+//@}
+
+//@}
+
+#endif /* REDUCER_OPMUL_H_INCLUDED */
diff --git a/libcilkrts/include/cilk/reducer_opor.h b/libcilkrts/include/cilk/reducer_opor.h
index 2b2de9bc163..5472e603555 100644
--- a/libcilkrts/include/cilk/reducer_opor.h
+++ b/libcilkrts/include/cilk/reducer_opor.h
@@ -1,33 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer_opor.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_opor.h
+/** @file reducer_opor.h
+ *
+ * @brief Defines classes for doing parallel bitwise or reductions.
*
- * Purpose: Reducer hyperobject to compute bitwise OR values
+ * @ingroup ReducersOr
+ *
+ * @see ReducersOr
*/
#ifndef REDUCER_OPOR_H_INCLUDED
@@ -35,367 +43,551 @@
#include <cilk/reducer.h>
-#ifdef __cplusplus
-
-/* C++ Interface
- *
- * Purpose: Reducer hyperobject to compute bitwise OR values
- * When bool is passed as 'Type', it computes logical OR
- * operation.
- *
- * Classes: reducer_opor<Type>
- *
- * Description:
- * ============
- * This component provides a reducer-type hyperobject representation
- * that allows conducting bitwise OR operation to a non-local variable
- * using the |=, | operators. A common operation
- * when traversing a data structure is to bit-wise OR values
- * into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise OR result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
- *
- * When bool is passed as the 'Type', this reducer conducts logic OR
- * operation.
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing a bit-wise OR
- * operation on each object and accumulating the result of the operation
- * into an integer variable.
- *..
- * unsigned int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * unsigned int result = 0;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result |= compute(myArray[i]);
- * }
+/** @defgroup ReducersOr Bitwise Or Reducers
*
- * std::cout << "The result is: " << result << std::endl;
+ * Bitwise or reducers allow the computation of the bitwise or of a set of
+ * values in parallel.
*
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' variable.
- * The race is solved by changing 'result' to a 'reducer_opor' hyperobject:
- *..
- * unsigned int compute(const X& v);
- *
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_opor<unsigned int> result;
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * *result |= compute(myArray[i]);
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redopor_usage Usage Example
+ *
+ * cilk::reducer< cilk::op_or<unsigned> > r;
+ * cilk_for (int i = 0; i != N; ++i) {
+ * *r |= a[i];
* }
+ * unsigned result;
+ * r.move_out(result);
*
- * std::cout << "The result is: "
- * << result.get_value() << std::endl;
+ * @section redopor_monoid The Monoid
*
- * return 0;
- * }
- *
+ * @subsection redopor_monoid_values Value Set
+ *
+ * The value set of a bitwise or reducer is the set of values of `Type`, which
+ * is expected to be a builtin integer type which has a representation as a
+ * sequence of bits (or something like it, such as `bool` or `std::bitset`).
+ *
+ * @subsection redopor_monoid_operator Operator
+ *
+ * The operator of a bitwise or reducer is the bitwise or operator, defined by
+ * the “`|`” binary operator on `Type`.
+ *
+ * @subsection redopor_monoid_identity Identity
*
- * Operations provided:
- * ====================
- * Given 'reducer_opor' objects, x and y, the following are
- * valid statements:
- *..
- * *x |= 5;
- * *x = *x | 5;
- *..
- * The following are not valid expressions and will result in a run-time error
- * in a debug build:
- *..
- * x = y; // Cannot assign one reducer to another
- * *x = *y | 5; // Mixed reducers
- * *x = 5 | *x; // operator| is not necessarily commutative
- *..
- *
- * Requirements on the 'Type' parameter
- * ====================================
- * The 'Type' parameter used to instantiate the 'reducer_opor' class must
- * provide a |= operator that meets the requirements for an
- * *associative* *mutating* *operator* as defined in the Cilk++ user manual.
- * The default constructor for 'Type' must yield an OR identity, i.e.,
- * a value (such as unsigned int 0, bool false) that, when performed
- * OR operation to any other value, yields the other value.
+ * The identity value of the reducer is the value whose representation
+ * contains all 0-bits. This is expected to be the value of the default
+ * constructor `Type()`.
+ *
+ * @section redopor_operations Operations
+ *
+ * @subsection redopor_constructors Constructors
+ *
+ * reducer() // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ *
+ * @subsection redopor_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * const Type& = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
+ *
+ * @subsection redopor_initial Initial Values
+ *
+ * If a bitwise or reducer is constructed without an explicit initial value,
+ * then its initial value will be its identity value, as long as `Type`
+ * satisfies the requirements of @ref redopor_types.
+ *
+ * @subsection redopor_view_ops View Operations
+ *
+ * *r |= a
+ * *r = *r | a
+ * *r = *r | a1 | a2 … | an
+ *
+ * @section redopor_types Type and Operator Requirements
+ *
+ * `Type` must be `Copy Constructible`, `Default Constructible`, and
+ * `Assignable`.
+ *
+ * The operator “`|=`” must be defined on `Type`, with `x |= a` having the
+ * same meaning as `x = x | a`.
+ *
+ * The expression `Type()` must be a valid expression which yields the
+ * identity value (the value of `Type` whose representation consists of all
+ * 0-bits).
+ *
+ * @section redopor_in_c Bitwise Or Reducers in C
+ *
+ * The @ref CILK_C_REDUCER_OPOR and @ref CILK_C_REDUCER_OPOR_TYPE macros can
+ * be used to do bitwise or reductions in C. For example:
+ *
+ * CILK_C_REDUCER_OPOR(r, uint, 0);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(r) |= a[i];
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The bitwise OR of the elements of a is %x\n", REDUCER_VIEW(r));
+ *
+ * See @ref reducers_c_predefined.
*/
-#include <new>
+#ifdef __cplusplus
-namespace cilk
-{
+namespace cilk {
-/**
- * @brief A reducer-type hyperobject representation that supports bitwise OR
- * operations on a non-local variable using the |=, | operators.
+/** The bitwise or reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_or<Type> >`. It holds the accumulator variable for
+ * the reduction, and allows only `or` operations to be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `|=` operation would be used in an expression like `*r |= a`, where
+ * `r` is an op_or reducer variable.
*
- * A common operation when traversing a data structure is to bit-wise OR
- * values into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise OR result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
+ * @tparam Type The type of the contained accumulator variable. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
*
- * When bool is passed as the 'Type', this reducer conducts logic OR
- * operation.
+ * @see ReducersOr
+ * @see op_or
+ *
+ * @ingroup ReducersOr
*/
template <typename Type>
-class reducer_opor
+class op_or_view : public scalar_view<Type>
{
- public:
- /// Definition of data view, operation, and identity for reducer_opor
- class Monoid : public monoid_base<Type>
- {
- public:
- /// Combines two views of the data
- static void reduce(Type* left, Type* right);
+ typedef scalar_view<Type> base;
+
+public:
+ /** Class to represent the right-hand side of `*reducer = *reducer | value`.
+ *
+ * The only assignment operator for the op_or_view class takes an
+ * rhs_proxy as its operand. This results in the syntactic restriction
+ * that the only expressions that can be assigned to an op_or_view are
+ * ones which generate an rhs_proxy — that is, expressions of the form
+ * `op_or_view | value ... | value`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same;
+ * otherwise, the behavior will be undefined. (I.e., `v1 = v1 | x` is
+ * legal; `v1 = v2 | x` is illegal.) This condition will be checked with
+ * a runtime assertion when compiled in debug mode.
+ *
+ * @see op_or_view
+ */
+ class rhs_proxy {
+ friend class op_or_view;
+
+ const op_or_view* m_view;
+ Type m_value;
+
+ // Constructor is invoked only from op_or_view::operator|().
+ //
+ rhs_proxy(const op_or_view* view, const Type& value) : m_view(view), m_value(value) {}
+
+ rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+ rhs_proxy(); // Disable default constructor
+
+ public:
+ /** Bitwise or with an additional rhs value. If `v` is an op_or_view
+ * and `a1` is a value, then the expression `v | a1` invokes the
+ * view’s `operator|()` to create an rhs_proxy for `(v, a1)`; then
+ * `v | a1 | a2` invokes the rhs_proxy’s `operator|()` to create a new
+ * rhs_proxy for `(v, a1|a2)`. This allows the right-hand side of an
+ * assignment to be not just `view | value`, but
+ * `view | value | value ... | value`. The effect is that
+ *
+ * v = v | a1 | a2 ... | an;
+ *
+ * is evaluated as
+ *
+ * v = v | (a1 | a2 ... | an);
+ */
+ rhs_proxy& operator|(const Type& x) { m_value |= x; return *this; }
};
- /// "PRIVATE" HELPER CLASS
- class temp_or {
- friend class reducer_opor;
-
- Type* valuePtr_;
- // Default copy constructor, no assignment operator
- temp_or& operator=(const temp_or&);
+ /** Default/identity constructor. This constructor initializes the
+ * contained value to `Type()`.
+ */
+ op_or_view() : base() {}
+
+ /** Construct with a specified initial value.
+ */
+ explicit op_or_view(const Type& v) : base(v) {}
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_or monoid to combine the views
+ * of two strands when the right strand merges with the left one. It
+ * “ors” the value contained in the left-strand view by the value
+ * contained in the right-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_or monoid to implement the monoid
+ * reduce operation.
+ */
+ void reduce(op_or_view* right) { this->m_value |= right->m_value; }
+
+ /** @name Accumulator variable updates.
+ *
+ * These functions support the various syntaxes for “oring” the
+ * accumulator variable contained in the view with some value.
+ */
+ //@{
+
+ /** Or the accumulator variable with @a x.
+ */
+ op_or_view& operator|=(const Type& x) { this->m_value |= x; return *this; }
+
+ /** Create an object representing `*this | x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator|(const Type& x) const { return rhs_proxy(this, x); }
+
+ /** Assign the result of a `view | value` expression to the view. Note that
+ * this is the only assignment operator for this class.
+ *
+ * @see rhs_proxy
+ */
+ op_or_view& operator=(const rhs_proxy& rhs) {
+ __CILKRTS_ASSERT(this == rhs.m_view);
+ this->m_value |= rhs.m_value;
+ return *this;
+ }
+
+ //@}
+};
- explicit temp_or(Type* valuePtr);
+/** Monoid class for bitwise or reductions. Instantiate the cilk::reducer
+ * template class with an op_or monoid to create a bitwise or reducer
+ * class. For example, to compute the bitwise or of a set of `unsigned long`
+ * values:
+ *
+ * cilk::reducer< cilk::op_or<unsigned long> > r;
+ *
+ * @tparam Type The reducer value type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersOr
+ * @see op_or_view
+ *
+ * @ingroup ReducersOr
+ */
+template <typename Type, bool Align = false>
+struct op_or : public monoid_with_view<op_or_view<Type>, Align> {};
- public:
- temp_or& operator|(const Type& x);
- };
+/** Deprecated bitwise or reducer class.
+ *
+ * reducer_opor is the same as @ref reducer<@ref op_or>, except that
+ * reducer_opor is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is ored with a `reducer<%op_or>` with `*r |= a`, but a
+ * value can be ored with a `%reducer_opor` with `r |= a`.
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_opor.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_opor`
+ * and `reducer<%op_or>`. This allows incremental code
+ * conversion: old code that used `%reducer_opor` can pass a
+ * `%reducer_opor` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_or>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the reducer.
+ *
+ * @see op_or
+ * @see reducer
+ * @see ReducersOr
+ *
+ * @ingroup ReducersOr
+ */
+template <typename Type>
+class reducer_opor : public reducer< op_or<Type, true> >
+{
+ typedef reducer< op_or<Type, true> > base;
+ using base::view;
public:
-
- /// Construct an 'reducer_opor' object with a value of 'Type()'.
- reducer_opor();
-
- /// Construct an 'reducer_opor' object with the specified initial value.
- explicit reducer_opor(const Type& initial_value);
-
- /// Return a const reference to the current value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Set the value of this object.
- ///
- /// @warning: Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// OR 'x' to the value of this reducer and produce a temporary and object.
- /// The temporary and can be used for additional bit-wise operations
- /// or assigned back to this reducer.
- temp_or operator|(const Type& x) const;
-
- /// OR 'x' to the value of this object.
- reducer_opor& operator|=(const Type& x);
-
- /// Merge the result of OR operation into this object. The OR operation
- /// must involve this reducer, i.e., x = x + 5; not x = y + 5;
- reducer_opor& operator=(const temp_or& temp);
-
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view’s rhs proxy type.
+ typedef typename view_type::rhs_proxy rhs_proxy;
+
+ /// The view type for the reducer.
+ typedef view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Default (identity) constructor.
+ *
+ * Constructs the wrapper with the default initial value of `Type()`.
+ */
+ reducer_opor() {}
+
+ /** Value constructor.
+ *
+ * Constructs the wrapper with a specified initial value.
+ */
+ explicit reducer_opor(const Type& initial_value) : base(initial_value) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_or_view. */
+ //@{
+
+ /// @copydoc op_or_view::operator|=(const Type&)
+ reducer_opor& operator|=(const Type& x)
+ {
+ view() |= x; return *this;
+ }
+
+ // The legacy definition of reducer_opor::operator|() has different
+ // behavior and a different return type than this definition. The legacy
+ // version is defined as a member function, so this new version is defined
+ // as a free function to give it a different signature, so that they won’t
+ // end up sharing a single object file entry.
+
+ /// @copydoc op_or_view::operator|(const Type&) const
+ friend rhs_proxy operator|(const reducer_opor& r, const Type& x)
+ {
+ return r.view() | x;
+ }
+
+ /// @copydoc op_or_view::operator=(const rhs_proxy&)
+ reducer_opor& operator=(const rhs_proxy& temp)
+ {
+ view() = temp; return *this;
+ }
+ //@}
+
+ /** @name Dereference
+ * @details Dereferencing a wrapper is a no-op. It simply returns the
+ * wrapper. Combined with the rule that the wrapper forwards view
+ * operations to its contained view, this means that view operations can
+ * be written the same way on reducers and wrappers, which is convenient
+ * for incrementally converting old code using wrappers to use reducers
+ * instead. That is:
+ *
+ * reducer< op_or<int> > r;
+ * *r |= a; // *r returns the view
+ * // operator |= is a view member function
+ *
+ * reducer_opor<int> w;
+ * *w |= a; // *w returns the wrapper
+ * // operator |= is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
reducer_opor& operator*() { return *this; }
reducer_opor const& operator*() const { return *this; }
reducer_opor* operator->() { return this; }
reducer_opor const* operator->() const { return this; }
-
- private:
- friend class temp_or;
-
- // Hyperobject to serve up views
- reducer<Monoid> imp_;
-
- // Not copyable
- reducer_opor(const reducer_opor&);
- reducer_opor& operator=(const reducer_opor&);
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_or<Type, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_or<Type, false> >* >(this);
+ }
+ operator const reducer< op_or<Type, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_or<Type, false> >* >(this);
+ }
+ //@}
+
};
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// ------------------------------------
-// template class reducer_opor::Monoid
-// ------------------------------------
-
-template <typename Type>
-void
-reducer_opor<Type>::Monoid::reduce(Type* left, Type* right)
-{
- *left |= *right;
-}
-
-// ----------------------------
-// template class reducer_opor
-// ----------------------------
-
-template <typename Type>
-inline
-reducer_opor<Type>::reducer_opor()
- : imp_(Type())
-{
-}
-
-template <typename Type>
-inline
-reducer_opor<Type>::reducer_opor(const Type& initial_value)
- : imp_(initial_value)
-{
-}
-
-template <typename Type>
-inline
-const Type& reducer_opor<Type>::get_value() const
-{
- return imp_.view();
-}
-
-template <typename Type>
-inline
-void reducer_opor<Type>::set_value(const Type& value)
-{
- imp_.view() = value;
-}
-
-template <typename Type>
-inline
-typename reducer_opor<Type>::temp_or
-reducer_opor<Type>::operator|(const Type& x) const
-{
- Type* valuePtr = const_cast<Type*>(&imp_.view());
- *valuePtr = *valuePtr | x;
- return temp_or(valuePtr);
-}
-
-template <typename Type>
-inline
-reducer_opor<Type>& reducer_opor<Type>::operator|=(const Type& x)
-{
- imp_.view() |= x;
- return *this;
-}
-
-template <typename Type>
-inline
-reducer_opor<Type>&
-reducer_opor<Type>::operator=(
- const typename reducer_opor<Type>::temp_or& temp)
-{
- // No-op. Just test that temp was constructed from this.
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_);
- return *this;
-}
-
-// --------------------------------------
-// template class reducer_opor::temp_or
-// --------------------------------------
-
-template <typename Type>
-inline
-reducer_opor<Type>::temp_or::temp_or(Type *valuePtr)
- : valuePtr_(valuePtr)
-{
-}
-
-template <typename Type>
-inline
-typename reducer_opor<Type>::temp_or&
-reducer_opor<Type>::temp_or::operator|(const Type& x)
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
+ *
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_or<Type> >` class to have an
+ * `operator reducer_opor<Type>& ()` conversion operator that statically
+ * downcasts the `reducer<op_or>` to the corresponding `reducer_opor` type.
+ * (The reverse conversion, from `reducer_opor` to `reducer<op_or>`, is just
+ * an upcast, which is provided for free by the language.)
+ *
+ * @ingroup ReducersOr
+ */
+template <typename Type, bool Align>
+struct legacy_reducer_downcast<reducer<op_or<Type, Align> > >
{
- *valuePtr_ = *valuePtr_ | x;
- return *this;
-}
+ typedef reducer_opor<Type> type;
+};
+/// @endcond
} // namespace cilk
#endif /* __cplusplus */
-/* C Interface
+
+/** @ingroup ReducersOr
*/
+//@{
+/** @name C language reducer macros
+ *
+ * These macros are used to declare and work with op_or reducers in C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
__CILKRTS_BEGIN_EXTERN_C
+/** Opor reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the op_or
+ * reducer type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersOr
+ */
#define CILK_C_REDUCER_OPOR_TYPE(tn) \
__CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn)
+
+/** Declare an op_or reducer object.
+ *
+ * This macro expands into a declaration of an op_or reducer object for a
+ * specified numeric type. For example:
+ *
+ * CILK_C_REDUCER_OPOR(my_reducer, ulong, 0);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ * @param v The initial value for the reducer. (A value which can be
+ * assigned to the numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersOr
+ */
#define CILK_C_REDUCER_OPOR(obj,tn,v) \
CILK_C_REDUCER_OPOR_TYPE(tn) obj = \
- CILK_C_INIT_REDUCER(_Typeof(obj.value), \
+ CILK_C_INIT_REDUCER(_Typeof(obj.value), \
__CILKRTS_MKIDENT(cilk_c_reducer_opor_reduce_,tn), \
__CILKRTS_MKIDENT(cilk_c_reducer_opor_identity_,tn), \
__cilkrts_hyperobject_noop_destroy, v)
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \
- typedef CILK_C_DECLARE_REDUCER(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn); \
+/// @cond internal
+
+/** Declare the op_or reducer functions for a numeric type.
+ *
+ * This macro expands into external function declarations for functions which
+ * implement the reducer functionality for the op_or reducer type for a
+ * specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPOR_DECLARATION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn);
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_OPOR_INSTANCE(char,char);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_OPOR_INSTANCE(signed char,schar);
-CILK_C_REDUCER_OPOR_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_OPOR_INSTANCE(short,short);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_OPOR_INSTANCE(int,int);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_OPOR_INSTANCE(long,long);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_OPOR_INSTANCE(long long,longlong);
-CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_OPOR_INSTANCE(float,float);
-CILK_C_REDUCER_OPOR_INSTANCE(double,double);
-CILK_C_REDUCER_OPOR_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPOR_IMP(t,tn) \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn);
+
+/** Define the op_or reducer functions for a numeric type.
+ *
+ * This macro expands into function definitions for functions which implement
+ * the reducer functionality for the op_or reducer type for a specified
+ * numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPOR_DEFINITION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r) \
{ *(t*)l |= *(t*)r; } \
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn) \
- { *(t*)v = (t)0; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric tyeps. The contents of reducer_opor.c are as follows:
-
-CILK_C_REDUCER_OPOR_IMP(char,char)
-CILK_C_REDUCER_OPOR_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPOR_IMP(signed char,schar)
-CILK_C_REDUCER_OPOR_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPOR_IMP(short,short)
-CILK_C_REDUCER_OPOR_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPOR_IMP(int,int)
-CILK_C_REDUCER_OPOR_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPOR_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPOR_IMP(long,long)
-CILK_C_REDUCER_OPOR_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPOR_IMP(long long,longlong)
-CILK_C_REDUCER_OPOR_IMP(unsigned long long,ulonglong)
-
-*/
+ { *(t*)v = 0; }
+
+//@{
+/** @def CILK_C_REDUCER_OPOR_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
+ * will be defined, and this macro will generate reducer implementation
+ * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
+ * this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPOR_DEFINITION(t,tn)
+#else
+# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPOR_DECLARATION(t,tn)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+CILK_C_REDUCER_OPOR_INSTANCE(char, char)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned char, uchar)
+CILK_C_REDUCER_OPOR_INSTANCE(signed char, schar)
+CILK_C_REDUCER_OPOR_INSTANCE(wchar_t, wchar_t)
+CILK_C_REDUCER_OPOR_INSTANCE(short, short)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned short, ushort)
+CILK_C_REDUCER_OPOR_INSTANCE(int, int)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, uint)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, unsigned) /* alternate name */
+CILK_C_REDUCER_OPOR_INSTANCE(long, long)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned long, ulong)
+CILK_C_REDUCER_OPOR_INSTANCE(long long, longlong)
+CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long, ulonglong)
+
+//@endcond
__CILKRTS_END_EXTERN_C
+//@}
+
+//@}
+
#endif /* REDUCER_OPOR_H_INCLUDED */
diff --git a/libcilkrts/include/cilk/reducer_opxor.h b/libcilkrts/include/cilk/reducer_opxor.h
index 8aad7c052fa..5e128e7f2c7 100644
--- a/libcilkrts/include/cilk/reducer_opxor.h
+++ b/libcilkrts/include/cilk/reducer_opxor.h
@@ -1,33 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+/* reducer_opxor.h -*- C++ -*-
*
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_opxor.h
+/** @file reducer_opxor.h
+ *
+ * @brief Defines classes for doing parallel bitwise xor reductions.
*
- * Purpose: Reducer hyperobject to compute bitwise XOR values
+ * @ingroup ReducersXor
+ *
+ * @see ReducersXor
*/
#ifndef REDUCER_OPXOR_H_INCLUDED
@@ -35,308 +43,469 @@
#include <cilk/reducer.h>
-#ifdef __cplusplus
-
-/* C++ interface
- *
- * Purpose: Reducer hyperobject to compute bitwise XOR values
- * When bool is passed as 'Type', it computes logical XOR
- * operation.
- *
- * Classes: reducer_opxxor<Type>
- *
- * Description:
- * ============
- * This component provides a reducer-type hyperobject representation
- * that allows conducting bitwise XOR operation to a non-local variable
- * using the ^=, ^ operators. A common operation
- * when traversing a data structure is to bit-wise XOR values
- * into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise XOR result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
- *
- * When bool is passed as the 'Type', this reducer conducts logic XOR
- * operation.
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing a bit-wise XOR
- * operation on each object and accumulating the result of the operation
- * into an integer variable.
- *..
- * unsigned int compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * unsigned int result = 0;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result ^= compute(myArray[i]);
- * }
+/** @defgroup ReducersXor Bitwise Xor Reducers
*
- * std::cout << "The result is: " << result << std::endl;
+ * Bitwise xor reducers allow the computation of the bitwise xor of a set of
+ * values in parallel.
*
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' variable.
- * The race is solved by changing 'result' to a 'reducer_opxor' hyperobject:
- *..
- * unsigned int compute(const X& v);
- *
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_opxor<unsigned int> result;
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * *result ^= compute(myArray[i]);
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file `reducers.md`, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redopxor_usage Usage Example
+ *
+ * cilk::reducer< cilk::op_xor<unsigned> > r;
+ * cilk_for (int i = 0; i != N; ++i) {
+ * *r ^= a[i];
* }
+ * unsigned result;
+ * r.move_out(result);
*
- * std::cout << "The result is: "
- * << result.get_value() << std::endl;
+ * @section redopxor_monoid The Monoid
*
- * return 0;
- * }
- *
+ * @subsection redopxor_monoid_values Value Set
+ *
+ * The value set of a bitwise xor reducer is the set of values of `Type`, which
+ * is expected to be a builtin integer type which has a representation as a
+ * sequence of bits (or something like it, such as `bool` or `std::bitset`).
+ *
+ * @subsection redopxor_monoid_operator Operator
+ *
+ * The operator of a bitwise xor reducer is the bitwise xor operator, defined
+ * by the “`^`” binary operator on `Type`.
+ *
+ * @subsection redopxor_monoid_identity Identity
*
- * Operations provided:
- * ====================
- * Given 'reducer_opxor' objects, x and y, the following are
- * valid statements:
- *..
- * x ^= 5;
- * x = x ^ 5;
- *..
- * The following are not valid expressions and will result in a run-time error
- * in a debug build:
- *..
- * x = y; // Cannot assign one reducer to another
- * x = y ^ 5; // Mixed reducers
- * x = 5 ^ x; // operator^ is not necessarily commutative
- *..
- *
- * Requirements on the 'Type' parameter
- * ====================================
- * The 'Type' parameter used to instantiate the 'reducer_opxor' class must
- * provide a ^= operator that meets the requirements for an
- * *associative* *mutating* *operator* as defined in the Cilk++ user manual.
- * The default constructor for 'Type' must yield an XOR identity, i.e.,
- * a value (such as unsigned int 0, bool false) that, when performed
- * XOR operation to any other value, yields the other value.
+ * The identity value of the reducer is the value whose representation
+ * contains all 0-bits. This is expected to be the value of the default
+ * constructor `Type()`.
+ *
+ * @section redopxor_operations Operations
+ *
+ * @subsection redopxor_constructors Constructors
+ *
+ * reducer() // identity
+ * reducer(const Type& value)
+ * reducer(move_in(Type& variable))
+ *
+ * @subsection redopxor_get_set Set and Get
+ *
+ * r.set_value(const Type& value)
+ * const Type& = r.get_value() const
+ * r.move_in(Type& variable)
+ * r.move_out(Type& variable)
+ *
+ * @subsection redopxor_initial Initial Values
+ *
+ * If a bitwise xor reducer is constructed without an explicit initial value,
+ * then its initial value will be its identity value, as long as `Type`
+ * satisfies the requirements of @ref redopxor_types.
+ *
+ * @subsection redopxor_view_ops View Operations
+ *
+ * *r ^= a
+ * *r = *r ^ a
+ * *r = *r ^ a1 ^ a2 … ^ an
+ *
+ * @section redopxor_types Type and Operator Requirements
+ *
+ * `Type` must be `Copy Constructible`, `Default Constructible`, and
+ * `Assignable`.
+ *
+ * The operator “`^=`” must be defined on `Type`, with `x ^= a` having the
+ * same meaning as `x = x ^ a`.
+ *
+ * The expression `Type()` must be a valid expression which yields the
+ * identity value (the value of `Type` whose representation consists of all
+ * 0-bits).
+ *
+ * @section redopxor_in_c Bitwise Xor Reducers in C
+ *
+ * The @ref CILK_C_REDUCER_OPXOR and @ref CILK_C_REDUCER_OPXOR_TYPE macros can
+ * be used to do bitwise xor reductions in C. For example:
+ *
+ * CILK_C_REDUCER_OPXOR(r, uint, 0);
+ * CILK_C_REGISTER_REDUCER(r);
+ * cilk_for(int i = 0; i != n; ++i) {
+ * REDUCER_VIEW(r) ^= a[i];
+ * }
+ * CILK_C_UNREGISTER_REDUCER(r);
+ * printf("The bitwise XOR of the elements of a is %x\n", REDUCER_VIEW(r));
+ *
+ * See @ref reducers_c_predefined.
*/
-#include <new>
+#ifdef __cplusplus
namespace cilk {
-/**
- * @brief A reducer-type hyperobject representation that supports bitwise XOR
- * operations to a non-local variable using the ^=, ^ operators.
+/** The bitwise xor reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_xor<Type> >`. It holds the accumulator variable
+ * for the reduction, and allows only `xor` operations to be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `^=` operation would be used in an expression like `*r ^= a`, where
+ * `r` is an op_xor reducer variable.
+ *
+ * @tparam Type The type of the contained accumulator variable. This will
+ * be the value type of a monoid_with_view that is
+ * instantiated with this view.
*
- * A common operation when traversing a data structure is to bit-wise XOR
- * values into a non-local numeric variable. When Cilk parallelism is
- * introduced, however, a data race will occur on the variable holding
- * the bit-wise XOR result. By replacing the variable with the
- * hyperobject defined in this component, the data race is eliminated.
+ * @see ReducersXor
+ * @see op_xor
*
- * When bool is passed as the 'Type', this reducer conducts logic XOR
- * operation.
+ * @ingroup ReducersXor
*/
template <typename Type>
-class reducer_opxor
+class op_xor_view : public scalar_view<Type>
{
- public:
- /// Definition of data view, operation, and identity for reducer_opxor
- class Monoid : public monoid_base<Type>
- {
+ typedef scalar_view<Type> base;
+
+public:
+ /** Class to represent the right-hand side of `*reducer = *reducer ^ value`.
+ *
+ * The only assignment operator for the op_xor_view class takes an
+ * rhs_proxy as its operand. This results in the syntactic restriction
+ * that the only expressions that can be assigned to an op_xor_view are
+ * ones which generate an rhs_proxy — that is, expressions of the form
+ * `op_xor_view ^ value ... ^ value`.
+ *
+ * @warning
+ * The lhs and rhs views in such an assignment must be the same;
+ * otherwise, the behavior will be undefined. (I.e., `v1 = v1 ^ x` is
+ * legal; `v1 = v2 ^ x` is illegal.) This condition will be checked with
+ * a runtime assertion when compiled in debug mode.
+ *
+ * @see op_xor_view
+ */
+ class rhs_proxy {
+ friend class op_xor_view;
+
+ const op_xor_view* m_view;
+ Type m_value;
+
+ // Constructor is invoked only from op_xor_view::operator^().
+ //
+ rhs_proxy(const op_xor_view* view, const Type& value) : m_view(view), m_value(value) {}
+
+ rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator
+ rhs_proxy(); // Disable default constructor
+
public:
- /// Combines two views of the data
- static void reduce(Type* left, Type* right);
+ /** Bitwise xor with an additional rhs value. If `v` is an op_xor_view
+ * and `a1` is a value, then the expression `v ^ a1` invokes the
+ * view’s `operator^()` to create an rhs_proxy for `(v, a1)`; then
+ * `v ^ a1 ^ a2` invokes the rhs_proxy’s `operator^()` to create a new
+ * rhs_proxy for `(v, a1^a2)`. This allows the right-hand side of an
+ * assignment to be not just `view ^ value`, but
+ * `view ^ value ^ value ... ^ value`. The effect is that
+ *
+ * v = v ^ a1 ^ a2 ... ^ an;
+ *
+ * is evaluated as
+ *
+ * v = v ^ (a1 ^ a2 ... ^ an);
+ */
+ rhs_proxy& operator^(const Type& x) { m_value ^= x; return *this; }
};
- /// "PRIVATE" HELPER CLASS
- class temp_xor {
- friend class reducer_opxor;
-
- Type* valuePtr_;
- // Default copy constructor, no assignment operator
- temp_xor& operator=(const temp_xor&);
+ /** Default/identity constructor. This constructor initializes the
+ * contained value to `Type()`.
+ */
+ op_xor_view() : base() {}
+
+ /** Construct with a specified initial value.
+ */
+ explicit op_xor_view(const Type& v) : base(v) {}
+
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_xor monoid to combine the views
+ * of two strands when the right strand merges with the left one. It
+ * “xors” the value contained in the left-strand view with the value
+ * contained in the right-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_xor monoid to implement the monoid
+ * reduce operation.
+ */
+ void reduce(op_xor_view* right) { this->m_value ^= right->m_value; }
+
+ /** @name Accumulator variable updates.
+ *
+ * These functions support the various syntaxes for “xoring” the
+ * accumulator variable contained in the view with some value.
+ */
+ //@{
+
+ /** Xor the accumulator variable with @a x.
+ */
+ op_xor_view& operator^=(const Type& x) { this->m_value ^= x; return *this; }
+
+ /** Create an object representing `*this ^ x`.
+ *
+ * @see rhs_proxy
+ */
+ rhs_proxy operator^(const Type& x) const { return rhs_proxy(this, x); }
+
+ /** Assign the result of a `view ^ value` expression to the view. Note that
+ * this is the only assignment operator for this class.
+ *
+ * @see rhs_proxy
+ */
+ op_xor_view& operator=(const rhs_proxy& rhs) {
+ __CILKRTS_ASSERT(this == rhs.m_view);
+ this->m_value ^= rhs.m_value;
+ return *this;
+ }
+
+ //@}
+};
- explicit temp_xor(Type* valuePtr);
+/** Monoid class for bitwise xor reductions. Instantiate the cilk::reducer
+ * template class with an op_xor monoid to create a bitwise xor reducer
+ * class. For example, to compute the bitwise xor of a set of `unsigned long`
+ * values:
+ *
+ * cilk::reducer< cilk::op_xor<unsigned long> > r;
+ *
+ * @tparam Type The reducer value type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
+ *
+ * @see ReducersXor
+ * @see op_xor_view
+ *
+ * @ingroup ReducersXor
+ */
+template <typename Type, bool Align = false>
+struct op_xor : public monoid_with_view<op_xor_view<Type>, Align> {};
- public:
- temp_xor& operator^(const Type& x);
- };
+/** Deprecated bitwise xor reducer class.
+ *
+ * reducer_opxor is the same as @ref reducer<@ref op_xor>, except that
+ * reducer_opxor is a proxy for the contained view, so that accumulator
+ * variable update operations can be applied directly to the reducer. For
+ * example, a value is xored with a `reducer<%op_xor>` with `*r ^= a`, but a
+ * value can be xored with a `%reducer_opxor` with `r ^= a`.
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_opxor.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_opxor`
+ * and `reducer<%op_xor>`. This allows incremental code
+ * conversion: old code that used `%reducer_opxor` can pass a
+ * `%reducer_opxor` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_xor>`, and vice
+ * versa.
+ *
+ * @tparam Type The value type of the reducer.
+ *
+ * @see op_xor
+ * @see reducer
+ * @see ReducersXor
+ *
+ * @ingroup ReducersXor
+ */
+template <typename Type>
+class reducer_opxor : public reducer< op_xor<Type, true> >
+{
+ typedef reducer< op_xor<Type, true> > base;
+ using base::view;
public:
-
- /// Construct an 'reducer_opxor' object with a value of 'Type()'.
- reducer_opxor();
-
- /// Construct an 'reducer_opxor' object with the specified initial value.
- explicit reducer_opxor(const Type& initial_value);
-
- /// Return a const reference to the current value of this object.
- ///
- /// @warning If this method is called before the parallel calculation is
- /// complete, the value returned by this method will be a partial result.
- const Type& get_value() const;
-
- /// Set the value of this object.
- ///
- /// @warning: Setting the value of a reducer such that it violates the
- /// associative operation algebra will yield results that are likely to
- /// differ from serial execution and may differ from run to run.
- void set_value(const Type& value);
-
- /// XOR 'x' to the value of this reducer and produce a temporary and object.
- /// The temporary and can be used for additional bit-wise operations
- /// or assigned back to this reducer.
- temp_xor operator^(const Type& x) const;
-
- /// XOR 'x' to the value of this object.
- reducer_opxor& operator^=(const Type& x);
-
- /// Merge the result of XOR operation into this object. The XOR operation
- /// must involve this reducer, i.e., x = x + 5; not x = y + 5;
- reducer_opxor& operator=(const temp_xor& temp);
-
+ /// The view type for the reducer.
+ typedef typename base::view_type view_type;
+
+ /// The view’s rhs proxy type.
+ typedef typename view_type::rhs_proxy rhs_proxy;
+
+ /// The view type for the reducer.
+ typedef view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type Monoid;
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** Default (identity) constructor.
+ *
+ * Constructs the wrapper with the default initial value of `Type()`.
+ */
+ reducer_opxor() {}
+
+ /** Value constructor.
+ *
+ * Constructs the wrapper with a specified initial value.
+ */
+ explicit reducer_opxor(const Type& initial_value) : base(initial_value) {}
+
+ //@}
+
+ /** @name Forwarded functions
+ * @details Functions that update the contained accumulator variable are
+ * simply forwarded to the contained @ref op_xor_view. */
+ //@{
+
+ /// @copydoc op_xor_view::operator^=(const Type&)
+ reducer_opxor& operator^=(const Type& x)
+ {
+ view() ^= x; return *this;
+ }
+
+ // The legacy definition of reducer_opxor::operator^() has different
+ // behavior and a different return type than this definition. The legacy
+ // version is defined as a member function, so this new version is defined
+ // as a free function to give it a different signature, so that they won’t
+ // end up sharing a single object file entry.
+
+ /// @copydoc op_xor_view::operator^(const Type&) const
+ friend rhs_proxy operator^(const reducer_opxor& r, const Type& x)
+ {
+ return r.view() ^ x;
+ }
+
+ /// @copydoc op_xor_view::operator=(const rhs_proxy&)
+ reducer_opxor& operator=(const rhs_proxy& temp)
+ {
+ view() = temp; return *this;
+ }
+ //@}
+
+ /** @name Dereference
+ * @details Dereferencing a wrapper is a no-op. It simply returns the
+ * wrapper. Combined with the rule that the wrapper forwards view
+ * operations to its contained view, this means that view operations can
+ * be written the same way on reducers and wrappers, which is convenient
+ * for incrementally converting old code using wrappers to use reducers
+ * instead. That is:
+ *
+ * reducer< op_xor<int> > r;
+ * *r ^= a; // *r returns the view
+ * // operator ^= is a view member function
+ *
+ * reducer_opxor<int> w;
+ * *w ^= a; // *w returns the wrapper
+ * // operator ^= is a wrapper member function that
+ * // calls the corresponding view function
+ */
+ //@{
reducer_opxor& operator*() { return *this; }
reducer_opxor const& operator*() const { return *this; }
reducer_opxor* operator->() { return this; }
reducer_opxor const* operator->() const { return this; }
-
- private:
- friend class temp_or;
-
- // Hyperobject to serve up views
- reducer<Monoid> imp_;
-
- // Not copyable
- reducer_opxor(const reducer_opxor&);
- reducer_opxor& operator=(const reducer_opxor&);
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_xor<Type, false> >& ()
+ {
+ return *reinterpret_cast< reducer< op_xor<Type, false> >* >(this);
+ }
+ operator const reducer< op_xor<Type, false> >& () const
+ {
+ return *reinterpret_cast< const reducer< op_xor<Type, false> >* >(this);
+ }
+ //@}
+
};
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
-
-// ------------------------------------
-// template class reducer_opxor::Monoid
-// ------------------------------------
-
-template <typename Type>
-void
-reducer_opxor<Type>::Monoid::reduce(Type* left, Type* right)
-{
- *left ^= *right;
-}
-
-// ----------------------------
-// template class reducer_opxor
-// ----------------------------
-
-template <typename Type>
-inline
-reducer_opxor<Type>::reducer_opxor()
- : imp_(Type())
-{
-}
-
-template <typename Type>
-inline
-reducer_opxor<Type>::reducer_opxor(const Type& initial_value)
- : imp_(initial_value)
-{
-}
-
-template <typename Type>
-inline
-const Type& reducer_opxor<Type>::get_value() const
-{
- return imp_.view();
-}
-
-template <typename Type>
-inline
-void reducer_opxor<Type>::set_value(const Type& value)
-{
- imp_.view() = value;
-}
-
-template <typename Type>
-inline
-typename reducer_opxor<Type>::temp_xor
-reducer_opxor<Type>::operator^(const Type& x) const
-{
- Type* valuePtr = const_cast<Type*>(&imp_.view());
- *valuePtr = *valuePtr ^ x;
- return temp_xor(valuePtr);
-}
-
-template <typename Type>
-inline
-reducer_opxor<Type>& reducer_opxor<Type>::operator^=(const Type& x)
-{
- imp_.view() ^= x;
- return *this;
-}
-
-template <typename Type>
-inline
-reducer_opxor<Type>&
-reducer_opxor<Type>::operator=(
- const typename reducer_opxor<Type>::temp_xor& temp)
-{
- // No-op. Just test that temp was constructed from this.
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_);
- return *this;
-}
-
-// --------------------------------------
-// template class reducer_opxor::temp_xor
-// --------------------------------------
-
-template <typename Type>
-inline
-reducer_opxor<Type>::temp_xor::temp_xor(Type *valuePtr)
- : valuePtr_(valuePtr)
-{
-}
-
-template <typename Type>
-inline
-typename reducer_opxor<Type>::temp_xor&
-reducer_opxor<Type>::temp_xor::operator^(const Type& x)
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
+ *
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_xor<Type> >` class to have an
+ * `operator reducer_opxor<Type>& ()` conversion operator that statically
+ * downcasts the `reducer<op_xor>` to the corresponding `reducer_opxor` type.
+ * (The reverse conversion, from `reducer_opxor` to `reducer<op_xor>`, is just
+ * an upcast, which is provided for free by the language.)
+ *
+ * @ingroup ReducersXor
+ */
+template <typename Type, bool Align>
+struct legacy_reducer_downcast<reducer<op_xor<Type, Align> > >
{
- *valuePtr_ = *valuePtr_ ^ x;
- return *this;
-}
+ typedef reducer_opxor<Type> type;
+};
+/// @endcond
} // namespace cilk
#endif /* __cplusplus */
-/* C Interface
+
+/** @ingroup ReducersXor
*/
+//@{
+/** @name C language reducer macros
+ *
+ * These macros are used to declare and work with op_xor reducers in C code.
+ *
+ * @see @ref page_reducers_in_c
+ */
+ //@{
+
__CILKRTS_BEGIN_EXTERN_C
+/** Opxor reducer type name.
+ *
+ * This macro expands into the identifier which is the name of the op_xor
+ * reducer type for a specified numeric type.
+ *
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersXor
+ */
#define CILK_C_REDUCER_OPXOR_TYPE(tn) \
__CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn)
+
+/** Declare an op_xor reducer object.
+ *
+ * This macro expands into a declaration of an op_xor reducer object for a
+ * specified numeric type. For example:
+ *
+ * CILK_C_REDUCER_OPXOR(my_reducer, ulong, 0);
+ *
+ * @param obj The variable name to be used for the declared reducer object.
+ * @param tn The @ref reducers_c_type_names "numeric type name" specifying
+ * the type of the reducer.
+ * @param v The initial value for the reducer. (A value which can be
+ * assigned to the numeric type represented by @a tn.)
+ *
+ * @see @ref reducers_c_predefined
+ * @see ReducersXor
+ */
#define CILK_C_REDUCER_OPXOR(obj,tn,v) \
CILK_C_REDUCER_OPXOR_TYPE(tn) obj = \
CILK_C_INIT_REDUCER(_Typeof(obj.value), \
@@ -344,57 +513,81 @@ __CILKRTS_BEGIN_EXTERN_C
__CILKRTS_MKIDENT(cilk_c_reducer_opxor_identity_,tn), \
__cilkrts_hyperobject_noop_destroy, v)
-/* Declare an instance of the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \
- typedef CILK_C_DECLARE_REDUCER(t) \
- __CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn); \
+/// @cond internal
+
+/** Declare the op_xor reducer functions for a numeric type.
+ *
+ * This macro expands into external function declarations for functions which
+ * implement the reducer functionality for the op_xor reducer type for a
+ * specified numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r); \
- __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn);
-
-/* Declare an instance of the reducer type for each numeric type */
-CILK_C_REDUCER_OPXOR_INSTANCE(char,char);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char,uchar);
-CILK_C_REDUCER_OPXOR_INSTANCE(signed char,schar);
-CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t,wchar_t);
-CILK_C_REDUCER_OPXOR_INSTANCE(short,short);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short,ushort);
-CILK_C_REDUCER_OPXOR_INSTANCE(int,int);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int,uint);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int,unsigned); /* alternate name */
-CILK_C_REDUCER_OPXOR_INSTANCE(long,long);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long,ulong);
-CILK_C_REDUCER_OPXOR_INSTANCE(long long,longlong);
-CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long,ulonglong);
-CILK_C_REDUCER_OPXOR_INSTANCE(float,float);
-CILK_C_REDUCER_OPXOR_INSTANCE(double,double);
-CILK_C_REDUCER_OPXOR_INSTANCE(long double,longdouble);
-
-/* Declare function bodies for the reducer for a specific numeric type */
-#define CILK_C_REDUCER_OPXOR_IMP(t,tn) \
+ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn);
+
+/** Define the op_xor reducer functions for a numeric type.
+ *
+ * This macro expands into function definitions for functions which implement
+ * the reducer functionality for the op_xor reducer type for a specified
+ * numeric type.
+ *
+ * @param t The value type of the reducer.
+ * @param tn The value “type name” identifier, used to construct the reducer
+ * type name, function names, etc.
+ */
+#define CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) \
+ typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \
__CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r) \
{ *(t*)l ^= *(t*)r; } \
__CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn) \
- { *(t*)v = (t)0; }
-
-/* c_reducers.c contains definitions for all of the monoid functions
- for the C numeric tyeps. The contents of reducer_opxor.c are as follows:
-
-CILK_C_REDUCER_OPXOR_IMP(char,char)
-CILK_C_REDUCER_OPXOR_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPXOR_IMP(signed char,schar)
-CILK_C_REDUCER_OPXOR_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPXOR_IMP(short,short)
-CILK_C_REDUCER_OPXOR_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPXOR_IMP(int,int)
-CILK_C_REDUCER_OPXOR_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPXOR_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPXOR_IMP(long,long)
-CILK_C_REDUCER_OPXOR_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPXOR_IMP(long long,longlong)
-CILK_C_REDUCER_OPXOR_IMP(unsigned long long,ulonglong)
-
-*/
+ { *(t*)v = 0; }
+
+//@{
+/** @def CILK_C_REDUCER_OPXOR_INSTANCE
+ * @brief Declare or define implementation functions for a reducer type.
+ *
+ * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS`
+ * will be defined, and this macro will generate reducer implementation
+ * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and
+ * this macro will expand into external declarations for the functions.
+ */
+#ifdef CILK_C_DEFINE_REDUCERS
+# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPXOR_DEFINITION(t,tn)
+#else
+# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \
+ CILK_C_REDUCER_OPXOR_DECLARATION(t,tn)
+#endif
+//@}
+
+/* Declare or define an instance of the reducer type and its functions for each
+ * numeric type.
+ */
+CILK_C_REDUCER_OPXOR_INSTANCE(char, char)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char, uchar)
+CILK_C_REDUCER_OPXOR_INSTANCE(signed char, schar)
+CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t, wchar_t)
+CILK_C_REDUCER_OPXOR_INSTANCE(short, short)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short, ushort)
+CILK_C_REDUCER_OPXOR_INSTANCE(int, int)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, uint)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, unsigned) /* alternate name */
+CILK_C_REDUCER_OPXOR_INSTANCE(long, long)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long, ulong)
+CILK_C_REDUCER_OPXOR_INSTANCE(long long, longlong)
+CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long, ulonglong)
+
+/// @endcond
__CILKRTS_END_EXTERN_C
-#endif // REDUCER_OPXOR_H_INCLUDED
+//@}
+
+//@}
+
+#endif /* REDUCER_OPXOR_H_INCLUDED */
diff --git a/libcilkrts/include/cilk/reducer_ostream.h b/libcilkrts/include/cilk/reducer_ostream.h
index 9d57824bce4..d64c740c081 100644
--- a/libcilkrts/include/cilk/reducer_ostream.h
+++ b/libcilkrts/include/cilk/reducer_ostream.h
@@ -1,26 +1,31 @@
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
*/
diff --git a/libcilkrts/include/cilk/reducer_string.h b/libcilkrts/include/cilk/reducer_string.h
index 9d323271ae2..676b16d0e6e 100644
--- a/libcilkrts/include/cilk/reducer_string.h
+++ b/libcilkrts/include/cilk/reducer_string.h
@@ -1,108 +1,41 @@
-/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
- *
+/* reducer_string.h -*- C++ -*-
+ *
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*/
-/*
- * reducer_string.h
- *
- * Purpose: Reducer hyperobject to accumulate a string.
- *
- * Classes: reducer_basic_string<Elem, Traits, Alloc>
- * reducer_string - convenience name for a string-of-char reducer
- * reducer_wstring - convenience name for a string-of-wchar_t reducer
- *
- * Description:
- * ============
- * This component provides a reducer-type hyperobject representation that
- * allows appending characters to an STL string. By replacing the variable
- * with the hyperobject defined in this component, the data race is eliminated.
- *
- * reducer_basic_string is actually implemented using a list to avoid memory
- * fragmentation issues as text is appended to the string. The string
- * components are assembled into a single string before being returned by
- * get_value().
- *
- * Usage Example:
- * ==============
- * Assume we wish to traverse an array of objects, performing an operation on
- * each object and accumulating the result of the operation into an STL string
- * variable.
- *..
- * char *compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * std::string result;
- * for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * result += compute(myArray[i]);
- * }
- *
- * std::cout << "The result is: " << result.c_str() << std::endl;
-
- * return 0;
- * }
- *..
- * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel,
- * but doing so will create a data race on the 'result' variable.
- * The race is solved by changing 'result' to a 'reducer_string' hyperobject:
- *..
- * char *compute(const X& v);
- *
- * int test()
- * {
- * const std::size_t ARRAY_SIZE = 1000000;
- * extern X myArray[ARRAY_SIZE];
- * // ...
- *
- * cilk::reducer_string result;
- * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i)
- * {
- * *result += compute(myArray[i]);
- * }
- *
- * std::cout << "The result is: " << result.get_value() << std::endl;
+/** @file reducer_string.h
*
- * return 0;
- * }
- *..
+ * @brief Defines classes for doing parallel string creation by appending.
*
- * Operations provided:
- * ====================
+ * @ingroup ReducersString
*
- * 'reducer_string' supports operator+= and append.
- *
- * The the current value of the reducer can be retrieved using the 'get_value'
- * method. As with most reducers, the 'get_value' method produces deterministic
- * results only if called before the first spawn after creating a 'hyperobject'
- * or when all strands spawned since creating the 'hyperobject' have been
- * synced.
+ * @see ReducersString
*/
#ifndef REDUCER_STRING_H_INCLUDED
@@ -112,565 +45,680 @@
#include <string>
#include <list>
-namespace cilk
-{
-
-/**
- * @brief Reducer hyperobject representation of a string.
+/** @defgroup ReducersString String Reducers
*
- * Typedefs for 8-bit character strings (reducer_string) and 16-bit character
- * strings (reducer_wstring) are provided at the end of the file.
- */
-template<class _Elem,
- class _Traits = std::char_traits<_Elem>,
- class _Alloc = std::allocator<_Elem> >
-class reducer_basic_string
-{
-public:
- /// Type of the basic_string reducer_basic_string is based on
- typedef std::basic_string<_Elem, _Traits, _Alloc> string_type;
+ * String reducers allow the creation of a string by concatenating a set of
+ * strings or characters in parallel.
+ *
+ * @ingroup Reducers
+ *
+ * You should be familiar with @ref pagereducers "Cilk reducers", described in
+ * file reducers.md, and particularly with @ref reducers_using, before trying
+ * to use the information in this file.
+ *
+ * @section redstring_usage Usage Example
+ *
+ * vector<Data> data;
+ * void expensive_string_computation(const Data& x, string& s);
+ * cilk::reducer<cilk::op_string> r;
+ * cilk_for (int i = 0; i != data.size(); ++i) {
+ * string temp;
+ * expensive_string_computation(data[i], temp);
+ * *r += temp;
+ * }
+ * string result;
+ * r.move_out(result);
+ *
+ * @section redstring_monoid The Monoid
+ *
+ * @subsection redstring_monoid_values Value Set
+ *
+ * The value set of a string reducer is the set of values of the class
+ * `std::basic_string<Char, Traits, Alloc>`, which we refer to as “the
+ * reducer’s string type”.
+ *
+ * @subsection redstring_monoid_operator Operator
+ *
+ * The operator of a string reducer is the string concatenation operator,
+ * defined by the “`+`” binary operator on the reducer’s string type.
+ *
+ * @subsection redstring_monoid_identity Identity
+ *
+ * The identity value of a string reducer is the empty string, which is the
+ * value of the expression
+ * `std::basic_string<Char, Traits, Alloc>([allocator])`.
+ *
+ * @section redstring_operations Operations
+ *
+ * In the operation descriptions below, the type name `String` refers to the
+ * reducer’s string type, `std::basic_string<Char, Traits, Alloc>`.
+ *
+ * @subsection redstring_constructors Constructors
+ *
+ * Any argument list which is valid for a `std::basic_string` constructor is
+ * valid for a string reducer constructor. The usual move-in constructor is
+ * also provided:
+ *
+ * reducer(move_in(String& variable))
+ *
+ * @subsection redstring_get_set Set and Get
+ *
+ * r.set_value(const String& value)
+ * const String& = r.get_value() const
+ * r.move_in(String& variable)
+ * r.move_out(String& variable)
+ *
+ * @subsection redstring_initial Initial Values
+ *
+ * A string reducer with no constructor arguments, or with only an allocator
+ * argument, will initially contain the identity value, an empty string.
+ *
+ * @subsection redstring_view_ops View Operations
+ *
+ * *r += a
+ * r->append(a)
+ * r->append(a, b)
+ * r->push_back(a)
+ *
+ * These operations on string reducer views are the same as the corresponding
+ * operations on strings.
+ *
+ * @section redstring_performance Performance Considerations
+ *
+ * String reducers work by creating a string for each view, collecting those
+ * strings in a list, and then concatenating them into a single result string
+ * at the end of the computation. This last step takes place in serial code,
+ * and necessarily takes time proportional to the length of the result string.
+ * Thus, a parallel string reducer cannot actually speed up the time spent
+ * directly creating the string. This trivial example would probably be slower
+ * (because of reducer overhead) than the corresponding serial code:
+ *
+ * vector<string> a;
+ * reducer<op_string> r;
+ * cilk_for (int i = 0; i != a.size(); ++i) {
+ * *r += a[i];
+ * }
+ * string result;
+ * r.move_out(result);
+ *
+ * What a string reducer _can_ do is to allow the _remainder_ of the
+ * computation to be done in parallel, without having to worry about managing
+ * the string computation.
+ *
+ * The strings for new views are created (by the view identity constructor)
+ * using the same allocator as the string that was created when the reducer
+ * was constructed. Note that this allocator is determined when the reducer is
+ * constructed. The following two examples may have very different behavior:
+ *
+ * basic_string<Char, Traits, Allocator> a_string;
+ *
+ * reducer< op_basic_string<Char, Traits, Allocator> > reducer1(move_in(a_string));
+ * ... parallel computation ...
+ * reducer1.move_out(a_string);
+ *
+ * reducer< op_basic_string<Char, Traits, Allocator> > reducer2;
+ * reducer2.move_in(a_string);
+ * ... parallel computation ...
+ * reducer2.move_out(a_string);
+ *
+ * * `reducer1` will be constructed with the same allocator as `a_string`,
+ * because the string was specified in the constructor. The `move_in`
+ * and `move_out` can therefore be done with a `swap` in constant time.
+ * * `reducer2` will be constructed with a _default_ allocator of type
+ * `Allocator`, which may not be the same as the allocator of `a_string`.
+ * Therefore, the `move_in` and `move_out` may have to be done with a copy
+ * in _O(N)_ time.
+ *
+ * (All instances of an allocator type with no internal state (like
+ * `std::allocator`) are “the same”. You only need to worry about the “same
+ * allocator” issue when you create string reducers with custom allocator
+ * types.)
+ *
+ * @section redstring_types Type and Operator Requirements
+ *
+ * `std::basic_string<Char, Traits, Alloc>` must be a valid type.
+*/
- /// Type of sizes
- typedef typename string_type::size_type size_type;
+namespace cilk {
- /// Character type for reducer_basic_string
- typedef _Elem basic_value_type;
+/** @ingroup ReducersString */
+//@{
- /// Internal representation of the per-strand view of the data for reducer_basic_string
- struct View
+/** The string append reducer view class.
+ *
+ * This is the view class for reducers created with
+ * `cilk::reducer< cilk::op_basic_string<Char, Traits, Alloc> >`. It holds
+ * the accumulator variable for the reduction, and allows only append
+ * operations to be performed on it.
+ *
+ * @note The reducer “dereference” operation (`reducer::operator *()`)
+ * yields a reference to the view. Thus, for example, the view class’s
+ * `append` operation would be used in an expression like
+ * `r->append(a)`, where `r` is a string append reducer variable.
+ *
+ * @tparam Char The string element type (not the string type).
+ * @tparam Traits The character traits type.
+ * @tparam Alloc The string allocator type.
+ *
+ * @see ReducersString
+ * @see op_basic_string
+ */
+template<typename Char, typename Traits, typename Alloc>
+class op_basic_string_view
+{
+ typedef std::basic_string<Char, Traits, Alloc> string_type;
+ typedef std::list<string_type> list_type;
+ typedef typename string_type::size_type size_type;
+
+ // The view's value is represented by a list of strings and a single
+ // string. The value is the concatenation of the strings in the list with
+ // the single string at the end. All string operations apply to the single
+ // string; reduce operations cause lists of partial strings from multiple
+ // strands to be combined.
+ //
+ mutable string_type m_string;
+ mutable list_type m_list;
+
+ // Before returning the value of the reducer, concatenate all the strings
+ // in the list with the single string.
+ //
+ void flatten() const
{
- friend class reducer_basic_string<_Elem, _Traits, _Alloc>;
-
- /// Type of the basic_string the View is based on
- typedef std::basic_string<_Elem, _Traits, _Alloc> string_type;
+ if (m_list.empty()) return;
- /// Type of sizes
- typedef typename string_type::size_type size_type;
+ typename list_type::iterator i;
- std::basic_string<_Elem, _Traits, _Alloc> &get_value();
+ size_type len = m_string.size();
+ for (i = m_list.begin(); i != m_list.end(); ++i)
+ len += i->size();
- /// Add a character to the View
- void add_char(_Elem ch) { m_value += ch; }
+ string_type result(get_allocator());
+ result.reserve(len);
- private:
- string_type m_value; // Holds current string
- std::list<string_type> m_list; // List used to accumulate string fragments
- };
-
-public:
- /// Definition of data view, operation, and identity for reducer_basic_string
- struct Monoid: monoid_base< View >
- {
- static void reduce (View *left, View *right);
- };
+ for (i = m_list.begin(); i != m_list.end(); ++i)
+ result += *i;
+ m_list.clear();
-private:
- // Hyperobject to serve up views
- reducer<Monoid> imp_;
+ result += m_string;
+ result.swap(m_string);
+ }
public:
- // Default constructor - Construct an empty reducer_basic_string
- reducer_basic_string();
+ /** @name Monoid support.
+ */
+ //@{
- // Construct a reducer_basic_string with an initial value
- reducer_basic_string(const _Elem *ptr);
- reducer_basic_string(const _Elem *ptr, const _Alloc &al);
- reducer_basic_string(const _Elem *ptr, size_type count);
- reducer_basic_string(const _Elem *ptr, size_type count, const _Alloc &al);
- reducer_basic_string(const string_type &right, size_type offset, size_type count);
- reducer_basic_string(const string_type &right, size_type offset, size_type count, const _Alloc &al);
- reducer_basic_string(size_type count, _Elem ch);
- reducer_basic_string(size_type count, _Elem ch, const _Alloc &al);
+ /// Required by @ref monoid_with_view
+ typedef string_type value_type;
- // Return an immutable reference to the current string
- const string_type &get_value() const;
+ /// Required by @ref op_string
+ Alloc get_allocator() const
+ {
+ return m_string.get_allocator();
+ }
- // Return a reference to the current string
- string_type& get_reference();
- string_type const& get_reference() const;
+ /** Reduction operation.
+ *
+ * This function is invoked by the @ref op_basic_string monoid to combine
+ * the views of two strands when the right strand merges with the left
+ * one. It appends the value contained in the right-strand view to the
+ * value contained in the left-strand view, and leaves the value in the
+ * right-strand view undefined.
+ *
+ * @param right A pointer to the right-strand view. (`this` points to
+ * the left-strand view.)
+ *
+ * @note Used only by the @ref op_basic_string monoid to implement the
+ * monoid reduce operation.
+ */
+ void reduce(op_basic_string_view* right)
+ {
+ if (!right->m_string.empty() || !right->m_list.empty()) {
+ // (list, string) + (right_list, right_string) =>
+ // (list + {string} + right_list, right_string)
+ if (!m_string.empty()) {
+ // simulate m_list.push_back(std::move(m_string))
+ m_list.push_back(string_type(get_allocator()));
+ m_list.back().swap(m_string);
+ }
+ m_list.splice(m_list.end(), right->m_list);
+ m_string.swap(right->m_string);
+ }
+ }
- // Set the string to a specified value
- void set_value(const string_type &value);
+ //@}
- // Append to the string
- void append(const _Elem *ptr);
- void append(const _Elem *ptr, size_type count);
- void append(const string_type &str, size_type offset, size_type count);
- void append(const string_type &str);
- void append(size_type count, _Elem ch);
+ /** @name Pass constructor arguments through to the string constructor.
+ */
+ //@{
- // Append to the string
- reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(_Elem ch);
- reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(const _Elem *ptr);
- reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(const string_type &right);
+ op_basic_string_view() : m_string() {}
- reducer_basic_string& operator*() { return *this; }
- reducer_basic_string const& operator*() const { return *this; }
+ template <typename T1>
+ op_basic_string_view(const T1& x1) : m_string(x1) {}
- reducer_basic_string* operator->() { return this; }
- reducer_basic_string const* operator->() const { return this; }
+ template <typename T1, typename T2>
+ op_basic_string_view(const T1& x1, const T2& x2) : m_string(x1, x2) {}
-}; // class reducer_basic_string
+ template <typename T1, typename T2, typename T3>
+ op_basic_string_view(const T1& x1, const T2& x2, const T3& x3) : m_string(x1, x2, x3) {}
-/////////////////////////////////////////////////////////////////////////////
-// Implementation of inline and template functions
-/////////////////////////////////////////////////////////////////////////////
+ template <typename T1, typename T2, typename T3, typename T4>
+ op_basic_string_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) :
+ m_string(x1, x2, x3, x4) {}
-// -----------------------------------------
-// template class reducer_basic_string::View
-// -----------------------------------------
+ //@}
-/**
- * Assemble the string from the collected fragments
- *
- * @returns std::basic_string reference to the assembled string
- */
-template<class _Elem, class _Traits, class _Alloc>
-std::basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::View::get_value()
-{
- // If the list is empty, just return our string
- if (m_list.empty())
- return m_value;
-
- // First calculate the total length of all of the string fragments
- size_type len = m_value.size();
- typename std::list<string_type>::iterator i;
- for (i = m_list.begin(); i != m_list.end(); ++i)
- len += i->size();
-
- // Hold onto the string, since it needs to go at the end
- string_type tmp;
- tmp.swap(m_value);
-
- // Expand the string that to hold all of the string fragments.
- // Allocating it up-front prevents heap fragmentation.
- m_value.reserve(len);
-
- // Concatenate all of the fragments into the string, then clear out the
- // list
- for (i = m_list.begin(); i != m_list.end(); ++i)
- m_value += *i;
- m_list.clear();
-
- // Finally, add the string value we saved
- m_value += tmp;
- return m_value;
-}
-
-// -------------------------------------------
-// template class reducer_basic_string::Monoid
-// -------------------------------------------
-
-/**
- * Appends string from "right" reducer_basic_string onto the end of
- * the "left". When done, the "right" reducer_basic_string is empty.
- */
-template<class _Elem, class _Traits, class _Alloc>
-void
-reducer_basic_string<_Elem, _Traits, _Alloc>::Monoid::reduce(View *left,
- View *right)
-{
- // Check if there's anything to do
- if (right->m_list.empty() && right->m_value.empty())
- return;
-
- // If the only thing is the right string, just take it
- if (left->m_list.empty() && right->m_list.empty() & left->m_value.empty())
+ /** Move-in constructor.
+ */
+ explicit op_basic_string_view(move_in_wrapper<value_type> w)
+ : m_string(w.value().get_allocator())
{
- left->m_value.swap(right->m_value);
- return;
+ m_string.swap(w.value());
}
- // Debugging aid - should be removed before ship!
-#ifdef DEBUG_STRING_REDUCER
- std::cout << "Complex merge" << std::endl;
- dump ("Left");
- right->dump("Right");
-#endif
+ /** @name @ref reducer support.
+ */
+ //@{
- // OK, merge everything together. If there's anything in our string, it's
- // got to be added to the list first
- if (! left->m_value.empty())
+ void view_move_in(string_type& s)
{
- left->m_list.push_back(left->m_value);
- left->m_value.clear();
+ m_list.clear();
+ if (m_string.get_allocator() == s.get_allocator())
+ // Equal allocators. Do a (fast) swap.
+ m_string.swap(s);
+ else
+ // Unequal allocators. Do a (slow) copy.
+ m_string = s;
+ s.clear();
}
- // Now splice the two lists together, then take the right string
- left->m_list.splice(left->m_list.end(), right->m_list);
- left->m_value.swap(right->m_value);
-
- // Debugging aid - should be removed before ship!
-#ifdef DEBUG_STRING_REDUCER
- dump ("Result");
-#endif
-}
+ void view_move_out(string_type& s)
+ {
+ flatten();
+ if (m_string.get_allocator() == s.get_allocator())
+ // Equal allocators. Do a (fast) swap.
+ m_string.swap(s);
+ else
+ // Unequal allocators. Do a (slow) copy.
+ s = m_string;
+ m_string.clear();
+ }
-// -----------------------------------
-// template class reducer_basic_string
-// -----------------------------------
+ void view_set_value(const string_type& s)
+ { m_list.clear(); m_string = s; }
-/**
- * Default constructor - doesn't do much
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string():
- imp_()
-{
-}
+ string_type const& view_get_value() const
+ { flatten(); return m_string; }
-/**
- * Construct a reducer_basic_string initializing it from a null-terminated
- * string using the default allocator.
- *
- * @param ptr Null-terminated string to initialize from
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr) :
- imp_()
-{
- string_type str(ptr);
+ string_type & view_get_reference()
+ { flatten(); return m_string; }
- View &v = imp_.view();
- v.m_value = str;
-}
+ string_type const& view_get_reference() const
+ { flatten(); return m_string; }
-/**
- * Construct a reducer_basic_string initializing it from a null-terminated
- * string specifying an allocator.
- *
- * @param ptr Null-terminated string to initialize from
- * @param al Allocator to be used
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr,
- const _Alloc &al) :
- imp_()
-{
- string_type str(ptr, al);
+ //@}
- View &v = imp_.view();
- v.m_value = str;
-}
+ /** @name View modifier operations.
+ *
+ * @details These simply wrap the corresponding operations on the underlying string.
+ */
+ //@{
-/**
- * Construct a reducer_basic_string initializing it from a null-terminated
- * string, copying N characters, using the default allocator.
- *
- * @param ptr Null-terminated string to initialize from
- * @param count Number of characters to copy
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr,
- size_type count) :
- imp_()
-{
- string_type str(ptr, count);
+ template <typename T>
+ op_basic_string_view& operator +=(const T& x)
+ { m_string += x; return *this; }
- View &v = imp_.view();
- v.m_value = str;
-}
+ template <typename T1>
+ op_basic_string_view& append(const T1& x1)
+ { m_string.append(x1); return *this; }
-/**
- * Construct a reducer_basic_string initializing it from a null-terminated
- * string, copying N characters, specifying an allocator.
- *
- * @param ptr Null-terminated string to initialize from
- * @param count Number of characters to copy
- * @param al Allocator to be used
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr,
- size_type count,
- const _Alloc &al) :
- imp_()
-{
- string_type str(ptr, count, al);
-
- View &v = imp_.view();
- v.m_value = str;
-}
+ template <typename T1, typename T2>
+ op_basic_string_view& append(const T1& x1, const T2& x2)
+ { m_string.append(x1, x2); return *this; }
-/**
- * Construct a reducer_basic_string initializing it from a string_type
- * string starting from an offset, copying N characters, using the default
- * allocator.
- *
- * @param right string_type string to initialize from
- * @param offset Character withing right to start copying from
- * @param count Number of characters to copy
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const string_type &right,
- size_type offset,
- size_type count) :
- imp_()
-{
- string_type str(right, offset, count);
+ template <typename T1, typename T2, typename T3>
+ op_basic_string_view& append(const T1& x1, const T2& x2, const T3& x3)
+ { m_string.append(x1, x2, x3); return *this; }
- View &v = imp_.view();
- v.m_value = str;
-}
+ void push_back(const Char x) { m_string.push_back(x); }
-/**
- * Construct a reducer_basic_string initializing it from a string_type
- * string starting from an offset, copying N characters, uspecifying an
- * allocator.
- *
- * @param right string_type string to initialize from
- * @param offset Character withing right to start copying from
- * @param count Number of characters to copy
- * @param al Allocator to be used
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const string_type &right,
- size_type offset,
- size_type count,
- const _Alloc &al) :
- imp_()
-{
- string_type str(right, offset, count, al);
+ //@}
+};
- View &v = imp_.view();
- v.m_value = str;
-}
-/**
- * Construct a reducer_basic_string initializing it with a character repeated
- * some number of times, using the default allocator.
+/** String append monoid class. Instantiate the cilk::reducer template class
+ * with an op_basic_string monoid to create a string append reducer class. For
+ * example, to concatenate a collection of standard strings:
*
- * @param count Number of times to repeat the character
- * @param ch Character to initialize reducer_basic_string with
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(size_type count,
- _Elem ch) :
- imp_()
-{
- string_type str(count, ch);
-
- View &v = imp_.view();
- v.m_value = str;
-}
-
-/**
- * Construct a reducer_basic_string initializing it with a character repeated
- * some number of times, specifying an allocator.
+ * cilk::reducer< cilk::op_basic_string<char> > r;
*
- * @param count Number of times to repeat the character
- * @param ch Character to initialize reducer_basic_string with
- * @param al Allocator to be used
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(size_type count,
- _Elem ch,
- const _Alloc &al) :
- imp_()
-{
- string_type str(count, ch, al);
-
- View &v = imp_.view();
- v.m_value = str;
-}
-
-/**
- * Assemble the string from the collected fragments and return a mutable
- * reference to it
+ * @tparam Char The string element type (not the string type).
+ * @tparam Traits The character traits type.
+ * @tparam Alloc The string allocator type.
+ * @tparam Align If `false` (the default), reducers instantiated on this
+ * monoid will be naturally aligned (the Cilk library 1.0
+ * behavior). If `true`, reducers instantiated on this monoid
+ * will be cache-aligned for binary compatibility with
+ * reducers in Cilk library version 0.9.
*
- * @returns std::basic_string reference
+ * @see ReducersString
+ * @see op_basic_string_view
+ * @see reducer_basic_string
+ * @see op_string
+ * @see op_wstring
*/
-template<class _Elem, class _Traits, class _Alloc>
-std::basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::get_reference()
+template<typename Char,
+ typename Traits = std::char_traits<Char>,
+ typename Alloc = std::allocator<Char>,
+ bool Align = false>
+class op_basic_string :
+ public monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align >
{
- View &v = imp_.view();
-
- return v.get_value();
-}
+ typedef monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align >
+ base;
+ Alloc m_allocator;
-/**
- * Assemble the string from the collected fragments and return an immutable
- * reference to it
- *
- * @returns std::basic_string reference
- */
-template<class _Elem, class _Traits, class _Alloc>
-const std::basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::get_reference() const
-{
- // Cast away the const-ness and call mutable get_reference to do the work
- reducer_basic_string *pThis = const_cast<reducer_basic_string *>(this);
- return pThis->get_reference();
-}
+public:
-/**
- * Assemble the string from the collected fragments and return an immutable
- * reference to it
- *
- * @returns string_type reference
+ /** View type of the monoid.
+ */
+ typedef typename base::view_type view_type;
+
+ /** Constructor.
+ *
+ * The allocator argument is optional; when it is omitted, a
+ * default-constructed allocator is used.
+ *
+ * @param allocator The string allocator to be used when
+ * identity-constructing new views.
+ */
+ op_basic_string(const Alloc& allocator = Alloc()) : m_allocator(allocator)
+ {}
+
+ /** Create an identity view.
+ *
+ * String view identity constructors take the string allocator as an
+ * argument.
+ *
+ * @param v The address of the uninitialized memory in which the view
+ * will be constructed.
+ */
+ void identity(view_type *v) const { ::new((void*) v) view_type(m_allocator); }
+
+ /** @name Construct functions
+ *
+ * A string append reduction monoid must have a copy of the allocator of
+ * the leftmost view’s string, so that it can use it in the `identity`
+ * operation. This, in turn, requires that string reduction monoids have a
+ * specialized `construct()` function.
+ *
+ * All string reducer monoid `construct()` functions first construct the
+ * leftmost view, using the arguments that were passed in from the reducer
+ * constructor. They then call the view’s `get_allocator()` function to
+ * get the string allocator from the string in the leftmost view, and pass
+ * that to the monoid constructor.
+ */
+ //@{
+
+ static void construct(op_basic_string* monoid, view_type* view)
+ { provisional( new ((void*)view) view_type() ).confirm_if(
+ new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
+
+ template <typename T1>
+ static void construct(op_basic_string* monoid, view_type* view, const T1& x1)
+ { provisional( new ((void*)view) view_type(x1) ).confirm_if(
+ new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
+
+ template <typename T1, typename T2>
+ static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2)
+ { provisional( new ((void*)view) view_type(x1, x2) ).confirm_if(
+ new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
+
+ template <typename T1, typename T2, typename T3>
+ static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2,
+ const T3& x3)
+ { provisional( new ((void*)view) view_type(x1, x2, x3) ).confirm_if(
+ new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
+
+ template <typename T1, typename T2, typename T3, typename T4>
+ static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2,
+ const T3& x3, const T4& x4)
+ { provisional( new ((void*)view) view_type(x1, x2, x3, x4) ).confirm_if(
+ new ((void*)monoid) op_basic_string(view->get_allocator()) ); }
+
+ //@}
+};
+
+
+/** Convenience typedef for 8-bit strings
*/
-template<class _Elem, class _Traits, class _Alloc>
-inline
-const std::basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::get_value() const
-{
- // Delegate to get_reference()
- return this->get_reference();
-}
-
-/**
- * Set the string to a specified value
- *
- * @param value string_type to set the reducer_basic_string to
+typedef op_basic_string<char> op_string;
+
+/** Convenience typedef for wide-character (wchar_t) strings
*/
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::set_value(const string_type &value)
-{
- View &v = imp_.view();
-
- v.m_list.clear();
- v.m_value.assign(value);
-}
-
-/**
- * Add a null-terminated string to the string
- *
- * @param ptr Null-terminated string to be appended
+typedef op_basic_string<wchar_t> op_wstring;
+
+
+/** Deprecated string append reducer class.
+ *
+ * reducer_basic_string is the same as @ref reducer<@ref op_basic_string>,
+ * except that reducer_basic_string is a proxy for the contained view, so that
+ * accumulator variable update operations can be applied directly to the
+ * reducer. For example, a string is appended to a `reducer<%op_basic_string>`
+ * with `r->append(s)`, but a string can be appended to a
+ * `%reducer_basic_string` with `r.append(s)`.
+ *
+ * @deprecated Users are strongly encouraged to use `reducer<monoid>`
+ * reducers rather than the old wrappers like reducer_basic_string.
+ * The `reducer<monoid>` reducers show the reducer/monoid/view
+ * architecture more clearly, are more consistent in their
+ * implementation, and present a simpler model for new
+ * user-implemented reducers.
+ *
+ * @note Implicit conversions are provided between `%reducer_basic_string`
+ * and `reducer<%op_basic_string>`. This allows incremental code
+ * conversion: old code that used `%reducer_basic_string` can pass a
+ * `%reducer_basic_string` to a converted function that now expects a
+ * pointer or reference to a `reducer<%op_basic_string>`, and vice
+ * versa.
+ *
+ * @tparam Char The string element type (not the string type).
+ * @tparam Traits The character traits type.
+ * @tparam Alloc The string allocator type.
+ *
+ * @see op_basic_string
+ * @see reducer
+ * @see ReducersString
*/
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const _Elem *ptr)
+template<typename Char,
+ typename Traits = std::char_traits<Char>,
+ typename Alloc = std::allocator<Char> >
+class reducer_basic_string :
+ public reducer< op_basic_string<Char, Traits, Alloc, true> >
{
- View &v = imp_.view();
+ typedef reducer< op_basic_string<Char, Traits, Alloc, true> > base;
+ using base::view;
+public:
- v.m_value.append(ptr);
-}
+ /// The reducer’s string type.
+ typedef typename base::value_type string_type;
-/**
- * Add a string_type string to the string
- *
- * @param str string_type to be appended
- */
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const string_type &str)
-{
- View &v = imp_.view();
+ /// The reducer’s primitive component type.
+ typedef Char basic_value_type;
- v.m_value.append(str);
-}
+ /// The string size type.
+ typedef typename string_type::size_type size_type;
-/**
- * Add a null-terminated string to the string, specifying the maximum number
- * of characters to copy
- *
- * @param ptr Null-terminated string to be appended
- * @param count Maximum number of characters to copy
- */
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const _Elem *ptr,
- size_type count)
-{
- View &v = imp_.view();
+ /// The view type for the reducer.
+ typedef typename base::view_type View;
+
+ /// The monoid type for the reducer.
+ typedef typename base::monoid_type Monoid;
+
+
+ /** @name Constructors
+ */
+ //@{
+
+ /** @name Forward constructor calls to the base class.
+ *
+ * All basic_string constructor forms are supported.
+ */
+ //@{
+ reducer_basic_string() {}
+
+ template <typename T1>
+ reducer_basic_string(const T1& x1) :
+ base(x1) {}
+
+ template <typename T1, typename T2>
+ reducer_basic_string(const T1& x1, const T2& x2) :
+ base(x1, x2) {}
+
+ template <typename T1, typename T2, typename T3>
+ reducer_basic_string(const T1& x1, const T2& x2, const T3& x3) :
+ base(x1, x2, x3) {}
+
+ template <typename T1, typename T2, typename T3, typename T4>
+ reducer_basic_string(const T1& x1, const T2& x2, const T3& x3, const T4& x4) :
+ base(x1, x2, x3, x4) {}
+ //@}
+
+ /** Allow mutable access to the string within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the string returned by this method will be a
+ * partial result.
+ *
+ * @returns A mutable reference to the string within the current view.
+ */
+ string_type &get_reference()
+ { return view().view_get_reference(); }
+
+ /** Allow read-only access to the string within the current view.
+ *
+ * @warning If this method is called before the parallel calculation is
+ * complete, the string returned by this method will be a
+ * partial result.
+ *
+ * @returns A const reference to the string within the current view.
+ */
+ string_type const &get_reference() const
+ { return view().view_get_reference(); }
+
+ /** @name Append to the string.
+ *
+ * These operations are simply forwarded to the view.
+ */
+ //@{
+ void append(const Char *ptr)
+ { view().append(ptr); }
+ void append(const Char *ptr, size_type count)
+ { view().append(ptr, count); }
+ void append(const string_type &str, size_type offset, size_type count)
+ { view().append(str, offset, count); }
+ void append(const string_type &str)
+ { view().append(str); }
+ void append(size_type count, Char ch)
+ { view().append(count, ch); }
- v.m_value.append(ptr, count);
-}
+ // Append to the string
+ reducer_basic_string<Char, Traits, Alloc> &operator+=(Char ch)
+ { view() += ch; return *this; }
+ reducer_basic_string<Char, Traits, Alloc> &operator+=(const Char *ptr)
+ { view() += ptr; return *this; }
+ reducer_basic_string<Char, Traits, Alloc> &operator+=(const string_type &right)
+ { view() += right; return *this; }
+ //@}
+
+ /** @name Dereference
+ * @details Dereferencing a wrapper is a no-op. It simply returns the
+ * wrapper. Combined with the rule that the wrapper forwards view
+ * operations to its contained view, this means that view operations can
+ * be written the same way on reducers and wrappers, which is convenient
+ * for incrementally converting old code using wrappers to use reducers
+ * instead. That is:
+ *
+ * reducer<op_string> r;
+ * r->append(s); // r-> returns the view
+ * // append() is a view member function
+ *
+ * reducer_string w;
+ * w->append(s); // w-> returns the wrapper
+ * // append() is a wrapper member function
+ * // that calls the corresponding view function
+ */
+ //@{
+ reducer_basic_string& operator*() { return *this; }
+ reducer_basic_string const& operator*() const { return *this; }
-/**
- * Add a string_type string to the string, specifying the starting offset and
- * maximum number of characters to copy
- *
- * @param str Null-terminated string to be appended
- * @param offset Offset in the string_type to start copy at
- * @param count Maximum number of characters to copy
- */
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const string_type &str,
- size_type offset,
- size_type count)
-{
- View &v = imp_.view();
+ reducer_basic_string* operator->() { return this; }
+ reducer_basic_string const* operator->() const { return this; }
+ //@}
+
+ /** @name Upcast
+ * @details In Cilk library 0.9, reducers were always cache-aligned. In
+ * library 1.0, reducer cache alignment is optional. By default, reducers
+ * are unaligned (i.e., just naturally aligned), but legacy wrappers
+ * inherit from cache-aligned reducers for binary compatibility.
+ *
+ * This means that a wrapper will automatically be upcast to its aligned
+ * reducer base class. The following conversion operators provide
+ * pseudo-upcasts to the corresponding unaligned reducer class.
+ */
+ //@{
+ operator reducer< op_basic_string<Char, Traits, Alloc, false> >& ()
+ {
+ return *reinterpret_cast< reducer<
+ op_basic_string<Char, Traits, Alloc, false> >*
+ >(this);
+ }
+ operator const reducer< op_basic_string<Char, Traits, Alloc, false> >& () const
+ {
+ return *reinterpret_cast< const reducer<
+ op_basic_string<Char, Traits, Alloc, false> >*
+ >(this);
+ }
+ //@}
+};
- v.m_value.append(str, offset, count);
-}
-/**
- * Add one or more repeated characters to the string
- *
- * @param count Number of times to repeat the character
- * @param ch Character to be added one or more times to the string
+/** Convenience typedef for 8-bit strings
*/
-// append - add one or more repeated characters to the list
-template<class _Elem, class _Traits, class _Alloc>
-void reducer_basic_string<_Elem, _Traits, _Alloc>::append(size_type count,
- _Elem ch)
-{
- View &v = imp_.view();
-
- v.m_value.append(count, ch);
-}
+typedef reducer_basic_string<char> reducer_string;
-/**
- * append a single character to the string
- *
- * @param ch Character to be appended
+/** Convenience typedef for wide-character (wchar_t) strings
*/
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(_Elem ch)
-{
- View &v = imp_.view();
+typedef reducer_basic_string<wchar_t> reducer_wstring;
- v.m_value.append(1, ch);
- return *this;
-}
+
-/**
- * append a null-terminated string to the string
+/// @cond internal
+/** Metafunction specialization for reducer conversion.
*
- * @param ptr Null-terminated string to be appended
- */
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(const _Elem *ptr)
-{
- View &v = imp_.view();
-
- v.m_value.append(ptr);
- return *this;
-}
-
-/**
- * append a string-type to the string
+ * This specialization of the @ref legacy_reducer_downcast template class
+ * defined in reducer.h causes the `reducer< op_basic_string<Char> >` class to
+ * have an `operator reducer_basic_string<Char>& ()` conversion operator that
+ * statically downcasts the `reducer<op_basic_string>` to the corresponding
+ * `reducer_basic_string` type. (The reverse conversion, from
+ * `reducer_basic_string` to `reducer<op_basic_string>`, is just an upcast,
+ * which is provided for free by the language.)
*
- * @param right string-type to be appended
+ * @ingroup ReducersString
*/
-template<class _Elem, class _Traits, class _Alloc>
-reducer_basic_string<_Elem, _Traits, _Alloc> &
-reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(const string_type &right)
+template<typename Char, typename Traits, typename Alloc, bool Align>
+struct legacy_reducer_downcast<
+ reducer<op_basic_string<Char, Traits, Alloc, Align> > >
{
- View &v = imp_.view();
-
- v.m_value.append(right);
- return *this;
-}
+ typedef reducer_basic_string<Char, Traits, Alloc> type;
+};
-/**
- * Convenience typedefs for 8-bit strings
- */
-typedef reducer_basic_string<char,
- std::char_traits<char>,
- std::allocator<char> >
- reducer_string;
+/// @endcond
-/**
- * Convenience typedefs for 16-bit strings
- */
-typedef reducer_basic_string<wchar_t,
- std::char_traits<wchar_t>,
- std::allocator<wchar_t> >
- reducer_wstring;
+//@}
-} // namespace cilk
+} // namespace cilk
#endif // REDUCER_STRING_H_INCLUDED
diff --git a/libcilkrts/include/cilktools/cilkscreen.h b/libcilkrts/include/cilktools/cilkscreen.h
index 0975f8ca7db..47a363e9c98 100644
--- a/libcilkrts/include/cilktools/cilkscreen.h
+++ b/libcilkrts/include/cilktools/cilkscreen.h
@@ -2,24 +2,29 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
+ * @copyright
+ * Copyright (C) 2010-2011
* Intel Corporation
*
+ * @copyright
* This file is part of the Intel Cilk Plus Library. This library is free
* software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option)
* any later version.
*
+ * @copyright
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
+ * @copyright
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
*
+ * @copyright
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
@@ -41,7 +46,7 @@
# define __cilkscreen_metacall(annotation,expr) \
__notify_zc_intrinsic((char *)annotation, expr)
#else
-# define __cilkscreen_metacall(annotation,expr) (annotation, (void) (expr))
+# define __cilkscreen_metacall(annotation,expr) ((void)annotation, (void)(expr))
#endif
/* Call once when a user thread enters a spawning function */
diff --git a/libcilkrts/include/cilktools/cilkview.h b/libcilkrts/include/cilktools/cilkview.h
index cb1d235af95..e4656260049 100644
--- a/libcilkrts/include/cilktools/cilkview.h
+++ b/libcilkrts/include/cilktools/cilkview.h
@@ -2,24 +2,29 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
+ * @copyright
+ * Copyright (C) 2010-2011
* Intel Corporation
*
+ * @copyright
* This file is part of the Intel Cilk Plus Library. This library is free
* software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option)
* any later version.
*
+ * @copyright
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
+ * @copyright
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
*
+ * @copyright
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
@@ -40,7 +45,7 @@ __CILKRTS_END_EXTERN_C
# endif
#endif // _WIN32
-#if defined __unix__ || defined __APPLE__
+#if defined __unix__ || defined __APPLE__ || defined __VXWORKS__
# include <sys/time.h>
#endif // defined __unix__ || defined __APPLE__
@@ -61,7 +66,7 @@ static inline unsigned long long __cilkview_getticks()
#ifdef _WIN32
// Return milliseconds elapsed since the system started
return GetTickCount();
-#elif defined(__unix__) || defined(__APPLE__)
+#elif defined(__unix__) || defined(__APPLE__) || defined __VXWORKS__
// Return milliseconds elapsed since the Unix Epoch
// (1-Jan-1970 00:00:00.000 UTC)
struct timeval t;
@@ -96,7 +101,7 @@ typedef struct
{
cilkview_data_t *start; // Values at start of interval
cilkview_data_t *end; // Values at end of interval
- char *label; // Name for this interval
+ const char *label; // Name for this interval
unsigned int flags; // What to do - see flags below
} cilkview_report_t;
@@ -107,10 +112,13 @@ enum
CV_REPORT_WRITE_TO_RESULTS = 2 // Write parallelism data to results file
};
-void __cilkview_do_report(cilkview_data_t *start,
+#ifndef CILKVIEW_NO_REPORT
+static void __cilkview_do_report(cilkview_data_t *start,
cilkview_data_t *end,
- char *label,
+ const char *label,
unsigned int flags);
+#endif /* CILKVIEW_NO_REPORT */
+
/*
* Metacall data
*
@@ -198,7 +206,7 @@ enum
static void __cilkview_do_report(cilkview_data_t *start,
cilkview_data_t *end,
- char *label,
+ const char *label,
unsigned int flags)
{
int under_cilkview = 0;
@@ -242,7 +250,7 @@ static void __cilkview_do_report(cilkview_data_t *start,
// Open the output file and write the trial data to it
outfile = getenv("CILKVIEW_OUTFILE");
if (NULL == outfile)
- outfile = "cilkview.out";
+ outfile = (char *)"cilkview.out";
f = fopen(outfile, "a");
if (NULL == f)
diff --git a/libcilkrts/include/cilktools/fake_mutex.h b/libcilkrts/include/cilktools/fake_mutex.h
index d11a10d700b..76276a63caf 100644
--- a/libcilkrts/include/cilktools/fake_mutex.h
+++ b/libcilkrts/include/cilktools/fake_mutex.h
@@ -2,24 +2,29 @@
*
*************************************************************************
*
- * Copyright (C) 2011
+ * @copyright
+ * Copyright (C) 2013
* Intel Corporation
*
+ * @copyright
* This file is part of the Intel Cilk Plus Library. This library is free
* software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option)
* any later version.
*
+ * @copyright
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
+ * @copyright
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
*
+ * @copyright
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
@@ -40,47 +45,43 @@
#include <cilktools/cilkscreen.h>
-// If this is Windows, specify the linkage
-#ifdef _WIN32
-#define CILKSCREEN_CDECL __cdecl
-#else
-#define CILKSCREEN_CDECL
-#endif // _WIN32
-
namespace cilkscreen
{
class fake_mutex
{
public:
+ fake_mutex() : locked(false)
+ {
+ }
+
+ ~fake_mutex()
+ {
+ __CILKRTS_ASSERT(! locked);
+ }
// Wait until mutex is available, then enter
- virtual void lock()
+ void lock()
{
- __cilkscreen_acquire_lock(&lock_val);
+ __cilkscreen_acquire_lock(&locked);
+ __CILKRTS_ASSERT(! locked);
+ locked = true;
}
// A fake mutex is always available
- virtual bool try_lock() { lock(); return true; }
+ bool try_lock() { lock(); return true; }
// Releases the mutex
- virtual void unlock()
+ void unlock()
{
- __cilkscreen_release_lock(&lock_val);
+ __CILKRTS_ASSERT(locked);
+ locked = false;
+ __cilkscreen_release_lock(&locked);
}
private:
- int lock_val;
+ bool locked;
};
- // Factory function for fake mutex
- inline
- fake_mutex *CILKSCREEN_CDECL create_fake_mutex() { return new fake_mutex(); }
-
- // Destructor function for fake mutex - The mutex cannot be used after
- // calling this function
- inline
- void CILKSCREEN_CDECL destroy_fake_mutex(fake_mutex *m) { delete m; }
-
} // namespace cilk
#endif // FAKE_MUTEX_H_INCLUDED
diff --git a/libcilkrts/include/cilktools/lock_guard.h b/libcilkrts/include/cilktools/lock_guard.h
index 02c8b401e31..bcb5eb5b2c3 100644
--- a/libcilkrts/include/cilktools/lock_guard.h
+++ b/libcilkrts/include/cilktools/lock_guard.h
@@ -2,24 +2,29 @@
*
*************************************************************************
*
- * Copyright (C) 2011
+ * @copyright
+ * Copyright (C) 2011
* Intel Corporation
*
+ * @copyright
* This file is part of the Intel Cilk Plus Library. This library is free
* software; you can redistribute it and/or modify it under the
* terms of the GNU General Public License as published by the
* Free Software Foundation; either version 3, or (at your option)
* any later version.
*
+ * @copyright
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
+ * @copyright
* Under Section 7 of GPL version 3, you are granted additional
* permissions described in the GCC Runtime Library Exception, version
* 3.1, as published by the Free Software Foundation.
*
+ * @copyright
* You should have received a copy of the GNU General Public License and
* a copy of the GCC Runtime Library Exception along with this program;
* see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
diff --git a/libcilkrts/include/internal/abi.h b/libcilkrts/include/internal/abi.h
index 0db800cc231..8f64b1bc5df 100644
--- a/libcilkrts/include/internal/abi.h
+++ b/libcilkrts/include/internal/abi.h
@@ -1,28 +1,33 @@
/*
* abi.h
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
******************************************************************************/
@@ -38,6 +43,7 @@
#include <cilk/common.h>
+#include <stddef.h> // Needed to define size_t
/**
* Jump buffers are OS and architecture dependent
@@ -85,14 +91,14 @@ typedef struct
#endif /* defined(_MSC_VER) */
/* struct tags */
-typedef struct __cilkrts_worker __cilkrts_worker;
-typedef struct __cilkrts_worker* __cilkrts_worker_ptr;
-typedef struct __cilkrts_stack_frame __cilkrts_stack_frame;
+typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; ///< struct tag for stack frame
// Forwarded declarations
-typedef struct global_state_t global_state_t;
-typedef struct local_state local_state;
-typedef struct cilkred_map cilkred_map;
+typedef struct global_state_t global_state_t; ///< Forwarded declaration for global state
+typedef struct local_state local_state; ///< Forwarded declaration for local state
+typedef struct cilkred_map cilkred_map; ///< Forward declaration for reducer map
+
+/// Forwarded declaration for system-dependent worker state
typedef struct __cilkrts_worker_sysdep_state
__cilkrts_worker_sysdep_state;
@@ -559,6 +565,70 @@ CILK_ABI_THROWS(void) __cilkrts_cilk_for_64(__cilk_abi_f64_t body,
cilk64_t count,
int grain);
-__CILKRTS_END_EXTERN_C
+/**
+ * @brief Allocate memory for variable length arrays. If the frame is
+ * sync'd, the memory will be allocated on the stack, otherwise it will
+ * be allocated from the heap.
+ *
+ * @param sf The __cilkrts_stack_frame for the function allocating the
+ * memory.
+ * @param size The number of bytes requested.
+ * @param distance_from_sp_to_alloca_area ?.
+ * @param align Alignment required. Always >= minimum stack alignment,
+ * >= ptr_size, and always a power of 2.
+ * @param needs_tag Non-zero if the pointer being returned needs to be
+ * tagged
+ *
+ * @return The address of the memory block allocated.
+ */
+
+CILK_ABI(__cilkrts_void_ptr)
+__cilkrts_stack_alloc(__cilkrts_stack_frame *sf,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align,
+ uint32_t needs_tag);
+/**
+ * @brief Free memory allocated by __cilkrts_stack_alloc() for variable length
+ * arrays.
+ *
+ * @param sf The __cilkrts_stack_frame for the function allocating the
+ * memory.
+ * @param p Pointer to the memory block to be freed.
+ * @param size The number of bytes requested.
+ * @param distance_from_sp_to_alloca_area ?.
+ * @param align Alignment required. Always >= minimum stack alignment,
+ * >= ptr_size, and always a power of 2.
+ * @param known_from_stack Non-zero if the pointer is known to have been
+ * allocated on the stack and has no tag.
+ */
+CILK_ABI(void)
+__cilkrts_stack_free(__cilkrts_stack_frame *sf,
+ void *p,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align,
+ uint32_t known_from_stack);
+
+/**
+ * @brief System-dependent code to save floating point control information
+ * to an ABI 1 or higher @c __cilkrts_stack_frame. If possible (and necessary)
+ * the code to save the floating point control information should be inlined.
+ *
+ * Note that this function does *not* save the current floating point
+ * registers. It saves the floating point control words that control
+ * precision and rounding and stuff like that.
+ *
+ * This function will be a noop for architectures that don't have warts
+ * like the floating point control words, or where the information is
+ * already being saved by the setjmp.
+ *
+ * @param sf @c __cilkrts_stack_frame for the frame we're saving the
+ * floating point control information in.
+ */
+CILK_ABI(void)
+__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf);
+
+__CILKRTS_END_EXTERN_C
#endif /* include guard */
diff --git a/libcilkrts/include/internal/cilk_fake.h b/libcilkrts/include/internal/cilk_fake.h
new file mode 100644
index 00000000000..2dc8efff765
--- /dev/null
+++ b/libcilkrts/include/internal/cilk_fake.h
@@ -0,0 +1,441 @@
+/* cilk_fake.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2011-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/**
+ * @file cilk_fake.h
+ *
+ * @brief Macros to simulate a compiled Cilk program.
+ *
+ * Used carefully, these macros can be used to create a Cilk program with a
+ * non-Cilk compiler by manually inserting the code necessary for interacting
+ * with the Cilk runtime library. They are not intended to be pretty (you
+ * wouldn't want to write a whole program using these macros), but they are
+ * useful for experiments. They also work well as an illustration of what the
+ * compiler generates.
+ *
+ * Details of the mechanisms used in these macros are described in
+ * design-notes/CilkPlusABI.docx
+ *
+ * Example 1: fib in C++
+ * ---------------------
+ *
+ * #include <internal/cilk_fake.h>
+ *
+ * int fib(int n)
+ * {
+ * CILK_FAKE_PROLOG();
+ *
+ * if (n < 2)
+ * return n;
+ *
+ * int a, b;
+ * CILK_FAKE_SPAWN_R(a, fib(n - 1));
+ * b = fib(n - 2);
+ * CILK_FAKE_SYNC();
+ *
+ * return a + b;
+ * }
+ *
+ *
+ * Example 2: fib in C
+ * -------------------
+ *
+ * #include <internal/cilk_fake.h>
+ *
+ * int fib(int n);
+ *
+ * void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n)
+ * {
+ * CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf);
+ * *a = fib(n - 1);
+ * CILK_FAKE_SPAWN_HELPER_EPILOG();
+ * }
+ *
+ * int fib(int n)
+ * {
+ * CILK_FAKE_PROLOG();
+ *
+ * if (n < 2)
+ * return n;
+ *
+ * int a, b;
+ * CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n));
+ * b = fib(n - 2);
+ * CILK_FAKE_SYNC();
+ *
+ * CILK_FAKE_EPILOG();
+ * return a + b;
+ * }
+ */
+
+#ifndef INCLUDED_CILK_FAKE_DOT_H
+#define INCLUDED_CILK_FAKE_DOT_H
+
+// This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already
+// defined but is less than 1, then the data structures in <internal/abi.h>
+// will not match the expectations of facilities in this header. Therefore,
+// for successful compilation, __CILKRTS_ABI_VERSION must either be not
+// defined, or defined to be 1 or greater.
+#ifndef __CILKRTS_ABI_VERSION
+ // ABI version was not specified. Set it to 1.
+# define __CILKRTS_ABI_VERSION 1
+#elif __CILKRTS_ABI_VERSION < 1
+ // ABI version was specified but was too old. Fail compilation.
+# error cilk_fake.h requirs an ABI version of 1 or greater
+#endif
+
+#include <internal/abi.h>
+
+// alloca is defined in malloc.h on Windows, alloca.h on Linux
+#ifndef _MSC_VER
+#include <alloca.h>
+#else
+#include <malloc.h>
+// Define offsetof
+#include <stddef.h>
+#endif
+
+#define CILK_FAKE_VERSION_FLAG (__CILKRTS_ABI_VERSION << 24)
+
+/* Initialize frame. To be called when worker is known */
+__CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf,
+ __cilkrts_worker *w)
+{
+ sf->call_parent = w->current_stack_frame;
+ sf->worker = w;
+ sf->flags = CILK_FAKE_VERSION_FLAG;
+ w->current_stack_frame = sf;
+}
+
+/* Initialize frame. To be called when worker is not known */
+__CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf)
+{
+ __cilkrts_worker* w = __cilkrts_get_tls_worker();
+ uint32_t last_flag = 0;
+ if (! w) {
+ w = __cilkrts_bind_thread_1();
+ last_flag = CILK_FRAME_LAST;
+ }
+ __cilk_fake_enter_frame_fast(sf, w);
+ sf->flags |= last_flag;
+}
+
+/* Initialize frame. To be called within the spawn helper */
+__CILKRTS_INLINE void __cilk_fake_helper_enter_frame(
+ __cilkrts_stack_frame *sf,
+ __cilkrts_stack_frame *parent_sf)
+{
+ sf->worker = 0;
+ sf->call_parent = parent_sf;
+}
+
+/* Called from the spawn helper to push the parent continuation on the task
+ * deque so that it can be stolen.
+ */
+__CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf)
+{
+ /* Initialize spawn helper frame.
+ * call_parent was saved in __cilk_fake_helper_enter_frame */
+ __cilkrts_stack_frame *parent = sf->call_parent;
+ __cilkrts_worker *w = parent->worker;
+ __cilk_fake_enter_frame_fast(sf, w);
+
+ /* Append a node to the pedigree */
+ sf->spawn_helper_pedigree = w->pedigree;
+ parent->parent_pedigree = w->pedigree;
+ w->pedigree.rank = 0;
+ w->pedigree.parent = &sf->spawn_helper_pedigree;
+
+ /* Push parent onto the task deque */
+ __cilkrts_stack_frame *volatile *tail = w->tail;
+ *tail++ = sf->call_parent;
+ /* The stores must be separated by a store fence (noop on x86)
+ * or the second store is a release (st8.rel on Itanium) */
+ w->tail = tail;
+ sf->flags |= CILK_FRAME_DETACHED;
+}
+
+/* This variable is used in CILK_FAKE_FORCE_FRAME_PTR(), below */
+static int __cilk_fake_dummy = 8;
+
+/* The following macro is used to force the compiler into generating a frame
+ * pointer. We never change the value of __cilk_fake_dummy, so the alloca()
+ * is never called, but we need the 'if' statement and the __cilk_fake_dummy
+ * variable so that the compiler does not attempt to optimize it away.
+ */
+#define CILK_FAKE_FORCE_FRAME_PTR(sf) do { \
+ if (__builtin_expect(1 & __cilk_fake_dummy, 0)) \
+ (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy); \
+} while (0)
+
+#ifndef CILK_FAKE_NO_SHRINKWRAP
+ /* "shrink-wrap" optimization enabled. Do not initialize frame on entry,
+ * except to clear worker pointer. Instead, defer initialization until
+ * the first spawn.
+ */
+# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0))
+# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do { \
+ if (! (sf).worker) __cilk_fake_enter_frame(&(sf)); \
+ } while (0)
+#else
+ /* "shrink-wrap" optimization disabled. Initialize frame immediately on
+ * entry. Do not initialize frame on spawn.
+ */
+# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) \
+ __cilk_fake_enter_frame(&(sf))
+# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf))
+#endif
+
+/* Prologue of a spawning function. Declares and initializes the stack
+ * frame.
+ */
+#define CILK_FAKE_PROLOG() \
+ __cilk_fake_stack_frame __cilk_sf; \
+ CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
+ CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf)
+
+/* Prologue of a spawning function where the current worker is already known.
+ * Declares and initializes the stack frame without looking up the worker from
+ * TLS.
+ */
+#define CILK_FAKE_PROLOG_FAST(w) \
+ __cilk_fake_stack_frame __cilk_sf; \
+ CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \
+ __cilk_fake_enter_frame_fast(&__cilk_sf, (w))
+
+/* Simulate a cilk_sync */
+#define CILK_FAKE_SYNC() CILK_FAKE_SYNC_IMP(__cilk_sf)
+
+/* Epilog at the end of a spawning function. Does a sync and calls the
+ * runtime for leaving the frame.
+ */
+#ifdef __cplusplus
+ // Epilogue is run automatically by __cilk_fake_stack_frame destructor.
+# define CILK_FAKE_EPILOG() ((void) __cilk_sf)
+#else
+# define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf)
+#endif // C
+
+/* Implementation of spawning function epilog. See CILK_FAKE_EPILOG macro and
+ * __cilk_fake_stack_frame destructor body.
+ */
+#define CILK_FAKE_CLEANUP_FRAME(sf) do { \
+ if (! (sf).worker) break; \
+ CILK_FAKE_SYNC_IMP(sf); \
+ CILK_FAKE_POP_FRAME(sf); \
+ if ((sf).flags != CILK_FAKE_VERSION_FLAG) \
+ __cilkrts_leave_frame(&(sf)); \
+} while (0)
+
+/* Implementation of CILK_FAKE_SYNC with sf argument */
+#define CILK_FAKE_SYNC_IMP(sf) do { \
+ if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \
+ (sf).parent_pedigree = (sf).worker->pedigree; \
+ CILK_FAKE_SAVE_FP(sf); \
+ if (! CILK_SETJMP((sf).ctx)) \
+ __cilkrts_sync(&(sf)); \
+ } \
+ ++(sf).worker->pedigree.rank; \
+} while (0)
+
+/* Save the floating-point control registers.
+ * The definition of CILK_FAKE_SAVE_FP is compiler specific (and
+ * architecture specific on Windows)
+ */
+#ifdef _MSC_VER
+# define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr)
+# define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr)
+# if defined(_M_IX86)
+/* Windows x86 */
+# define CILK_FAKE_SAVE_FP(sf) do { \
+ __asm \
+ { \
+ mov eax, sf \
+ stmxcsr [eax+MXCSR_OFFSET] \
+ fnstcw [eax+FPCSR_OFFSET] \
+ } \
+ } while (0)
+# elif defined(_M_X64)
+/* Windows Intel64 - Not needed - saved by setjmp call */
+# define CILK_FAKE_SAVE_FP(sf) ((void) sf)
+# else
+# error "Unknown architecture"
+# endif /* Microsoft architecture specifics */
+#else
+/* Non-Windows: stmxcsr/fnstcw store into sf, so both operands are outputs ("=m") */
+# define CILK_FAKE_SAVE_FP(sf) do { \
+ __asm__ __volatile__ ( "stmxcsr %0\n\t" \
+ "fnstcw %1" : "=m" ((sf).mxcsr), "=m" ((sf).fpcsr)); \
+ } while (0)
+#endif
+
+/* Call the spawn helper as part of a fake spawn */
+#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \
+ CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \
+ CILK_FAKE_SAVE_FP(__cilk_sf); \
+ if (__builtin_expect(! CILK_SETJMP(__cilk_sf.ctx), 1)) { \
+ helper; \
+ } \
+} while (0)
+
+/* Body of a spawn helper function. In addition to the parent frame and the
+ * expression to spawn, pass it any number of statements to be executed before
+ * detaching.
+ */
+#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...) \
+ CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf); \
+ __VA_ARGS__; \
+ __cilk_fake_detach(&__cilk_sf); \
+ expr; \
+ CILK_FAKE_SPAWN_HELPER_EPILOG()
+
+/* Prolog for a spawn helper function */
+#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf) \
+ __cilk_fake_spawn_helper_stack_frame __cilk_sf; \
+ __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf))
+
+/* Implementation of spawn helper epilog. See CILK_FAKE_SPAWN_HELPER_EPILOG
+ * and the __cilk_fake_spawn_helper_stack_frame destructor.
+ */
+#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do { \
+ if (! (sf).worker) break; \
+ CILK_FAKE_POP_FRAME(sf); \
+ __cilkrts_leave_frame(&(sf)); \
+} while (0)
+
+/* Epilog to execute at the end of a spawn helper */
+#ifdef __cplusplus
+ // Epilog handled by __cilk_fake_spawn_helper_stack_frame destructor
+# define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf)
+#else
+# define CILK_FAKE_SPAWN_HELPER_EPILOG() \
+ CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf)
+#endif
+
+/* Pop the current frame off of the call chain */
+#define CILK_FAKE_POP_FRAME(sf) do { \
+ (sf).worker->current_stack_frame = (sf).call_parent; \
+ (sf).call_parent = 0; \
+} while (0)
+
+#ifdef _WIN32
+/* define macros for synching functions before allowing them to propagate. */
+# define CILK_FAKE_EXCEPT_BEGIN \
+ if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) {
+
+# define CILK_FAKE_EXCEPT_END \
+ } else { \
+ assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\
+ == CILK_FRAME_EXCEPTING); \
+ __cilkrts_rethrow(&__cilk_sf); \
+ exit(0); \
+ }
+#else
+# define CILK_EXCEPT_BEGIN {
+# define CILK_EXCEPT_END }
+#endif
+
+#ifdef __cplusplus
+// The following definitions depend on C++ features.
+
+// Simulate "_Cilk_spawn expr", where expr must be a function call.
+//
+// Note: this macro does not correctly construct function arguments.
+// According to the ABI specification, function arguments should be evaluated
+// before the detach and destroyed after the detach. This macro both
+// evaluates and destroys them after the detach. This means that if any part
+// of the function argument expression depends on a value that is modified in
+// the continuation of the spawn, a race will occur between the continuation and
+// the argument evaluation.
+//
+// To work around this problem, this macro accepts an arbitrary list of
+// declarations and statements (separated by semicolons) that are evaluated
+// before the detach. Thus, to simulate:
+//
+// _Cilk_spawn f(expr);
+//
+// one would write:
+//
+// CILK_FAKE_SPAWN(f(arg), auto arg = expr);
+//
+// Despite appearing in the reverse order, the 'arg' variable is created and
+// initialized before the detach and the call to f(arg) occurs after the
+// detach.
+#define CILK_FAKE_SPAWN(expr, ...) \
+ CILK_FAKE_CALL_SPAWN_HELPER( \
+ CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf))
+
+// Simulate "ret = cilk_spawn expr". See CILK_FAKE_SPAWN for constraints.
+#define CILK_FAKE_SPAWN_R(ret, expr, ...) \
+ CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__)
+
+// Create a spawn helper as a C++11 lambda function. In addition to the
+// expression to spawn, this macro takes any number of statements to be
+// executed before detaching.
+#define CILK_FAKE_SPAWN_HELPER(expr, ...) \
+ [&](__cilkrts_stack_frame *parent_sf) { \
+ CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \
+ }
+
+// C++ version of a __cilkrts_stack_frame for a spawning function.
+// This struct is identical to __cilkrts_stack_frame except that the
+// destructor automatically does frame cleanup.
+struct __cilk_fake_stack_frame : __cilkrts_stack_frame
+{
+ // Extension of __cilkrts_stack_frame with constructor and destructor
+ __cilk_fake_stack_frame() { }
+ __forceinline ~__cilk_fake_stack_frame() {
+ CILK_FAKE_CLEANUP_FRAME(*this);
+ }
+};
+
+// C++ version of a __cilkrts_stack_frame for a spawn helper.
+// This struct is identical to __cilkrts_stack_frame except that the
+// destructor automatically does frame cleanup.
+struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame
+{
+ // Extension of __cilkrts_stack_frame with constructor and destructor
+ __cilk_fake_spawn_helper_stack_frame() { worker = 0; }
+ __forceinline ~__cilk_fake_spawn_helper_stack_frame() {
+ CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this);
+ }
+};
+#else
+// For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are
+// identical to __cilkrts_stack_frame. Frame cleanup must be performed
+// explicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG)
+typedef __cilkrts_stack_frame __cilk_fake_stack_frame;
+typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame;
+#endif
+
+#endif // ! defined(INCLUDED_CILK_FAKE_DOT_H)
diff --git a/libcilkrts/include/internal/cilk_version.h b/libcilkrts/include/internal/cilk_version.h
index d0d3bc051d4..30d40393d3b 100644
--- a/libcilkrts/include/internal/cilk_version.h
+++ b/libcilkrts/include/internal/cilk_version.h
@@ -1,37 +1,42 @@
// cilk_version.h
//
-// Copyright (C) 2009-2012
-// Intel Corporation
-//
-// This file is part of the Intel Cilk Plus Library. This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 3, or (at your option)
-// any later version.
-//
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-// GNU General Public License for more details.
-//
-// Under Section 7 of GPL version 3, you are granted additional
-// permissions described in the GCC Runtime Library Exception, version
-// 3.1, as published by the Free Software Foundation.
-//
-// You should have received a copy of the GNU General Public License and
-// a copy of the GCC Runtime Library Exception along with this program;
-// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-// <http://www.gnu.org/licenses/>.
+// @copyright
+// Copyright (C) 2009-2013
+// Intel Corporation
+//
+// @copyright
+// This file is part of the Intel Cilk Plus Library. This library is free
+// software; you can redistribute it and/or modify it under the
+// terms of the GNU General Public License as published by the
+// Free Software Foundation; either version 3, or (at your option)
+// any later version.
+//
+// @copyright
+// This library is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// @copyright
+// Under Section 7 of GPL version 3, you are granted additional
+// permissions described in the GCC Runtime Library Exception, version
+// 3.1, as published by the Free Software Foundation.
+//
+// @copyright
+// You should have received a copy of the GNU General Public License and
+// a copy of the GCC Runtime Library Exception along with this program;
+// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+// <http://www.gnu.org/licenses/>.
// DO NOT EDIT THIS FILE!
//
// It was automatically generated by cilkrts/include/internal/Makefile
#define VERSION_MAJOR 2
#define VERSION_MINOR 0
-#define VERSION_BUILD 2856
+#define VERSION_BUILD 3520
#define VERSION_REV 0
-#define VERSION_STRING "2,0,2856,0"
-#define VERSION_HASH "71912a126cb8"
-#define VERSION_BRANCH "v13.0"
+#define VERSION_STRING "2,0,3520,0"
+#define VERSION_HASH "d5d11f1fb4cf"
+#define VERSION_BRANCH "eng"
#define TBB_REV_NUMBER ""
-#define VERSION_YEAR "2012"
+#define VERSION_YEAR "2013"
diff --git a/libcilkrts/include/internal/metacall.h b/libcilkrts/include/internal/metacall.h
index a3450eae9be..9418ad57279 100644
--- a/libcilkrts/include/internal/metacall.h
+++ b/libcilkrts/include/internal/metacall.h
@@ -1,28 +1,33 @@
// -*- C++ -*-
/*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
******************************************************************************
*
diff --git a/libcilkrts/include/internal/rev.mk b/libcilkrts/include/internal/rev.mk
index 574df641ab7..5b86e6a863c 100644
--- a/libcilkrts/include/internal/rev.mk
+++ b/libcilkrts/include/internal/rev.mk
@@ -1,27 +1,36 @@
#########################################################################
#
-# Copyright (C) 2011-2012
-# Intel Corporation
-#
-# This file is part of the Intel Cilk Plus Library. This library is free
-# software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Under Section 7 of GPL version 3, you are granted additional
-# permissions described in the GCC Runtime Library Exception, version
-# 3.1, as published by the Free Software Foundation.
-#
-# You should have received a copy of the GNU General Public License and
-# a copy of the GCC Runtime Library Exception along with this program;
-# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-# <http://www.gnu.org/licenses/>.
+# @copyright
+# Copyright (C) 2011-2013
+# Intel Corporation
+#
+# @copyright
+# This file is part of the Intel Cilk Plus Library. This library is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# @copyright
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# @copyright
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# @copyright
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
###########################################################################
-CILK_REVISION = 2856
+# DO NOT EDIT THIS FILE!
+#
+# It was automatically generated by cilkrts/include/internal/Makefile
+
+CILK_REVISION = 3520
diff --git a/libcilkrts/runtime/acknowledgements.dox b/libcilkrts/runtime/acknowledgements.dox
new file mode 100644
index 00000000000..9715098ab7a
--- /dev/null
+++ b/libcilkrts/runtime/acknowledgements.dox
@@ -0,0 +1,46 @@
+/* acknowledgements.dox
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/*
+ * This file contains acknowledgements of community contributions to the
+ * Cilk Plus runtime.
+ */
+
+/**
+ * @mainpage
+ *
+ * @section Acknowledgements Acknowledgements
+ *
+ * Modifications to build the Cilk Plus runtime for VxWorks provided by
+ * Brian Kuhl of Wind River.
+ */
diff --git a/libcilkrts/runtime/bug.cpp b/libcilkrts/runtime/bug.cpp
index 4ed4d6e553a..1a626b7ed02 100644
--- a/libcilkrts/runtime/bug.cpp
+++ b/libcilkrts/runtime/bug.cpp
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "bug.h"
diff --git a/libcilkrts/runtime/bug.h b/libcilkrts/runtime/bug.h
index 5117ba442a9..1732f049a65 100644
--- a/libcilkrts/runtime/bug.h
+++ b/libcilkrts/runtime/bug.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -62,12 +67,31 @@ COMMON_PORTABLE extern const char *const __cilkrts_assertion_failed;
#define CILK_ASSERT(ex) \
(__builtin_expect((ex) != 0, 1) ? (void)0 : \
__cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, #ex))
+
+#define CILK_ASSERT_MSG(ex, msg) \
+ (__builtin_expect((ex) != 0, 1) ? (void)0 : \
+ __cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, \
+ #ex "\n " msg))
#endif // CILK_ASSERT
/**
* Assert that there is no uncaught exception.
+ *
+ * Not valid on Windows or Android.
+ *
+ * On Android, calling std::uncaught_exception with the stlport library causes
+ * a seg fault. Since we're not supporting exceptions there at this point,
+ * just don't do the check. It works with the GNU STL library, but that's
+ * GPL V3 licensed.
*/
COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void);
+#if defined(_WIN32) || defined(ANDROID)
+# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION()
+#else
+# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION() \
+ cilkbug_assert_no_uncaught_exception()
+#endif
+
/**
* Call __cilkrts_bug with a standard message that the runtime state is
@@ -76,7 +100,9 @@ COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void);
COMMON_SYSDEP void abort_because_rts_is_corrupted(void);
// Debugging aids
-#ifdef _WIN32
+#ifndef _DEBUG
+# define DBGPRINTF(_fmt, ...)
+#elif defined(_WIN32)
/**
* Write debugging output. On windows this is written to the debugger.
@@ -93,16 +119,17 @@ COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...) cilk_nothrow;
* @param _fmt printf-style format string. Any remaining parameters will be
* interpreted based on the format string text.
*/
-# ifdef _DEBUG
# define DBGPRINTF(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__)
-# else
-# define DBGPRINTF(_fmt, ...)
-# endif // _DEBUG
-#else
- // Not yet implemented on the Unix side
-# define DBGPRINTF(_fmt, ...)
-#endif // _WIN32
+#else /* if _DEBUG && !_WIN32 */
+ /* Non-Windows debug logging. Someday we should make GetCurrentFiber()
+ * and GetWorkerFiber() do something.
+ */
+# include <stdio.h>
+ __CILKRTS_INLINE void* GetCurrentFiber() { return 0; } /* placeholder: always returns 0 */
+ __CILKRTS_INLINE void* GetWorkerFiber(__cilkrts_worker* w) { return 0; } /* placeholder: w is ignored, always returns 0 */
+# define DBGPRINTF(_fmt, ...) fprintf(stderr, _fmt, __VA_ARGS__)
+#endif // _DEBUG
__CILKRTS_END_EXTERN_C
diff --git a/libcilkrts/runtime/c_reducers.c b/libcilkrts/runtime/c_reducers.c
index 5ed23582154..0e775ec2990 100644
--- a/libcilkrts/runtime/c_reducers.c
+++ b/libcilkrts/runtime/c_reducers.c
@@ -2,186 +2,51 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
/* Implementation of C reducers */
-#include <cilk/reducer_opadd.h>
-#include <cilk/reducer_opand.h>
-#include <cilk/reducer_opor.h>
-#include <cilk/reducer_opxor.h>
-#include <cilk/reducer_max.h>
-#include <cilk/reducer_min.h>
-#include <limits.h>
-#include <math.h> /* HUGE_VAL */
-
-#ifndef _MSC_VER
-# include <stdint.h> /* WCHAR_MIN */
-#else
-# include <wchar.h> /* WCHAR_MIN */
-#endif
-
-/* Floating-point constants */
-#ifndef HUGE_VALF
- static const unsigned int __huge_valf[] = {0x7f800000};
-# define HUGE_VALF (*((const float *)__huge_valf))
-#endif
-
-#ifndef HUGE_VALL
- static const unsigned int __huge_vall[] = {0, 0, 0x00007f80, 0};
-# define HUGE_VALL (*((const long double *)__huge_vall))
-#endif
-
// Disable warning about integer conversions losing significant bits.
// The code is correct as is.
+#ifdef __INTEL_COMPILER
#pragma warning(disable:2259)
+#endif
-CILK_C_REDUCER_OPADD_IMP(char,char)
-CILK_C_REDUCER_OPADD_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPADD_IMP(signed char,schar)
-CILK_C_REDUCER_OPADD_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPADD_IMP(short,short)
-CILK_C_REDUCER_OPADD_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPADD_IMP(int,int)
-CILK_C_REDUCER_OPADD_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPADD_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPADD_IMP(long,long)
-CILK_C_REDUCER_OPADD_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPADD_IMP(long long,longlong)
-CILK_C_REDUCER_OPADD_IMP(unsigned long long,ulonglong)
-CILK_C_REDUCER_OPADD_IMP(float,float)
-CILK_C_REDUCER_OPADD_IMP(double,double)
-CILK_C_REDUCER_OPADD_IMP(long double,longdouble)
-
-CILK_C_REDUCER_OPAND_IMP(char,char)
-CILK_C_REDUCER_OPAND_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPAND_IMP(signed char,schar)
-CILK_C_REDUCER_OPAND_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPAND_IMP(short,short)
-CILK_C_REDUCER_OPAND_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPAND_IMP(int,int)
-CILK_C_REDUCER_OPAND_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPAND_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPAND_IMP(long,long)
-CILK_C_REDUCER_OPAND_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPAND_IMP(long long,longlong)
-CILK_C_REDUCER_OPAND_IMP(unsigned long long,ulonglong)
-
-CILK_C_REDUCER_OPOR_IMP(char,char)
-CILK_C_REDUCER_OPOR_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPOR_IMP(signed char,schar)
-CILK_C_REDUCER_OPOR_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPOR_IMP(short,short)
-CILK_C_REDUCER_OPOR_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPOR_IMP(int,int)
-CILK_C_REDUCER_OPOR_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPOR_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPOR_IMP(long,long)
-CILK_C_REDUCER_OPOR_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPOR_IMP(long long,longlong)
-CILK_C_REDUCER_OPOR_IMP(unsigned long long,ulonglong)
-
-CILK_C_REDUCER_OPXOR_IMP(char,char)
-CILK_C_REDUCER_OPXOR_IMP(unsigned char,uchar)
-CILK_C_REDUCER_OPXOR_IMP(signed char,schar)
-CILK_C_REDUCER_OPXOR_IMP(wchar_t,wchar_t)
-CILK_C_REDUCER_OPXOR_IMP(short,short)
-CILK_C_REDUCER_OPXOR_IMP(unsigned short,ushort)
-CILK_C_REDUCER_OPXOR_IMP(int,int)
-CILK_C_REDUCER_OPXOR_IMP(unsigned int,uint)
-CILK_C_REDUCER_OPXOR_IMP(unsigned int,unsigned) // alternate name
-CILK_C_REDUCER_OPXOR_IMP(long,long)
-CILK_C_REDUCER_OPXOR_IMP(unsigned long,ulong)
-CILK_C_REDUCER_OPXOR_IMP(long long,longlong)
-CILK_C_REDUCER_OPXOR_IMP(unsigned long long,ulonglong)
-
-CILK_C_REDUCER_MAX_IMP(char,char,CHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned char,uchar,0)
-CILK_C_REDUCER_MAX_IMP(signed char,schar,SCHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(wchar_t,wchar_t,WCHAR_MIN)
-CILK_C_REDUCER_MAX_IMP(short,short,SHRT_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned short,ushort,0)
-CILK_C_REDUCER_MAX_IMP(int,int,INT_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned int,uint,0)
-CILK_C_REDUCER_MAX_IMP(unsigned int,unsigned,0) // alternate name
-CILK_C_REDUCER_MAX_IMP(long,long,LONG_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned long,ulong,0)
-CILK_C_REDUCER_MAX_IMP(long long,longlong,LLONG_MIN)
-CILK_C_REDUCER_MAX_IMP(unsigned long long,ulonglong,0)
-CILK_C_REDUCER_MAX_IMP(float,float,-HUGE_VALF)
-CILK_C_REDUCER_MAX_IMP(double,double,-HUGE_VAL)
-CILK_C_REDUCER_MAX_IMP(long double,longdouble,-HUGE_VALL)
-CILK_C_REDUCER_MAX_INDEX_IMP(char,char,CHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned char,uchar,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(signed char,schar,SCHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(wchar_t,wchar_t,WCHAR_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(short,short,SHRT_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned short,ushort,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(int,int,INT_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,uint,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,unsigned,0) // alternate name
-CILK_C_REDUCER_MAX_INDEX_IMP(long,long,LONG_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long,ulong,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(long long,longlong,LLONG_MIN)
-CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long long,ulonglong,0)
-CILK_C_REDUCER_MAX_INDEX_IMP(float,float,-HUGE_VALF)
-CILK_C_REDUCER_MAX_INDEX_IMP(double,double,-HUGE_VAL)
-CILK_C_REDUCER_MAX_INDEX_IMP(long double,longdouble,-HUGE_VALL)
+#define CILK_C_DEFINE_REDUCERS
-CILK_C_REDUCER_MIN_IMP(char,char,CHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned char,uchar,CHAR_MIN)
-CILK_C_REDUCER_MIN_IMP(signed char,schar,SCHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(wchar_t,wchar_t,WCHAR_MAX)
-CILK_C_REDUCER_MIN_IMP(short,short,SHRT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned short,ushort,USHRT_MAX)
-CILK_C_REDUCER_MIN_IMP(int,int,INT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned int,uint,UINT_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned int,unsigned,UINT_MAX) // alternate name
-CILK_C_REDUCER_MIN_IMP(long,long,LONG_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned long,ulong,ULONG_MAX)
-CILK_C_REDUCER_MIN_IMP(long long,longlong,LLONG_MAX)
-CILK_C_REDUCER_MIN_IMP(unsigned long long,ulonglong,ULLONG_MAX)
-CILK_C_REDUCER_MIN_IMP(float,float,HUGE_VALF)
-CILK_C_REDUCER_MIN_IMP(double,double,HUGE_VAL)
-CILK_C_REDUCER_MIN_IMP(long double,longdouble,HUGE_VALL)
-CILK_C_REDUCER_MIN_INDEX_IMP(char,char,CHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned char,uchar,CHAR_MIN)
-CILK_C_REDUCER_MIN_INDEX_IMP(signed char,schar,SCHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(wchar_t,wchar_t,WCHAR_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(short,short,SHRT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned short,ushort,USHRT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(int,int,INT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,uint,UINT_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,unsigned,UINT_MAX) // alternate name
-CILK_C_REDUCER_MIN_INDEX_IMP(long,long,LONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long,ulong,ULONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(long long,longlong,LLONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long long,ulonglong,ULLONG_MAX)
-CILK_C_REDUCER_MIN_INDEX_IMP(float,float,HUGE_VALF)
-CILK_C_REDUCER_MIN_INDEX_IMP(double,double,HUGE_VAL)
-CILK_C_REDUCER_MIN_INDEX_IMP(long double,longdouble,HUGE_VALL)
+#include <cilk/reducer_opadd.h>
+#include <cilk/reducer_opand.h>
+#include <cilk/reducer_opmul.h>
+#include <cilk/reducer_opor.h>
+#include <cilk/reducer_opxor.h>
+#include <cilk/reducer_min_max.h>
/* End c_reducers.c */
diff --git a/libcilkrts/runtime/cilk-abi-cilk-for.cpp b/libcilkrts/runtime/cilk-abi-cilk-for.cpp
index a584f86c2ca..89c3d5cd5b2 100644
--- a/libcilkrts/runtime/cilk-abi-cilk-for.cpp
+++ b/libcilkrts/runtime/cilk-abi-cilk-for.cpp
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2011, 2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -114,6 +119,9 @@ void call_cilk_for_loop_body(count_t low, count_t high,
__cilkrts_worker *w,
__cilkrts_pedigree *loop_root_pedigree)
{
+ // Cilkscreen should not report this call in a stack trace
+ __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0);
+
// The worker is only valid until the first spawn. Fetch the
// __cilkrts_stack_frame out of the worker, since it will be stable across
// steals. The sf pointer actually points to the *parent's*
@@ -201,7 +209,6 @@ capture_spawn_arg_stack_frame(__cilkrts_stack_frame* &sf, __cilkrts_worker* w)
return w;
}
-
/*
* cilk_for_recursive
*
@@ -225,6 +232,10 @@ void cilk_for_recursive(count_t low, count_t high,
__cilkrts_pedigree *loop_root_pedigree)
{
tail_recurse:
+ // Cilkscreen should not report this call in a stack trace
+ // This needs to be done every time the worker resumes
+ __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0);
+
count_t count = high - low;
// Invariant: count > 0, grain >= 1
if (count > grain)
@@ -269,6 +280,9 @@ static void noop() { }
template <typename count_t, typename F>
static void cilk_for_root(F body, void *data, count_t count, int grain)
{
+ // Cilkscreen should not report this call in a stack trace
+ __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0);
+
// Pedigree computation:
//
// If the last pedigree node on entry to the _Cilk_for has value X,
@@ -353,6 +367,9 @@ extern "C" {
CILK_ABI_THROWS_VOID __cilkrts_cilk_for_32(__cilk_abi_f32_t body, void *data,
cilk32_t count, int grain)
{
+ // Cilkscreen should not report this call in a stack trace
+ __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0);
+
// Check for an empty range here as an optimization - don't need to do any
// __cilkrts_stack_frame initialization
if (count > 0)
diff --git a/libcilkrts/runtime/cilk-abi-vla-internal.c b/libcilkrts/runtime/cilk-abi-vla-internal.c
new file mode 100644
index 00000000000..2669ed37d75
--- /dev/null
+++ b/libcilkrts/runtime/cilk-abi-vla-internal.c
@@ -0,0 +1,78 @@
+/* cilk-abi-vla-internal.c -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/*
+ * These functions are provided in their own compilation unit so I can debug
+ * them. cilk-abi-vla.c must always be compiled with optimization on so that
+ * inlining occurs.
+ */
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+#include "bug.h"
+#include "full_frame.h"
+#include "local_state.h"
+
+#include <stdlib.h>
+#include <stdint.h>
+
+#include "bug.h"
+
+void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf, /* sf is not used by this body */
+ size_t full_size,
+ uint32_t align) /* align is not used: malloc() receives full_size only */
+{
+ return malloc(full_size); /* returned unchecked, so the caller may receive NULL */
+}
+
+void vla_internal_heap_free(void *t, size_t size) /* size is not used by this body */
+{
+ free(t); /* plain pass-through to the C runtime heap */
+}
+
+void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
+ size_t full_size)
+{
+ // The __cilkrts_stack_frame must be initialized: sf->worker is dereferenced below
+ CILK_ASSERT(sf->worker);
+
+#if 1 /* always taken, so the #else branch below is never compiled */
+ // Add full_size to ff->sync_sp so that when we return, the VLA will no
+ // longer be allocated on the stack
+ __cilkrts_adjust_stack(sf->worker->l->frame_ff, full_size);
+#else /* disabled alternative, kept for reference */
+ // Hand-inlined form of the __cilkrts_adjust_stack call above
+ full_frame *ff = sf->worker->l->frame_ff;
+ ff->sync_sp = ff->sync_sp + full_size;
+#endif
+}
diff --git a/libcilkrts/runtime/cilk-abi-vla-internal.h b/libcilkrts/runtime/cilk-abi-vla-internal.h
new file mode 100644
index 00000000000..f8d3c5aaa89
--- /dev/null
+++ b/libcilkrts/runtime/cilk-abi-vla-internal.h
@@ -0,0 +1,85 @@
+/* cilk-abi-vla-internal.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/**
+ * @file cilk-abi-vla-internal.h
+ *
+ * @brief Allocation/deallocation function for use with Variable Length
+ * Arrays in spawning functions.
+ *
+ * These should be the only functions in the Cilk runtime allocating memory
+ * from the standard C runtime heap. This memory will be provided to user
+ * code for use in VLAs, when the memory cannot be allocated from the stack.
+ *
+ * While these functions are simply passthroughs to malloc and free at the
+ * moment, once we've got the basics of VLA allocations working we'll make
+ * them do fancier tricks.
+ */
+
+/**
+ * @brief Allocate memory from the heap for use by a Variable Length Array in
+ * a spawning function.
+ *
+ * @param sf The __cilkrts_stack_frame for the spawning function containing
+ * the VLA.
+ * @param full_size The number of bytes to be allocated, including any tags
+ * needed to identify this as allocated from the heap.
+ * @param align Any alignment necessary for the allocation.
+ */
+
+void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf,
+ size_t full_size,
+ uint32_t align);
+
+/**
+ * @brief Deallocate memory from the heap used by a Variable Length Array in
+ * a spawning function.
+ *
+ * @param t The address of the memory block to be freed.
+ * @param size The size of the memory block to be freed.
+ */
+
+void vla_internal_heap_free(void *t,
+ size_t size);
+
+/**
+ * @brief Deallocate memory from the original stack. We'll do this by adding
+ * full_size to ff->sync_sp. So after the sync, the Variable Length Array
+ * will no longer be allocated on the stack.
+ *
+ * @param sf The __cilkrts_stack_frame for the spawning function that is
+ * deallocating a VLA.
+ * @param full_size The size of the VLA, including any alignment and tags.
+ */
+void vla_free_from_original_stack(__cilkrts_stack_frame *sf,
+ size_t full_size);
diff --git a/libcilkrts/runtime/cilk-abi-vla.c b/libcilkrts/runtime/cilk-abi-vla.c
new file mode 100644
index 00000000000..9de1f9f3670
--- /dev/null
+++ b/libcilkrts/runtime/cilk-abi-vla.c
@@ -0,0 +1,417 @@
+/* cilk-abi-vla.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of Variable Length Array (VLA) ABI.
+ *
+ * __cilkrts_stack_alloc() and __cilkrts_stack_free must be compiled
+ * such that ebp/rbp is used for the stack frames. This is done by having
+ * each of them use alloca, which forces the special frame types needed on
+ * each of the ABIs. Additionally, for some forms of stack frame, special
+ * care must be taken because the alloca space may not be at the bottom of the
+ * stack frame of the caller. For Intel64 windows, and for some options
+ * with other ABIs, a preallocated parameter block may exist on the stack
+ * at a lower address than the alloca. If this is the case, the parameter
+ * distance_from_sp_to_alloca_area will be non-zero, and will indicate how
+ * much pre-allocated parameter space resides in the caller's stack frame
+ * between the alloca area, and the bottom of the stack when the call to
+ * the cilkrts is made. As such, when non-zero it also includes any space
+ * used for passing the cilkrts_stack_alloc or cilkrts_stack_free parameters.
+ */
+
+#include <assert.h>
+#include <stdlib.h>
+#include <stdint.h>
+#ifdef _WIN32
+# define alloca _alloca
+# define INLINE static __inline
+# pragma warning(disable:1025) // Don't whine about zero extending result of unary operation
+#else
+# include <alloca.h>
+# define INLINE static inline
+#endif
+
+#include "internal/abi.h"
+#include "cilk-abi-vla-internal.h"
+
+#if defined(__x86_64) || defined(_M_X64)
+INLINE void setsp(void *val) // Intel64: load val into the stack pointer (%rsp).
+{
+ __asm__("movq %0, %%rsp" : : "r"(val): "rsp"); // val is passed in a register; rsp is listed as clobbered
+}
+INLINE char* getsp(void) // Intel64: return the current value of the stack pointer (%rsp).
+{
+ void *res;
+
+ __asm__("movq %%rsp, %0" : "=r"(res): : "rsp"); // res = %rsp
+ return res;
+}
+INLINE char* getbp(void) // Intel64: return the current value of the frame pointer (%rbp).
+{
+ void *res;
+
+ __asm__("movq %%rbp, %0" : "=r"(res): : "rbp"); // res = %rbp
+ return res;
+}
+INLINE void copy_frame_down_and_move_bp( // Intel64: copy a frame to a lower address, then load new_ebp into %rbp.
+ char *dst, // start of the destination region
+ char *src, // start of the source region
+ size_t cpy_bytes, // number of bytes to copy
+ char *new_ebp // value loaded into %rbp once the copy is done
+)
+{
+ // Copies cpy_bytes bytes from src to dst in ascending address order and
+ // then loads new_ebp into rbp. dst is guaranteed to be at a lower
+ // address than src, so the ascending copy is safe even if the two
+ // regions overlap. cpy_bytes is also guaranteed to be a multiple of 8,
+ // and the copy is done in 64 bit word chunks for best efficiency.
+ __asm__(
+ "movq %0, %%rdi;" // rdi = dst
+ "movq %1, %%rsi;" // rsi = src
+ "movq %2, %%rcx;" // rcx = cpy_bytes
+ "shrq $3, %%rcx;" // rcx /= 8: byte count -> 64 bit word count
+ "rep movsq;" // copy rcx 64 bit words from [rsi] to [rdi]
+ "movq %3, %%rbp" : // rbp = new_ebp
+ : // no outputs
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "rsi", "rdi", "rcx", "rbp", "memory");
+}
+INLINE void copy_frame_up_and_move_bp( // Intel64: copy a frame to a higher address, then load new_ebp into %rbp.
+ char *dst, // start of the destination region
+ char *src, // start of the source region
+ size_t cpy_bytes, // number of bytes to copy
+ char *new_ebp // value loaded into %rbp once the copy is done
+)
+{
+ // Copies cpy_bytes bytes from src to dst in descending address order and
+ // then loads new_ebp into rbp. dst is guaranteed to be at a higher
+ // address than src, so the descending copy is safe even if the two
+ // regions overlap. cpy_bytes is also guaranteed to be a multiple of 8,
+ // and the copy is done in 64 bit word chunks for best efficiency.
+ dst += cpy_bytes - 8; // point at the last 64 bit word of the destination
+ src += cpy_bytes - 8; // point at the last 64 bit word of the source
+ __asm__(
+ "movq %0, %%rdi;" // rdi = last word of dst
+ "movq %1, %%rsi;" // rsi = last word of src
+ "movq %2, %%rcx;" // rcx = cpy_bytes
+ "shrq $3, %%rcx;" // rcx /= 8: byte count -> 64 bit word count
+ "std; rep movsq; cld;" // copy downwards, then restore the direction flag
+ "movq %3, %%rbp;" : // rbp = new_ebp; must be movq, rbp is a 64 bit register
+ : // no outputs
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "rsi", "rdi", "rcx", "rbp", "memory");
+}
+#else
+INLINE void setsp(void *val) // IA-32: load val into the stack pointer (%esp).
+{
+ __asm__("movl %0, %%esp" : : "r"(val): "esp"); // val is passed in a register; esp is listed as clobbered
+}
+INLINE char* getsp(void) // IA-32: return the current value of the stack pointer (%esp).
+{
+ void *res;
+
+ __asm__("movl %%esp, %0" : "=r"(res): : "esp"); // res = %esp
+ return res;
+}
+INLINE char* getbp(void) // IA-32: return the current value of the frame pointer (%ebp).
+{
+ void *res;
+
+ __asm__("movl %%ebp, %0" : "=r"(res): : "ebp"); // res = %ebp
+ return res;
+}
+INLINE void copy_frame_down_and_move_bp( // IA-32: copy a frame to a lower address, then load new_ebp into %ebp.
+ char *dst, // start of the destination region
+ char *src, // start of the source region
+ size_t cpy_bytes, // number of bytes to copy
+ char *new_ebp // value loaded into %ebp once the copy is done
+)
+{
+ // Copies cpy_bytes bytes from src to dst in ascending address order and
+ // then loads new_ebp into ebp. dst is guaranteed to be at a lower
+ // address than src, so the ascending copy is safe even if the two
+ // regions overlap. cpy_bytes is also guaranteed to be a multiple of 4,
+ // and the copy is done in 32 bit word chunks for best efficiency.
+ __asm__(
+ "movl %0, %%edi;" // edi = dst
+ "movl %1, %%esi;" // esi = src
+ "movl %2, %%ecx;" // ecx = cpy_bytes
+ "shrl $2, %%ecx;" // ecx /= 4: byte count -> 32 bit word count
+ "rep movsd;" // copy ecx 32 bit words from [esi] to [edi]
+ "movl %3, %%ebp" : // ebp = new_ebp
+ : // no outputs
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "esi", "edi", "ecx", "ebp", "memory");
+}
+INLINE void copy_frame_up_and_move_bp( // IA-32: copy a frame to a higher address, then load new_ebp into %ebp.
+ char *dst, // start of the destination region
+ char *src, // start of the source region
+ size_t cpy_bytes, // number of bytes to copy
+ char *new_ebp // value loaded into %ebp once the copy is done
+)
+{
+ // Copies cpy_bytes bytes from src to dst in descending address order and
+ // then loads new_ebp into ebp. dst is guaranteed to be at a higher
+ // address than src, so the descending copy is safe even if the two
+ // regions overlap. cpy_bytes is also guaranteed to be a multiple of 4,
+ // and the copy is done in 32 bit word chunks for best efficiency.
+ dst += cpy_bytes - 4; // point at the last 32 bit word of the destination
+ src += cpy_bytes - 4; // point at the last 32 bit word of the source
+ __asm__(
+ "movl %0, %%edi;" // edi = last word of dst
+ "movl %1, %%esi;" // esi = last word of src
+ "movl %2, %%ecx;" // ecx = cpy_bytes
+ "shrl $2, %%ecx;" // ecx /= 4: byte count -> 32 bit word count
+ "std; rep movsd; cld;" // copy downwards, then restore the direction flag
+ "movl %3, %%ebp" : // ebp = new_ebp
+ // (disabled output constraints) "=D"(dst), "=S"(src), "=C"(cpy_bytes) :
+ : // no outputs
+ "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) :
+ "esi", "edi", "ecx", "ebp", "memory");
+}
+#endif
+
+
+#define c_cilk_ptr_from_heap 0xc2f2f00d
+#define c_cilk_ptr_from_stack 0xc3f30d0f
+
+CILK_ABI(__cilkrts_void_ptr) // Allocates a VLA: from the heap if sf is unsynched, else from the stack.
+__cilkrts_stack_alloc(
+ __cilkrts_stack_frame *sf,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // align is always >= minimum stack alignment and
+ // >= ptr_size as well, and must be a power of 2.
+ uint32_t needs_tag // non-zero if the pointer being returned needs to
+ // be tagged
+)
+{
+#ifdef __INTEL_COMPILER
+ // full_size is size rounded up to a multiple of align, plus one
+ // extra align-sized slot for the marker when a tag is requested.
+ size_t full_size = (size + align - 1) & ~(align - 1);
+
+ if (needs_tag) {
+ full_size += align;
+ }
+
+ char *t;
+ if (sf->worker != 0 &&
+ ((sf->flags & CILK_FRAME_UNSYNCHED) != 0)) {
+ t = vla_internal_heap_alloc(sf, full_size, align);
+ if (needs_tag) {
+ t += align; // skip the marker slot; the tag sits just below t
+ ((uint32_t*)t)[-1] = c_cilk_ptr_from_heap;
+ }
+ return (void *)t;
+ }
+
+ // stack is still synced, so allocate full_size from esp and,
+ // when a tag is requested, record in the 32 bits immediately
+ // below the returned pointer that this space was allocated
+ // on the stack.
+ char *old_ebp = getbp();
+ char *old_esp = getsp();
+
+ // make top_ptr point to base of first parameter.
+ char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
+ sizeof(char *));
+ size_t param_size = 0;
+
+#if defined(__x86_64)
+ // For Intel64 linux & MACH ABI, all the parameters were passed in
+ // registers, so top of the stack frame above the return address
+ // is just the size of the return address plus
+ // distance_from_sp_to_alloca_area on the chance that the alloca
+ // area isn't at the very bottom of the calling function's stack.
+#elif defined(__MACH__)
+ // For ia32 MACH, parameter size is always a multiple of 16
+ // bytes to keep the stack 16 byte aligned. So we need to round
+ // number of parameters up to multiple of 4.
+ param_size = 8 * sizeof(char *);
+#else
+ // For both windows Intel64 ABI, and the IA32 windows and
+ // linux ABIs, space is reserved on the stack for all these
+ // parameters. param_size is 5 * size of a stack slot.
+ param_size = 5 * sizeof(char *);
+#endif
+
+ // now make top_ptr point above the params, or if
+ // distance_from_sp_to_alloca_area is not zero, make
+ // it point above that area. When non-zero,
+ // distance_from_sp_to_alloca_area is expected to contain
+ // the parameter space, so we only add one or the other,
+ // not both.
+ top_ptr += (distance_from_sp_to_alloca_area != 0) ?
+ distance_from_sp_to_alloca_area : param_size;
+
+ // t needs to end up at current value of top_ptr less full_size and less
+ // distance_from_sp_to_alloca_area and
+ // then rounded down to the alignment needed. Then we have to bump
+ // esp down by current frame_size, so that when all is done with respect
+ // to executing the return sequence, the final value of esp will be the
+ // same value as t.
+ t = (top_ptr - full_size) - distance_from_sp_to_alloca_area;
+ intptr_t temp = (intptr_t)t;
+ temp &= ~((intptr_t)(align - 1)); // round down to a multiple of align
+ t = (char *)temp;
+
+ // ok, the value of t is set where we need it. Now set esp
+ // to the value of t less the current frame size.
+ // So now when we do regular return esp should be left such
+ // that it has moved down by full_size.
+ size_t cur_fm_size = (top_ptr - old_esp);
+ char *new_esp = t - cur_fm_size;
+ char *new_ebp = old_ebp - (old_esp - new_esp); // move ebp down by the same distance as esp
+
+ // extend the stack down by at least the difference between where
+ // I want it to be and where it currently is. This should take care
+ // of touching any pages necessary.
+ char *foo = alloca(old_esp - new_esp);
+ setsp(foo < new_esp ? foo : new_esp); // use whichever address is lower
+
+ // Disabled: the copy below is issued without setting esp to
+ // new_esp exactly -- setsp(new_esp);
+
+ copy_frame_down_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
+
+ if (needs_tag) {
+ t += align; // skip the marker slot; the tag sits just below t
+ ((uint32_t*)t)[-1] = c_cilk_ptr_from_stack;
+ }
+
+ return t;
+#else // Not __INTEL_COMPILER
+ // Not supported unless we can figure out how to get the size of the frame
+ return NULL;
+#endif
+}
+
+// Frees the space that was allocated for a variable length array.
+CILK_ABI(void)
+__cilkrts_stack_free(
+ __cilkrts_stack_frame *sf,
+ void *p,
+ size_t size,
+ size_t distance_from_sp_to_alloca_area,
+ uint32_t align, // same requirements as for align in allocation,
+ // and must match alignment that was passed when
+ // doing the allocation
+ uint32_t known_from_stack // non-zero if this is known to be allocated
+ // on the stack, and therefore has no tag
+)
+{
+#ifdef __INTEL_COMPILER
+ uint32_t *t = (uint32_t*)p;
+
+ // full_size is size rounded up to a multiple of align; the extra
+ // align-sized slot for the marker is added below if one was used.
+ size_t full_size = (size + align - 1) & ~(align - 1);
+ if (known_from_stack == 0) {
+ // if the compiler hasn't told the run-time that this is
+ // known to be on the stack, then this pointer must have been
+ // tagged such that the run-time can tell.
+ assert(t[-1] == c_cilk_ptr_from_stack ||
+ t[-1] == c_cilk_ptr_from_heap);
+
+ known_from_stack = t[-1] == c_cilk_ptr_from_stack;
+ full_size += align; // accounts for extra space for marker
+ t = (uint32_t *)(((char *)t) - align); // step back over the marker slot to the start of the block
+ }
+
+ if (known_from_stack) {
+ // alloca usage forces an ebp/rbp based stack frame even though
+ // the size is 0 and the result is unused.
+ char *foo = alloca(0);
+ if (sf->worker == 0 || (sf->flags & CILK_FRAME_UNSYNCHED) == 0) {
+ // p was allocated from current stack frame and we
+ // are synced on current stack frame. Release the space
+ // by sliding this frame up the stack by full_size bytes.
+ char *old_ebp = getbp();
+ char *old_esp = getsp();
+
+ // make top_ptr point to base of first parameter.
+ char *top_ptr = ((char *)(_AddressOfReturnAddress()) +
+ sizeof(char *));
+ size_t param_size = 0;
+
+#if defined(__x86_64)
+ // For Intel64 linux & MACH ABI, all the parameters were passed in
+ // registers, so top of the stack frame above the return address
+ // is just the size of the return address plus
+ // distance_from_sp_to_alloca_area on the chance that the alloca
+ // area isn't at the very bottom of the calling function's stack.
+#elif defined(__MACH__)
+ // For ia32 MACH, parameter size is always a multiple of 16
+ // bytes to keep the stack 16 byte aligned. So we need to round
+ // number of parameters up to multiple of 4.
+ param_size = 8 * sizeof(char *);
+#else
+ // For both windows Intel64 ABI, and the IA32 windows and
+ // linux ABIs, space is reserved on the stack for all these
+ // parameters. param_size is 6 * size of a stack slot.
+ param_size = 6 * sizeof(char *);
+#endif
+
+ // now make top_ptr point above the params, or if
+ // distance_from_sp_to_alloca_area is not zero, make
+ // it point above that area. When non-zero,
+ // distance_from_sp_to_alloca_area is expected to contain
+ // the parameter space, so we only add one or the other,
+ // not both.
+ top_ptr += (distance_from_sp_to_alloca_area != 0) ?
+ distance_from_sp_to_alloca_area : param_size;
+
+ size_t cur_fm_size = (top_ptr - old_esp);
+ char *new_esp = old_esp + full_size;
+ char *new_ebp = old_ebp + full_size;
+
+ copy_frame_up_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp);
+ setsp(new_esp);
+ }
+ else {
+ // p was allocated on stack frame, but that is
+ // no longer the current stack frame. Need to adjust the
+ // saved esp that is somewhere in the cilk runtime so that
+ // on sync, esp will be cut back correctly.
+ vla_free_from_original_stack(sf, full_size);
+ }
+ }
+ else {
+ vla_internal_heap_free(t, full_size); // tagged as heap: t is the start of the block
+ }
+#else // Not __INTEL_COMPILER
+ // Not supported unless we can figure out how to get the size of the frame
+#endif
+}
diff --git a/libcilkrts/runtime/cilk-abi.c b/libcilkrts/runtime/cilk-abi.c
index fb525154862..be7e1497561 100644
--- a/libcilkrts/runtime/cilk-abi.c
+++ b/libcilkrts/runtime/cilk-abi.c
@@ -2,31 +2,43 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
+/**
+ * @file cilk-abi.c
+ *
+ * @brief cilk-abi.c implements all of the entrypoints to the Intel Cilk
+ * Plus runtime.
+ */
+
/*
* Define this macro so that compiliation of this file generates the
* non-inlined versions of certain functions in cilk_api.h.
@@ -35,7 +47,6 @@
#include "cilk/cilk_api.h"
#include "cilk/cilk_undocumented.h"
#include "cilktools/cilkscreen.h"
-#include "internal/inspector-abi.h"
#include "global_state.h"
#include "os.h"
@@ -48,6 +59,7 @@
#include "sysdep.h"
#include "except.h"
#include "cilk_malloc.h"
+#include "record-replay.h"
#include <errno.h>
#include <string.h>
@@ -72,7 +84,14 @@ void * _ReturnAddress(void);
#define TBB_INTEROP_DATA_DELAYED_UNTIL_BIND (void *)-1
-// ABI version
+/**
+ * __cilkrts_bind_thread is a versioned entrypoint. The runtime should be
+ * exporting copies of __cilkrts_bind_thread for the current and all previous
+ * versions of the ABI.
+ *
+ * This macro should always be set to generate a version to match the current
+ * version; __CILKRTS_ABI_VERSION.
+ */
#define BIND_THREAD_RTN __cilkrts_bind_thread_1
static inline
@@ -125,7 +144,21 @@ CILK_ABI_VOID __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf)
sf->reserved = 0;
}
-/* Return true if undo-detach failed. */
+/**
+ * A component of the THE protocol. __cilkrts_undo_detach checks whether
+ * this frame's parent has been stolen. If it hasn't, the frame can return
+ * normally. If the parent has been stolen, or if we suspect it might be,
+ * then __cilkrts_leave_frame() needs to call into the runtime.
+ *
+ * @note __cilkrts_undo_detach() is comparing the exception pointer against
+ * the tail pointer. The exception pointer is modified when another worker
+ * is considering whether it can steal a frame. The head pointer is updated
+ * to match when the worker lock is taken out and the thief is sure that
+ * it can complete the steal. If the steal cannot be completed, the thief
+ * will restore the exception pointer.
+ *
+ * @return true if undo-detach failed.
+ */
static int __cilkrts_undo_detach(__cilkrts_stack_frame *sf)
{
__cilkrts_worker *w = sf->worker;
@@ -196,7 +229,9 @@ CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf)
#ifndef _WIN32
if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) {
- update_pedigree_on_leave_frame(w, sf);
+// Pedigree will be updated in __cilkrts_leave_frame. We need the
+// pedigree before the update for record/replay
+// update_pedigree_on_leave_frame(w, sf);
__cilkrts_return_exception(sf);
/* If return_exception returns the caller is attached.
leave_frame is called from a cleanup (destructor)
@@ -205,13 +240,19 @@ CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf)
return;
}
#endif
+
+ // During replay, check whether w was the last worker to continue
+ replay_wait_for_steal_if_parent_was_stolen(w);
+
+ // Attempt to undo the detach
if (__builtin_expect(__cilkrts_undo_detach(sf), 0)) {
- // The update of pedigree for leaving the frame occurs
- // inside this call if it does not return.
+ // The update of pedigree for leaving the frame occurs
+ // inside this call if it does not return.
__cilkrts_c_THE_exception_check(w, sf);
}
- update_pedigree_on_leave_frame(w, sf);
+ update_pedigree_on_leave_frame(w, sf);
+
/* This path is taken when undo-detach wins the race with stealing.
Otherwise this strand terminates and the caller will be resumed
via setjmp at sync. */
@@ -247,11 +288,6 @@ CILK_ABI_VOID __cilkrts_sync(__cilkrts_stack_frame *sf)
}
#endif
- /* Save return address so we can report it to Piersol. */
-#ifdef _WIN32
- w->l->sync_return_address = _ReturnAddress();
-#endif
-
__cilkrts_c_sync(w, sf);
}
@@ -322,7 +358,9 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
{
__cilkrts_worker *w;
int start_cilkscreen = 0;
+#ifdef USE_ITTNOTIFY
static int unique_obj;
+#endif
// Cannot set this pointer until after __cilkrts_init_internal() call:
global_state_t* g;
@@ -350,15 +388,19 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
__cilkrts_cilkscreen_establish_worker(w);
{
full_frame *ff = __cilkrts_make_full_frame(w, 0);
- ff->stack_self = sysdep_make_user_stack(w);
- tbb_interop_use_saved_stack_op_info(w, ff->stack_self);
- w->l->user_thread_imported = 0;
+
+ ff->fiber_self = cilk_fiber_allocate_from_thread();
+ CILK_ASSERT(ff->fiber_self);
+
+ cilk_fiber_set_owner(ff->fiber_self, w);
+ cilk_fiber_tbb_interop_use_saved_stack_op_info(ff->fiber_self);
+
CILK_ASSERT(ff->join_counter == 0);
ff->join_counter = 1;
w->l->frame_ff = ff;
w->reducer_map = __cilkrts_make_reducer_map(w);
__cilkrts_set_leftmost_reducer_map(w->reducer_map, 1);
- load_pedigree_leaf_into_user_worker(w);
+ load_pedigree_leaf_into_user_worker(w);
}
// Make sure that the head and tail are reset, and saved_protected_tail
@@ -371,10 +413,32 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
CILK_ASSERT(w->tail == w->l->ltq);
CILK_ASSERT(w->protected_tail == w->ltq_limit);
- if (0 != __cilkrts_sysdep_bind_thread(w))
- // User thread couldn't be bound (probably because of a lack of
- // resources). Continue, but don't allow stealing from this user
- // thread.
+ // There may have been an old pending exception which was freed when the
+ // exception was caught outside of Cilk
+ w->l->pending_exception = NULL;
+
+ w->reserved = NULL;
+
+ // If we've already created a scheduling fiber for this worker, we'll just
+ // reuse it. If w->self < 0, it means that this is an ad-hoc user worker
+ // not known to the global state. Thus, we need to create a scheduling
+ // stack only if we don't already have one and w->self >= 0.
+ if (NULL == w->l->scheduling_fiber && w->self >= 0)
+ {
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
+ // Create a scheduling fiber for this worker.
+ w->l->scheduling_fiber =
+ cilk_fiber_allocate_from_heap(CILK_SCHEDULING_STACK_SIZE);
+ cilk_fiber_reset_state(w->l->scheduling_fiber,
+ scheduler_fiber_proc_for_user_worker);
+ cilk_fiber_set_owner(w->l->scheduling_fiber, w);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
+ }
+
+ // If the scheduling fiber is NULL, we've either exceeded our quota for
+ // fibers or workers or we're out of memory, so we should lose parallelism
+ // by disallowing stealing.
+ if (NULL == w->l->scheduling_fiber)
__cilkrts_disallow_stealing(w, NULL);
start_cilkscreen = (0 == w->g->Q);
@@ -420,8 +484,8 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void)
*
* For Windows, the aliased symbol is exported in cilk-exports.def.
*/
-#ifdef _DARWIN_C_SOURCE
-/*
+#if defined(_DARWIN_C_SOURCE) || defined(__APPLE__)
+/**
* Mac OS X: Unfortunately, Darwin doesn't allow aliasing, so we just make a
* call and hope the optimizer does the right thing.
*/
@@ -429,18 +493,27 @@ CILK_ABI_WORKER_PTR __cilkrts_bind_thread (void) {
return BIND_THREAD_RTN();
}
#else
-/*
+
+/**
+ * Macro to convert a parameter to a string. Used on Linux or BSD.
+ */
+#define STRINGIFY(x) #x
+
+/**
+ * Macro to generate an __attribute__ for an aliased name
+ */
+#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x))))
+
+/**
* Linux or BSD: Use the alias attribute to make the labels for the versioned
* functions point to the same place in the code as the original. Using
* the two macros is annoying but required.
*/
-#define STRINGIFY(x) #x
-#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x))))
CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void)
ALIASED_NAME(BIND_THREAD_RTN);
-#endif // defined _DARWIN_C_SOURCE
+#endif // defined _DARWIN_C_SOURCE || defined __APPLE__
#endif // !defined _MSC_VER
CILK_API_SIZET
@@ -464,60 +537,6 @@ CILK_API_VOID __cilkrts_dump_stats(void)
global_os_mutex_unlock();
}
-/*
- * __cilkrts_get_stack_region_id
- *
- * Interface called by Inspector (Piersol)
- *
- * Returns a __cilkrts_region_id for the stack currently executing on a thread.
- * Returns NULL on failure.
- */
-
-CILK_INSPECTOR_ABI(__cilkrts_region_id)
-__cilkrts_get_stack_region_id(__cilkrts_thread_id thread_id)
-{
- global_state_t *g = cilkg_get_global_state();
- int i;
-
- if (NULL == g)
- return NULL;
-
- for (i = 0; i < g->total_workers; i++)
- {
- if (WORKER_FREE != g->workers[i]->l->type)
- {
- if (__cilkrts_sysdep_is_worker_thread_id(g, i, thread_id))
- return (__cilkrts_region_id)g->workers[i]->l->frame_ff->stack_self;
- }
- }
-
- return NULL;
-}
-
-/*
- * __cilkrts_get_stack_region_properties
- *
- * Interface called by Inspector (Piersol)
- *
- * Fills in the properties for a region_id.
- *
- * Returns false on invalid region_id or improperly sized
- * __cilkrts_region_properties
- */
-
-CILK_INSPECTOR_ABI(int)
-__cilkrts_get_stack_region_properties(__cilkrts_region_id region_id,
- __cilkrts_region_properties *properties)
-{
- if (NULL == properties)
- return 0;
-
- if (properties->size != sizeof(__cilkrts_region_properties))
- return 0;
-
- return __cilkrts_sysdep_get_stack_region_properties((__cilkrts_stack *)region_id, properties);
-}
-
#ifndef _WIN32
CILK_ABI_THROWS_VOID __cilkrts_rethrow(__cilkrts_stack_frame *sf)
{
@@ -535,16 +554,15 @@ static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data)
{
__cilk_tbb_stack_op_thunk o;
- // If the __cilkrts_stack wasn't available fetch it now
+ // If the cilk_fiber wasn't available fetch it now
if (TBB_INTEROP_DATA_DELAYED_UNTIL_BIND == data)
{
- __cilkrts_stack *sd;
full_frame *ff;
__cilkrts_worker *w = __cilkrts_get_tls_worker();
if (NULL == w)
{
// Free any saved stack op information
- tbb_interop_free_stack_op_info();
+ cilk_fiber_tbb_interop_free_stack_op_info();
return 0; /* Success! */
}
@@ -552,30 +570,28 @@ static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data)
__cilkrts_worker_lock(w);
ff = w->l->frame_ff;
__cilkrts_frame_lock(w,ff);
- data = ff->stack_self;
+ data = ff->fiber_self;
__cilkrts_frame_unlock(w,ff);
__cilkrts_worker_unlock(w);
}
#if CILK_LIB_DEBUG /* Debug code */
/* Get current stack */
- __cilkrts_stack *sd;
full_frame *ff;
__cilkrts_worker *w = __cilkrts_get_tls_worker();
__cilkrts_worker_lock(w);
ff = w->l->frame_ff;
__cilkrts_frame_lock(w,ff);
- sd = ff->stack_self;
- CILK_ASSERT (data==sd);
+ CILK_ASSERT (data == ff->fiber_self);
__cilkrts_frame_unlock(w,ff);
__cilkrts_worker_unlock(w);
#endif
/* Clear the callback information */
o.data = NULL;
- o.routine = NULL;
- __cilkrts_set_stack_op( (struct __cilkrts_stack*)data, o );
-
+ o.routine = NULL;
+ cilk_fiber_set_stack_op((cilk_fiber*)data, o);
+
// Note. Do *NOT* free any saved stack information here. If they want to
// free the saved stack op information, they'll do it when the thread is
// unbound
@@ -597,7 +613,7 @@ CILK_API_TBB_RETCODE
__cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
__cilk_tbb_stack_op_thunk o)
{
- __cilkrts_stack *sd;
+ cilk_fiber* current_fiber;
__cilkrts_worker *w;
#ifdef _MSC_VER
@@ -612,8 +628,8 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
{
// Save data for later. We'll deal with it when/if this thread binds
// to the runtime
- tbb_interop_save_stack_op_info(o);
-
+ cilk_fiber_tbb_interop_save_stack_op_info(o);
+
u->routine = __cilkrts_unwatch_stack;
u->data = TBB_INTEROP_DATA_DELAYED_UNTIL_BIND;
@@ -622,7 +638,7 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
/* Get current stack */
__cilkrts_worker_lock(w);
- sd = w->l->frame_ff->stack_self;
+ current_fiber = w->l->frame_ff->fiber_self;
__cilkrts_worker_unlock(w);
/* CILK_ASSERT( !sd->stack_op_data ); */
@@ -630,9 +646,9 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u,
/* Give TBB our callback */
u->routine = __cilkrts_unwatch_stack;
- u->data = sd;
+ u->data = current_fiber;
/* Save the callback information */
- __cilkrts_set_stack_op( sd, o );
+ cilk_fiber_set_stack_op(current_fiber, o);
return 0; /* Success! */
}
@@ -702,4 +718,11 @@ __cilkrts_bump_loop_rank_internal(__cilkrts_worker* w)
return 0;
}
+CILK_ABI_VOID
+__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
+{
+ // Pass call onto OS/architecture dependent function
+ sysdep_save_fp_ctrl_state(sf);
+}
+
/* end cilk-abi.c */
diff --git a/libcilkrts/runtime/cilk-ittnotify.h b/libcilkrts/runtime/cilk-ittnotify.h
index 498aa700a3b..cf28a6591ae 100644
--- a/libcilkrts/runtime/cilk-ittnotify.h
+++ b/libcilkrts/runtime/cilk-ittnotify.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2013
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifndef INCLUDED_CILK_ITTNOTIFY_DOT_H
@@ -33,6 +38,11 @@
#endif
#include <stdio.h>
+// ITTNOTIFY does not support ARM at this time
+#ifdef __arm__
+#undef USE_ITTNOTIFY
+#endif
+
#ifdef USE_ITTNOTIFY
#include <ittnotify.h>
diff --git a/libcilkrts/runtime/cilk-tbb-interop.h b/libcilkrts/runtime/cilk-tbb-interop.h
index 2972f037292..0ff501b6ddb 100644
--- a/libcilkrts/runtime/cilk-tbb-interop.h
+++ b/libcilkrts/runtime/cilk-tbb-interop.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
diff --git a/libcilkrts/runtime/cilk_api.c b/libcilkrts/runtime/cilk_api.c
index 33a24861a78..277941ba4a7 100644
--- a/libcilkrts/runtime/cilk_api.c
+++ b/libcilkrts/runtime/cilk_api.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/*
diff --git a/libcilkrts/runtime/cilk_fiber-unix.cpp b/libcilkrts/runtime/cilk_fiber-unix.cpp
new file mode 100644
index 00000000000..afdce4e1e03
--- /dev/null
+++ b/libcilkrts/runtime/cilk_fiber-unix.cpp
@@ -0,0 +1,240 @@
+/* cilk_fiber-unix.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+#include "cilk_fiber-unix.h"
+#include "cilk_malloc.h"
+#include "bug.h"
+#include "os.h"
+
+#include <cstdio>
+#include <cstdlib>
+
+#include <alloca.h>
+#include <errno.h>
+#include <sys/mman.h>
+#include <unistd.h>
+
+// MAP_ANON is deprecated on Linux, but seems to be required on Mac...
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Magic number for sanity checking fiber structure
+const unsigned magic_number = 0x5afef00d;
+
+int cilk_fiber_sysdep::s_page_size = getpagesize();
+
+cilk_fiber_sysdep::cilk_fiber_sysdep(std::size_t stack_size)
+ : cilk_fiber(stack_size)
+ , m_magic(magic_number)
+{
+ // Set m_stack and m_stack_base.
+ make_stack(stack_size);
+
+ // Get high-address of stack, with 32-bytes of spare space, and rounded
+ // down to the nearest 32-byte boundary.
+ const uintptr_t align_mask = 32 - 1;
+ m_stack_base -= ((std::size_t) m_stack_base) & align_mask;
+}
+
+cilk_fiber_sysdep::cilk_fiber_sysdep(from_thread_t)
+ : cilk_fiber()
+ , m_magic(magic_number)
+{
+ this->set_allocated_from_thread(true);
+
+ // Dummy stack data for thread-main fiber
+ m_stack = NULL;
+ m_stack_base = NULL;
+}
+
+void cilk_fiber_sysdep::convert_fiber_back_to_thread()
+{
+ // Does nothing on Linux.
+}
+
+cilk_fiber_sysdep::~cilk_fiber_sysdep()
+{
+ CILK_ASSERT(magic_number == m_magic);
+ if (!this->is_allocated_from_thread())
+ free_stack();
+}
+
+#if SUPPORT_GET_CURRENT_FIBER
+cilk_fiber_sysdep* cilk_fiber_sysdep::get_current_fiber_sysdep()
+{
+ return cilkos_get_tls_cilk_fiber();
+}
+#endif
+
+// Jump to resume other fiber. We may or may not come back.
+inline void cilk_fiber_sysdep::resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+ if (other->is_resumable()) {
+ other->set_resumable(false);
+ // Resume by longjmp'ing to the place where we suspended.
+ CILK_LONGJMP(other->m_resume_jmpbuf);
+ }
+ else {
+ // Otherwise, we've never run this fiber before. Start the
+ // proc method.
+ other->run();
+ }
+}
+
+void cilk_fiber_sysdep::suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+#if SUPPORT_GET_CURRENT_FIBER
+ cilkos_set_tls_cilk_fiber(other);
+#endif
+ CILK_ASSERT(this->is_resumable());
+
+
+ // Jump to the other fiber. We expect to come back.
+ if (! CILK_SETJMP(m_resume_jmpbuf)) {
+ resume_other_sysdep(other);
+ }
+
+ // Return here when another fiber resumes me.
+ // If the fiber that switched to me wants to be deallocated, do it now.
+ do_post_switch_actions();
+}
+
+NORETURN cilk_fiber_sysdep::jump_to_resume_other_sysdep(cilk_fiber_sysdep* other)
+{
+#if SUPPORT_GET_CURRENT_FIBER
+ cilkos_set_tls_cilk_fiber(other);
+#endif
+ CILK_ASSERT(!this->is_resumable());
+
+ // Jump to the other fiber. But we are never coming back because
+ // this fiber is being reset.
+ resume_other_sysdep(other);
+
+ // We should never come back here...
+ __cilkrts_bug("Should not get here");
+}
+
+
+NORETURN cilk_fiber_sysdep::run()
+{
+ // Only fibers created from a pool have a proc method to run and execute.
+ CILK_ASSERT(m_start_proc);
+ CILK_ASSERT(!this->is_allocated_from_thread());
+ CILK_ASSERT(!this->is_resumable());
+
+ // TBD: This setjmp/longjmp pair simply changes the stack pointer.
+ // We could probably replace this code with some assembly.
+ if (! CILK_SETJMP(m_resume_jmpbuf))
+ {
+ // Change stack pointer to fiber stack
+ JMPBUF_SP(m_resume_jmpbuf) = m_stack_base;
+ CILK_LONGJMP(m_resume_jmpbuf);
+ }
+
+ // Verify that 1) 'this' is still valid and 2) '*this' has not been
+ // corrupted.
+ CILK_ASSERT(magic_number == m_magic);
+
+ // If the fiber that switched to me wants to be deallocated, do it now.
+ do_post_switch_actions();
+
+ // Now call the user proc on the new stack
+ m_start_proc(this);
+
+ // alloca() to force generation of frame pointer. The argument to alloca
+ // is contrived to prevent the compiler from optimizing it away. This
+ // code should never actually be executed.
+ int* dummy = (int*) alloca((sizeof(int) + (std::size_t) m_start_proc) & 0x1);
+ *dummy = 0xface;
+
+ // User proc should never return.
+ __cilkrts_bug("Should not get here");
+}
+
+void cilk_fiber_sysdep::make_stack(size_t stack_size)
+{
+ char* p;
+ // We've already validated that the stack size is page-aligned and
+ // is a reasonable value. No need to do any extra rounding here.
+ size_t rounded_stack_size = stack_size;
+
+ // Normally, we have already validated that the stack size is
+ // aligned to 4K. In the rare case that pages are huge though, we
+ // need to do some extra checks.
+ if (rounded_stack_size < 3 * (size_t)s_page_size) {
+ // If the specified stack size is too small, round up to 3
+ // pages. We need at least 2 extra for the guard pages.
+ rounded_stack_size = 3 * (size_t)s_page_size;
+ }
+ else {
+ // Otherwise, the stack size is large enough, but might not be
+ // a multiple of page size. Round up to nearest multiple of
+ // s_page_size, just to be safe.
+ size_t remainder = rounded_stack_size % s_page_size;
+ if (remainder) {
+ rounded_stack_size += s_page_size - remainder;
+ }
+ }
+
+ p = (char*)mmap(0, rounded_stack_size,
+ PROT_READ|PROT_WRITE,
+ MAP_PRIVATE|MAP_ANONYMOUS,
+ -1, 0);
+ if (MAP_FAILED == p) {
+ // For whatever reason (probably ran out of memory), mmap() failed.
+ // There is no stack to return, so the program loses parallelism.
+ m_stack = NULL;
+ m_stack_base = NULL;
+ return;
+ }
+
+ // mprotect guard pages.
+ mprotect(p + rounded_stack_size - s_page_size, s_page_size, PROT_NONE);
+ mprotect(p, s_page_size, PROT_NONE);
+
+ m_stack = p;
+ m_stack_base = p + rounded_stack_size - s_page_size;
+}
+
+
+void cilk_fiber_sysdep::free_stack()
+{
+ if (m_stack) {
+ size_t rounded_stack_size = m_stack_base - m_stack + s_page_size;
+ if (munmap(m_stack, rounded_stack_size) < 0)
+ __cilkrts_bug("Cilk: stack munmap failed error %d\n", errno);
+ }
+}
+
+/* End cilk_fiber-unix.cpp */
diff --git a/libcilkrts/runtime/cilk_fiber-unix.h b/libcilkrts/runtime/cilk_fiber-unix.h
new file mode 100644
index 00000000000..5665bd576bf
--- /dev/null
+++ b/libcilkrts/runtime/cilk_fiber-unix.h
@@ -0,0 +1,144 @@
+/* cilk_fiber-unix.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+#ifndef INCLUDED_CILK_FIBER_UNIX_DOT_H
+#define INCLUDED_CILK_FIBER_UNIX_DOT_H
+
+#ifndef __cplusplus
+# error cilk_fiber-unix.h is a C++-only header
+#endif
+
+#include "cilk_fiber.h"
+#include "jmpbuf.h"
+
+/**
+ * @file cilk_fiber-unix.h
+ *
+ * @brief Unix-specific implementation for cilk_fiber.
+ */
+
+/**
+ * @brief Unix-specific fiber class derived from portable fiber class
+ */
+struct cilk_fiber_sysdep : public cilk_fiber
+{
+ public:
+
+#if SUPPORT_GET_CURRENT_FIBER
+ /**
+ * @brief Gets the current fiber from TLS.
+ */
+ static cilk_fiber_sysdep* get_current_fiber_sysdep();
+#endif
+
+ /**
+ * @brief Construct the system-dependent portion of a fiber.
+ *
+ * @param stack_size The size of the stack for this fiber.
+ */
+ cilk_fiber_sysdep(std::size_t stack_size);
+
+ /**
+ * @brief Construct the system-dependent portion of a fiber created
+ * from a thread.
+ */
+ cilk_fiber_sysdep(from_thread_t);
+
+ /**
+ * @brief Destructor
+ */
+ ~cilk_fiber_sysdep();
+
+ /**
+ * @brief OS-specific calls to convert this fiber back to thread.
+ *
+ * Nothing to do for Linux.
+ */
+ void convert_fiber_back_to_thread();
+
+ /**
+ * @brief System-dependent function to suspend self and resume execution of "other".
+ *
+ * This fiber is suspended.
+ *
+ * @pre @c is_resumable() should be true.
+ *
+ * @param other Fiber to resume.
+ */
+ void suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other);
+
+ /**
+ * @brief System-dependent function called to jump to @p other
+ * fiber.
+ *
+ * @pre @c is_resumable() should be false.
+ *
+ * @param other Fiber to resume.
+ */
+ NORETURN jump_to_resume_other_sysdep(cilk_fiber_sysdep* other);
+
+ /**
+ * @brief Runs the start_proc.
+ * @pre is_resumable() should be false.
+ * @pre is_allocated_from_thread() should be false.
+ * @pre m_start_proc must be valid.
+ */
+ NORETURN run();
+
+ /**
+ * @brief Returns the base of this fiber's stack.
+ */
+ inline char* get_stack_base_sysdep() { return m_stack_base; }
+
+ private:
+ char* m_stack_base; ///< The base of this fiber's stack.
+ char* m_stack; // Stack memory (low address)
+ __CILK_JUMP_BUFFER m_resume_jmpbuf; // Place to resume fiber
+ unsigned m_magic; // Magic number for checking
+
+ static int s_page_size; // Page size for
+ // stacks.
+
+ // Allocate memory for a stack. This method
+ // initializes m_stack and m_stack_base.
+ void make_stack(size_t stack_size);
+
+ // Deallocates memory for the stack.
+ void free_stack();
+
+ // Common helper method for implementation of resume_other_sysdep
+ // variants.
+ inline void resume_other_sysdep(cilk_fiber_sysdep* other);
+};
+
+#endif // ! defined(INCLUDED_CILK_FIBER_UNIX_DOT_H)
diff --git a/libcilkrts/runtime/cilk_fiber.cpp b/libcilkrts/runtime/cilk_fiber.cpp
new file mode 100644
index 00000000000..aee09875755
--- /dev/null
+++ b/libcilkrts/runtime/cilk_fiber.cpp
@@ -0,0 +1,1073 @@
+/* cilk_fiber.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/* Implementations of non-platform-specific aspects of cilk_fiber, especially
+ * the cilk_fiber_pool interface.
+ */
+#include "cilk_fiber.h"
+#ifdef _WIN32
+# include "cilk_fiber-win.h"
+#else
+# include "cilk_fiber-unix.h"
+#endif
+#include "cilk_malloc.h"
+#include "bug.h"
+#include <new>
+
+#include <climits>
+#include <cstdio>
+#include <cstdlib>
+#include <cstring>
+
+#include "sysdep.h"
+
+
+extern "C" {
+
+inline int cilk_fiber_pool_sanity_check(cilk_fiber_pool *pool, const char* desc)
+{
+ int errors = 0;
+#if FIBER_DEBUG >= 1
+ if ((NULL != pool) && pool->total > 0) {
+
+ // Root pool should not allocate more fibers than alloc_max
+ errors += ((pool->parent == NULL) &&
+ (pool->total > pool->alloc_max));
+ errors += (pool->total > pool->high_water);
+
+ if (errors) {
+ fprintf(stderr, "ERROR at %s: pool=%p has max_size=%u, total=%d, high_water=%d\n",
+ desc,
+ pool, pool->max_size, pool->total, pool->high_water);
+ }
+ }
+#endif
+ return (errors == 0);
+}
+
+inline void increment_pool_total(cilk_fiber_pool* pool)
+{
+ ++pool->total;
+ if (pool->high_water < pool->total)
+ pool->high_water = pool->total;
+}
+
+inline void decrement_pool_total(cilk_fiber_pool* pool, int fibers_freed)
+{
+ pool->total -= fibers_freed;
+}
+
+
+/**
+ * @brief Free fibers from this pool until we have at most @c
+ * num_to_keep fibers remaining, and then put a fiber back.
+ *
+ * @pre We do not hold @c pool->lock
+ * @post After completion, we do not hold @c pool->lock
+ */
+static void cilk_fiber_pool_free_fibers_from_pool(cilk_fiber_pool* pool,
+ unsigned num_to_keep,
+ cilk_fiber* fiber_to_return)
+{
+ // Free our own fibers, until we fall below our desired threshold.
+ // Each iteration of this loop proceeds in the following stages:
+ // 1. Acquire the pool lock.
+ // 2. Grab up to B fibers from the pool and store them in a buffer.
+ // 3. Check if pool is empty enough. If yes, put the last fiber back,
+ // and remember that we should quit.
+ // 4. Release the pool lock, and actually free any buffered fibers.
+ // 5. Check if we are done and should exit the loop. Otherwise, try again.
+ //
+ const bool need_lock = pool->lock;
+ bool last_fiber_returned = false;
+
+ do {
+ const int B = 10; // Pull at most this many fibers from the
+ // parent for one lock acquisition. Make
+ // this value large enough to amortize
+ // against the cost of acquiring and
+ // releasing the lock.
+ int num_to_free = 0;
+ cilk_fiber* fibers_to_free[B];
+
+ // Stage 1: Grab the lock.
+ if (need_lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Stage 2: Grab up to B fibers to free.
+ int fibers_freed = 0;
+ while ((pool->size > num_to_keep) && (num_to_free < B)) {
+ fibers_to_free[num_to_free++] = pool->fibers[--pool->size];
+ fibers_freed++;
+ }
+ decrement_pool_total(pool, fibers_freed);
+
+ // Stage 3. Pool is below threshold. Put extra fiber back.
+ if (pool->size <= num_to_keep) {
+ // Put the last fiber back into the pool.
+ if (fiber_to_return) {
+ CILK_ASSERT(pool->size < pool->max_size);
+ pool->fibers[pool->size] = fiber_to_return;
+ pool->size++;
+ }
+ last_fiber_returned = true;
+ }
+
+ // Stage 4: Release the lock, and actually free any fibers
+ // buffered.
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ for (int i = 0; i < num_to_free; ++i) {
+ fibers_to_free[i]->deallocate_to_heap();
+ }
+
+ } while (!last_fiber_returned);
+}
+
+
+/******************************************************************
+ * TBD: We want to simplify / rework the logic for allocating and
+ * deallocating fibers, so that they are hopefully simpler and work
+ * more elegantly for more than two levels.
+ ******************************************************************/
+
+/**
+ * @brief Transfer fibers from @c pool to @c pool->parent.
+ *
+ * @pre Must hold @c pool->lock if it exists.
+ * @post After completion, some number of fibers
+ * have been moved from this pool to the parent.
+ * The lock @c pool->lock is still held.
+ *
+ * TBD: Do we wish to guarantee that the lock has never been
+ * released? It may depend on the implementation...
+ */
+static void cilk_fiber_pool_move_fibers_to_parent_pool(cilk_fiber_pool* pool,
+ unsigned num_to_keep)
+{
+ // ASSERT: We should hold the lock on pool (if it has one).
+ CILK_ASSERT(pool->parent);
+ cilk_fiber_pool* parent_pool = pool->parent;
+
+ // Move fibers from our pool to the parent until we either run out
+ // of space in the parent, or hit our threshold.
+ //
+ // This operation must be done while holding the parent lock.
+
+ // If the parent pool appears to be full, just return early.
+ if (parent_pool->size >= parent_pool->max_size)
+ return;
+
+ spin_mutex_lock(pool->parent->lock);
+ while ((parent_pool->size < parent_pool->max_size) &&
+ (pool->size > num_to_keep)) {
+ parent_pool->fibers[parent_pool->size++] =
+ pool->fibers[--pool->size];
+ }
+
+ // If the child pool has deallocated more fibers to the heap
+ // than it has allocated, then transfer this "surplus" to the
+ // parent, so that the parent is free to allocate more from the
+ // heap.
+ //
+ // This transfer means that the total in the parent can
+ // temporarily go negative.
+ if (pool->total < 0) {
+ // Reduce parent total by the surplus we have in the local
+ // pool.
+ parent_pool->total += pool->total;
+ pool->total = 0;
+ }
+
+ spin_mutex_unlock(pool->parent->lock);
+}
+
+void cilk_fiber_pool_init(cilk_fiber_pool* pool,
+ cilk_fiber_pool* parent,
+ size_t stack_size,
+ unsigned buffer_size,
+ int alloc_max,
+ int is_shared)
+{
+#if FIBER_DEBUG >= 1
+ fprintf(stderr, "fiber_pool_init, pool=%p, parent=%p, alloc_max=%u\n",
+ pool, parent, alloc_max);
+#endif
+
+ pool->lock = (is_shared ? spin_mutex_create() : NULL);
+ pool->parent = parent;
+ pool->stack_size = stack_size;
+ pool->max_size = buffer_size;
+ pool->size = 0;
+ pool->total = 0;
+ pool->high_water = 0;
+ pool->alloc_max = alloc_max;
+ pool->fibers =
+ (cilk_fiber**) __cilkrts_malloc(buffer_size * sizeof(cilk_fiber*));
+ CILK_ASSERT(NULL != pool->fibers);
+
+#ifdef __MIC__
+#define PREALLOCATE_FIBERS
+#endif
+
+#ifdef PREALLOCATE_FIBERS
+ // Pre-allocate 1/4 of fibers in the pools ahead of time. This
+ // value is somewhat arbitrary. It was chosen to be less than the
+ // threshold (of about 3/4) of fibers to keep in the pool when
+ // transferring fibers to the parent.
+
+ int pre_allocate_count = buffer_size/4;
+ for (pool->size = 0; pool->size < pre_allocate_count; pool->size++) {
+ pool->fibers[pool->size] = cilk_fiber::allocate_from_heap(pool->stack_size);
+ }
+#endif
+}
+
+
+void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool,
+ unsigned max_fibers_to_allocate)
+{
+ // Should only set limit on root pool, not children.
+ CILK_ASSERT(NULL == root_pool->parent);
+ root_pool->alloc_max = max_fibers_to_allocate;
+}
+
+void cilk_fiber_pool_destroy(cilk_fiber_pool* pool)
+{
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "pool_destroy"));
+
+ // Lock my own pool, if I need to.
+ if (pool->lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Give any remaining fibers to parent pool.
+ if (pool->parent) {
+ cilk_fiber_pool_move_fibers_to_parent_pool(pool, 0);
+ }
+
+ // Unlock pool.
+ if (pool->lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // If I have any left in my pool, just free them myself.
+ // This method may acquire the pool lock.
+ cilk_fiber_pool_free_fibers_from_pool(pool, 0, NULL);
+
+ // Destroy the lock if there is one.
+ if (pool->lock) {
+ spin_mutex_destroy(pool->lock);
+ }
+ __cilkrts_free(pool->fibers);
+}
+
+
+cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool)
+{
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "allocate"));
+ return cilk_fiber::allocate(pool);
+}
+
+cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size)
+{
+ return cilk_fiber::allocate_from_heap(stack_size);
+}
+
+void cilk_fiber_reset_state(cilk_fiber* fiber, cilk_fiber_proc start_proc)
+{
+ fiber->reset_state(start_proc);
+}
+
+int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool)
+{
+ return fiber->remove_reference(pool);
+}
+
+cilk_fiber* cilk_fiber_allocate_from_thread()
+{
+ return cilk_fiber::allocate_from_thread();
+}
+
+int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber)
+{
+ return fiber->deallocate_from_thread();
+}
+
+int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber)
+{
+ return fiber->remove_reference_from_thread();
+}
+
+int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber)
+{
+ return fiber->is_allocated_from_thread();
+}
+
+#if SUPPORT_GET_CURRENT_FIBER
+cilk_fiber* cilk_fiber_get_current_fiber(void)
+{
+ return cilk_fiber::get_current_fiber();
+}
+#endif
+
+void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber* other)
+{
+ self->suspend_self_and_resume_other(other);
+}
+
+
+void cilk_fiber::reset_state(cilk_fiber_proc start_proc)
+{
+ // Setup the fiber and return.
+ this->m_start_proc = start_proc;
+
+ CILK_ASSERT(!this->is_resumable());
+ CILK_ASSERT(NULL == this->m_pending_remove_ref);
+ CILK_ASSERT(NULL == this->m_pending_pool);
+}
+
+NORETURN
+cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber_pool* self_pool,
+ cilk_fiber* other)
+{
+#if FIBER_DEBUG >= 3
+ __cilkrts_worker* w = __cilkrts_get_tls_worker();
+ fprintf(stderr, "W=%d: cilk_fiber_deactivate_self_and_resume_other: self=%p, other=%p\n",
+ w->self,
+ self, other);
+#endif
+ CILK_ASSERT(cilk_fiber_pool_sanity_check(self_pool, "remove_reference_from_self_resume_other"));
+ self->remove_reference_from_self_and_resume_other(self_pool, other);
+
+ // We should never return here.
+}
+
+void cilk_fiber_set_post_switch_proc(cilk_fiber *self,
+ cilk_fiber_proc post_switch_proc)
+{
+ self->set_post_switch_proc(post_switch_proc);
+}
+
+void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber,
+ __cilk_tbb_stack_op op)
+{
+ fiber->invoke_tbb_stack_op(op);
+}
+
+cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber)
+{
+ return fiber->get_data();
+
+ /// TBD: Change this code to "return (cilk_fiber_data*)fiber;"
+ // plus a static assert, so that this function is
+ // more easily inlined by the compiler.
+}
+
+int cilk_fiber_is_resumable(cilk_fiber *fiber)
+{
+ return fiber->is_resumable();
+}
+
+char* cilk_fiber_get_stack_base(cilk_fiber *fiber)
+{
+ return fiber->get_stack_base();
+}
+
+
+#if defined(_WIN32) && 0 // Only works on Windows. Disable debugging for now.
+#define DBG_STACK_OPS(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__)
+#else
+#define DBG_STACK_OPS(_fmt, ...)
+#endif
+
+void cilk_fiber_set_stack_op(cilk_fiber *fiber,
+ __cilk_tbb_stack_op_thunk o)
+{
+ cilk_fiber_data *fdata = cilk_fiber_get_data(fiber);
+ DBG_STACK_OPS ("cilk_fiber_set_stack_op - cilk_fiber %p, routine: %p, data: %p\n",
+ fiber,
+ o.routine,
+ o.data);
+ fdata->stack_op_routine = o.routine;
+ fdata->stack_op_data = o.data;
+}
+
+#if 0 // Debugging function
+static
+const char *NameStackOp (enum __cilk_tbb_stack_op op)
+{
+ switch(op)
+ {
+ case CILK_TBB_STACK_ORPHAN: return "CILK_TBB_STACK_ORPHAN";
+ case CILK_TBB_STACK_ADOPT: return "CILK_TBB_STACK_ADOPT";
+ case CILK_TBB_STACK_RELEASE: return "CILK_TBB_STACK_RELEASE";
+ default: return "Unknown";
+ }
+}
+#endif
+
+/*
+ * Save TBB interop information for an unbound thread. It will get picked
+ * up when the thread is bound to the runtime.
+ */
+void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk =
+ __cilkrts_get_tls_tbb_interop();
+
+ DBG_STACK_OPS("Calling save_stack_op; o.routine=%p, o.data=%p, saved_thunk=%p\n",
+ o.routine, o.data, saved_thunk);
+
+ // If there is not already space allocated, allocate some.
+ if (NULL == saved_thunk) {
+ saved_thunk = (__cilk_tbb_stack_op_thunk*)
+ __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
+ __cilkrts_set_tls_tbb_interop(saved_thunk);
+ }
+
+ *saved_thunk = o;
+
+ DBG_STACK_OPS ("Unbound Thread %04x: tbb_interop_save_stack_op_info - saved info\n",
+ cilkos_get_current_thread_id());
+}
+
+/*
+ * Save TBB interop information from the cilk_fiber. It will get picked
+ * up when the thread is bound to the runtime next time.
+ */
+void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber *fiber)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk;
+ cilk_fiber_data* fdata;
+
+ if (NULL == fiber)
+ return;
+
+ fdata = cilk_fiber_get_data(fiber);
+ // If there is no TBB interop data, just return
+ if (NULL == fdata->stack_op_routine)
+ return;
+
+ saved_thunk = __cilkrts_get_tls_tbb_interop();
+
+ // If there is not already space allocated, allocate some.
+ if (NULL == saved_thunk) {
+ saved_thunk = (__cilk_tbb_stack_op_thunk*)
+ __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
+ __cilkrts_set_tls_tbb_interop(saved_thunk);
+ }
+
+ saved_thunk->routine = fdata->stack_op_routine;
+ saved_thunk->data = fdata->stack_op_data;
+}
+
+/*
+ * If there's TBB interop information that was saved before the thread was
+ * bound, apply it now
+ */
+void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber* fiber)
+{
+ __cilk_tbb_stack_op_thunk *saved_thunk =
+ __cilkrts_get_tls_tbb_interop();
+
+ CILK_ASSERT(fiber);
+ // If we haven't allocated a TBB interop index, we don't have any saved info
+ if (NULL == saved_thunk) {
+ DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - no saved info\n",
+ fiber);
+ return;
+ }
+
+ DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - using saved info\n",
+ fiber);
+
+ // Associate the saved info with the __cilkrts_stack
+ cilk_fiber_set_stack_op(fiber, *saved_thunk);
+
+ // Free the saved data. We'll save it again if needed when the code
+ // returns from the initial function
+ cilk_fiber_tbb_interop_free_stack_op_info();
+}
+
+/*
+ * Free saved TBB interop memory. Should only be called when the thread is
+ * not bound.
+ */
+void cilk_fiber_tbb_interop_free_stack_op_info(void)
+{
+    __cilk_tbb_stack_op_thunk *tls_thunk = __cilkrts_get_tls_tbb_interop();
+
+    // Only act when a thunk was actually stored for this thread.
+    if (NULL != tls_thunk) {
+        DBG_STACK_OPS ("tbb_interop_free_stack_op_info - freeing saved info\n");
+
+        // Release the storage, then clear the TLS slot so that later
+        // lookups see "no saved info".
+        __cilkrts_free(tls_thunk);
+        __cilkrts_set_tls_tbb_interop(NULL);
+    }
+}
+
+
+
+#if NEED_FIBER_REF_COUNTS
+// C entry point: nonzero when the fiber's reference count is positive.
+int cilk_fiber_has_references(cilk_fiber *fiber)
+{
+    const bool referenced = fiber->get_ref_count() > 0;
+    return referenced;
+}
+
+// C entry point: forwards to cilk_fiber::get_ref_count() and returns
+// its result as an int.
+int cilk_fiber_get_ref_count(cilk_fiber *fiber)
+{
+ return fiber->get_ref_count();
+}
+
+// C entry point: adds one reference to the fiber by forwarding to
+// cilk_fiber::inc_ref_count().
+void cilk_fiber_add_reference(cilk_fiber *fiber)
+{
+ fiber->inc_ref_count();
+}
+#endif // NEED_FIBER_REF_COUNTS
+
+
+} // End extern "C"
+
+
+// Returns this fiber viewed as its system-dependent subclass. The
+// static_cast is unchecked, so the object must really be a
+// cilk_fiber_sysdep (the allocate_* methods in this file always construct
+// one).
+cilk_fiber_sysdep* cilk_fiber::sysdep()
+{
+ return static_cast<cilk_fiber_sysdep*>(this);
+}
+
+
+// Default constructor: nulls the start and post-switch procedures and the
+// pending remove-reference bookkeeping, clears the flags, zero-fills the
+// cilk_fiber_data base, and starts the reference count at 0.
+cilk_fiber::cilk_fiber()
+ : m_start_proc(NULL)
+ , m_post_switch_proc(NULL)
+ , m_pending_remove_ref(NULL)
+ , m_pending_pool(NULL)
+ , m_flags(0)
+{
+ // Clear cilk_fiber_data base-class data members
+ std::memset((cilk_fiber_data*) this, 0, sizeof(cilk_fiber_data));
+
+ // cilk_fiber data members
+ // The count starts at 0; the allocate_* methods set the real starting
+ // value when they hand the fiber out.
+ init_ref_count(0);
+}
+
+// Constructs a fiber with the given stack size. All members are first
+// copy-assigned from a default-constructed temporary, then the requested
+// stack size is recorded in the cilk_fiber_data base.
+cilk_fiber::cilk_fiber(std::size_t stack_size)
+{
+ *this = cilk_fiber(); // A delegating constructor would be nice here
+ this->stack_size = stack_size;
+}
+
+// The base-class destructor has nothing to do. In this file, fiber storage
+// is released by callers that invoke ~cilk_fiber_sysdep() explicitly and
+// then call __cilkrts_free().
+cilk_fiber::~cilk_fiber()
+{
+ // Empty destructor.
+}
+
+
+// Returns the base of this fiber's stack by delegating to the
+// system-dependent implementation.
+char* cilk_fiber::get_stack_base()
+{
+ return this->sysdep()->get_stack_base_sysdep();
+}
+
+// Creates a brand-new fiber with the requested stack size directly from the
+// heap, bypassing every pool. Returns NULL if the raw allocation fails;
+// otherwise the fiber is returned with a reference count of 1.
+cilk_fiber* cilk_fiber::allocate_from_heap(std::size_t stack_size)
+{
+    // No pool is involved here, so no locking is needed.
+    void* storage = __cilkrts_malloc(sizeof(cilk_fiber_sysdep));
+
+    // Error condition. Failing to get memory for a fiber means we are in
+    // trouble, but the caller is left to decide what to do about it.
+    if (NULL == storage)
+        return NULL;
+
+    // Construct the system-dependent fiber in place.
+    cilk_fiber_sysdep* fresh = ::new(storage) cilk_fiber_sysdep(stack_size);
+
+    // A freshly constructed fiber must carry no flags and no pending work.
+    CILK_ASSERT(0 == fresh->m_flags);
+    CILK_ASSERT(NULL == fresh->m_pending_remove_ref);
+    CILK_ASSERT(NULL == fresh->m_pending_pool);
+
+    fresh->init_ref_count(1);
+    return fresh;
+}
+
+
+#if USE_FIBER_TRY_ALLOCATE_FROM_POOL
+/**
+ * Helper method: try to allocate a fiber from this pool or its
+ * ancestors without going to the OS / heap.
+ *
+ * Returns the allocated fiber, or NULL if no pooled fiber is found.
+ *
+ * If the pool contains a suitable fiber, return it. Otherwise, try to
+ * recursively grab a fiber from the parent pool, if there is one.
+ *
+ * This method will not allocate a fiber from the heap.
+ *
+ * A fiber returned by this method has its reference count set to 1.
+ *
+ * This method could be written either recursively or iteratively.
+ * It probably does not matter which one we do.
+ *
+ * @note This method is only compiled when the
+ * USE_FIBER_TRY_ALLOCATE_FROM_POOL switch is set.
+ */
+cilk_fiber* cilk_fiber::try_allocate_from_pool_recursive(cilk_fiber_pool* pool)
+{
+    cilk_fiber* ret = NULL;
+
+    // Unlocked peek: skip the locking entirely when the pool looks empty.
+    // The size is checked again below once the lock is held.
+    if (pool->size > 0) {
+        // Try to get the lock.
+        if (pool->lock) {
+            // For some reason, it seems to be better to just block on the parent
+            // pool lock, instead of using a try-lock?
+#define USE_TRY_LOCK_IN_FAST_ALLOCATE 0
+#if USE_TRY_LOCK_IN_FAST_ALLOCATE
+            int got_lock = spin_mutex_trylock(pool->lock);
+            if (!got_lock) {
+                // We do not hold the lock, so we must neither touch this
+                // pool's contents nor unlock it. Skip to the parent if
+                // there is one; otherwise report that nothing was found.
+                if (pool->parent) {
+                    return try_allocate_from_pool_recursive(pool->parent);
+                }
+                return NULL;
+            }
+#else
+            spin_mutex_lock(pool->lock);
+#endif
+        }
+
+        // Check in the pool if we have the lock.
+        if (pool->size > 0) {
+            ret = pool->fibers[--pool->size];
+        }
+
+        // Release the lock once we are done updating pool fields.
+        if (pool->lock) {
+            spin_mutex_unlock(pool->lock);
+        }
+    }
+
+    // Nothing here: defer to the parent pool, which sets the reference
+    // count itself on anything it returns.
+    if ((!ret) && (pool->parent)) {
+        return try_allocate_from_pool_recursive(pool->parent);
+    }
+
+    if (ret) {
+        // When we pull a fiber out of the pool, set its reference
+        // count before we return it.
+        ret->init_ref_count(1);
+    }
+    return ret;
+}
+#endif // USE_FIBER_TRY_ALLOCATE_FROM_POOL
+
+
+// Obtains a fiber for `pool`, trying in order: a fiber already sitting in
+// the pool, a new fiber from the heap (while pool->total is below
+// pool->alloc_max), and finally the parent pool. Returns NULL when every
+// option fails. A returned fiber has a reference count of 1.
+cilk_fiber* cilk_fiber::allocate(cilk_fiber_pool* pool)
+{
+    // `pool` is expected to be non-NULL. This is deliberately not asserted,
+    // since a NULL pool would seg fault immediately below anyway.
+    // CILK_ASSERT(NULL != pool);
+
+#if USE_FIBER_TRY_ALLOCATE_FROM_POOL
+    // "Fast" path: look through this pool and its ancestors before ever
+    // going to the heap or OS.
+    if (cilk_fiber* pooled = try_allocate_from_pool_recursive(pool))
+        return pooled;
+#endif
+
+    // Slow path. While holding the pool lock (if the pool is shared) we
+    // either take a fiber out of the pool or reserve the right to allocate
+    // one from the heap. The heap allocation itself happens only after the
+    // lock has been dropped.
+    const bool shared = (NULL != pool->lock);
+    if (shared) {
+        spin_mutex_lock(pool->lock);
+    }
+
+    // Look in the local pool first.
+    cilk_fiber* found = NULL;
+    if (pool->size > 0) {
+        found = pool->fibers[--pool->size];
+    }
+
+    // If the pool had nothing, check whether the heap budget allows another
+    // fiber. The total is bumped now, under the lock, and undone later if
+    // the heap allocation fails.
+    bool heap_allowed = false;
+    if (!found && (pool->total < pool->alloc_max)) {
+        increment_pool_total(pool);
+        heap_allowed = true;
+    }
+
+    if (shared) {
+        spin_mutex_unlock(pool->lock);
+    }
+
+    if (found) {
+        // A fiber pulled out of the pool gets its reference count set
+        // just in case.
+        found->init_ref_count(1);
+        return found;
+    }
+
+    // Actually try the heap / OS, if the budget check above allowed it.
+    if (heap_allowed) {
+        found = allocate_from_heap(pool->stack_size);
+        if (found) {
+            return found;
+        }
+
+        // The heap allocation failed: retake the lock and give back the
+        // slot reserved in the pool's total.
+        if (shared) {
+            spin_mutex_lock(pool->lock);
+        }
+        decrement_pool_total(pool, 1);
+        if (shared) {
+            spin_mutex_unlock(pool->lock);
+        }
+    }
+
+    // This pool could not supply a fiber; search the parent if there is one.
+    if (pool->parent) {
+        return allocate(pool->parent);
+    }
+
+    // `found` is still NULL here.
+    return found;
+}
+
+// Drops one reference to this fiber and returns the resulting count. When
+// the count reaches 0 the fiber is given back to `pool`, or destroyed and
+// returned to the heap if `pool` is NULL.
+int cilk_fiber::remove_reference(cilk_fiber_pool* pool)
+{
+    const int remaining = this->dec_ref_count();
+
+    // Still referenced elsewhere: nothing to release.
+    if (remaining != 0)
+        return remaining;
+
+    if (pool)
+        deallocate_self(pool);
+    else
+        deallocate_to_heap();
+
+    return remaining;
+}
+
+// Builds the fiber object that represents the calling thread itself.
+// The allocation is asserted to succeed. The returned fiber has a
+// reference count of 2.
+cilk_fiber* cilk_fiber::allocate_from_thread()
+{
+    void* storage = __cilkrts_malloc(sizeof(cilk_fiber_sysdep));
+    CILK_ASSERT(storage);
+
+    cilk_fiber_sysdep* main_fiber =
+        ::new(storage) cilk_fiber_sysdep(from_thread);
+
+    // Two references: one for having been created, and one because a fiber
+    // made from a thread is running from the start. Suspending this fiber
+    // will bring the count down to 1.
+    main_fiber->init_ref_count(2);
+
+#if SUPPORT_GET_CURRENT_FIBER
+    // This is the main fiber for the thread, so record it as the thread's
+    // current fiber.
+    cilkos_set_tls_cilk_fiber(main_fiber);
+#endif
+    return main_fiber;
+}
+
+// Undoes allocate_from_thread(): converts the fiber back to a plain thread
+// unconditionally, subtracts 2 from the reference count, and destroys the
+// fiber object only if that leaves the count at 0. Returns the resulting
+// count.
+int cilk_fiber::deallocate_from_thread()
+{
+    CILK_ASSERT(this->is_allocated_from_thread());
+#if SUPPORT_GET_CURRENT_FIBER
+    // Reverse of "allocate_from_thread": this fiber must be the thread's
+    // current fiber, and it stops being so now.
+    CILK_ASSERT(this == cilkos_get_tls_cilk_fiber());
+    cilkos_set_tls_cilk_fiber(NULL);
+#endif
+
+    this->assert_ref_count_at_least(2);
+
+    // Conceptually, suspending the fiber accounts for the first of the
+    // two references removed below.
+    cilk_fiber_sysdep* impl = this->sysdep();
+    impl->convert_fiber_back_to_thread();
+
+    // Freeing the fiber itself accounts for the second one.
+    const int remaining = this->sub_from_ref_count(2);
+    if (remaining != 0)
+        return remaining;
+
+    // Last reference gone: run the destructor and release the storage.
+    impl->~cilk_fiber_sysdep();
+    __cilkrts_free(impl);
+    return remaining;
+}
+
+// Drops one reference to a fiber that was created from a thread and
+// returns the resulting count. Such a fiber never goes back to a pool:
+// when the count reaches 0 it is destroyed and its storage freed.
+int cilk_fiber::remove_reference_from_thread()
+{
+    const int remaining = dec_ref_count();
+    if (remaining != 0)
+        return remaining;
+
+    cilk_fiber_sysdep* impl = this->sysdep();
+    impl->~cilk_fiber_sysdep();
+    __cilkrts_free(impl);
+    return remaining;
+}
+
+
+#if SUPPORT_GET_CURRENT_FIBER
+// Returns the current fiber as reported by the system-dependent layer.
+// Only built when SUPPORT_GET_CURRENT_FIBER is enabled.
+cilk_fiber* cilk_fiber::get_current_fiber()
+{
+ return cilk_fiber_sysdep::get_current_fiber_sysdep();
+}
+#endif
+
+// Runs the deferred work recorded on this fiber: the one-shot post-switch
+// procedure (if set), followed by the pending remove-reference on another
+// fiber (if set).
+void cilk_fiber::do_post_switch_actions()
+{
+    // The procedure is cleared before it is invoked, so it runs once only.
+    cilk_fiber_proc pending_proc = m_post_switch_proc;
+    if (pending_proc)
+    {
+        m_post_switch_proc = NULL;
+        pending_proc(this);
+    }
+
+    if (NULL != m_pending_remove_ref)
+    {
+        m_pending_remove_ref->remove_reference(m_pending_pool);
+
+        // Even if that call did not free the fiber, the pending request
+        // has been serviced, so forget it.
+        m_pending_remove_ref = NULL;
+        m_pending_pool = NULL;
+    }
+}
+
+// Suspends this fiber and switches execution to `other`. Ownership (the
+// `owner` field) moves from this fiber to `other`, this fiber is marked
+// resumable, and control returns here only after the system-dependent
+// switch call returns.
+void cilk_fiber::suspend_self_and_resume_other(cilk_fiber* other)
+{
+#if FIBER_DEBUG >=1
+ fprintf(stderr, "suspend_self_and_resume_other: self =%p, other=%p [owner=%p, resume_sf=%p]\n",
+ this, other, other->owner, other->resume_sf);
+#endif
+
+ // Decrement my reference count (to suspend)
+ // Increment other's count (to resume)
+ // Suspended fiber should have a reference count of at least 1. (It is not in a pool).
+ this->dec_ref_count();
+ other->inc_ref_count();
+ this->assert_ref_count_at_least(1);
+
+ // Pass along my owner.
+ other->owner = this->owner;
+ this->owner = NULL;
+
+ // Change this fiber to resumable.
+ CILK_ASSERT(!this->is_resumable());
+ this->set_resumable(true);
+
+ // Normally, I'd assert other->is_resumable(). But this flag may
+ // be false the first time we try to "resume" a fiber.
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->suspend_self_and_resume_other_sysdep(other->sysdep());
+
+ // HAVE RESUMED EXECUTION
+ // When we come back here, we should have at least two references:
+ // one for the fiber being allocated / out of a pool, and one for it being active.
+ this->assert_ref_count_at_least(2);
+}
+
+// Switches to `other` for good. This fiber's reference is dropped once for
+// the suspend, and a second removal (against `self_pool`) is queued on
+// `other` through m_pending_remove_ref / m_pending_pool, to be carried out
+// by do_post_switch_actions(). Never returns: reaching the end is reported
+// as a runtime bug and aborts.
+NORETURN
+cilk_fiber::remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool,
+ cilk_fiber* other)
+{
+ // Decrement my reference count once (to suspend)
+ // Increment other's count (to resume)
+ // Suspended fiber should have a reference count of at least 1. (It is not in a pool).
+ this->dec_ref_count();
+ other->inc_ref_count();
+
+ // Set a pending remove reference for this fiber, once we have
+ // actually switched off.
+ other->m_pending_remove_ref = this;
+ other->m_pending_pool = self_pool;
+
+ // Pass along my owner.
+ other->owner = this->owner;
+ this->owner = NULL;
+
+ // Since we are deallocating self, this fiber does not become
+ // resumable.
+ CILK_ASSERT(!this->is_resumable());
+
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->jump_to_resume_other_sysdep(other->sysdep());
+
+ // The jump above is not expected to come back.
+ __cilkrts_bug("Deallocating fiber. We should never come back here.");
+ std::abort();
+}
+
+
+// Destroys this fiber and releases its storage: the cilk_fiber_sysdep
+// destructor is run explicitly, then the memory goes back through
+// __cilkrts_free(). `this` must not be used after the call.
+void cilk_fiber::deallocate_to_heap()
+{
+ cilk_fiber_sysdep* self = this->sysdep();
+ self->~cilk_fiber_sysdep();
+ __cilkrts_free(self);
+}
+
+// Returns this fiber, whose reference count must already be 0, to `pool`.
+// If the pool is full, some fibers are first moved to the parent pool (when
+// there is one), and the final insertion is delegated to
+// cilk_fiber_pool_free_fibers_from_pool().
+void cilk_fiber::deallocate_self(cilk_fiber_pool* pool)
+{
+ this->set_resumable(false);
+
+ CILK_ASSERT(NULL != pool);
+ CILK_ASSERT(!this->is_allocated_from_thread());
+ this->assert_ref_count_equals(0);
+
+ // Cases:
+ //
+ // 1. pool has space: Add to this pool.
+ // 2. pool is full: Give some fibers to parent, and then free
+ // enough to make space for the fiber we are deallocating.
+ // Then put the fiber back into the pool.
+
+ // A pool with a non-NULL lock pointer must be locked around updates.
+ const bool need_lock = pool->lock;
+ // Grab the lock for the remaining cases.
+ if (need_lock) {
+ spin_mutex_lock(pool->lock);
+ }
+
+ // Case 1: this pool has space. Return the fiber.
+ if (pool->size < pool->max_size)
+ {
+ // Add this fiber to pool
+ pool->fibers[pool->size++] = this;
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+ return;
+ }
+
+ // Case 2: Pool is full.
+ //
+ // First free up some space by giving fibers to the parent.
+ if (pool->parent)
+ {
+ // Pool is full. Move all but "num_to_keep" fibers to parent,
+ // if we can.
+ // num_to_keep is max_size/2 + max_size/4, i.e. about three quarters
+ // of the pool's capacity.
+ unsigned num_to_keep = pool->max_size/2 + pool->max_size/4;
+ cilk_fiber_pool_move_fibers_to_parent_pool(pool, num_to_keep);
+ }
+
+ if (need_lock) {
+ spin_mutex_unlock(pool->lock);
+ }
+
+ // Now, free a fiber to make room for the one we need to put back,
+ // and then put this fiber back. This step may actually return
+ // fibers to the heap.
+ // NOTE(review): the pool lock has been released by this point;
+ // presumably cilk_fiber_pool_free_fibers_from_pool() does its own
+ // locking -- confirm against its definition.
+ cilk_fiber_pool_free_fibers_from_pool(pool, pool->max_size -1, this);
+}
+
+
+// NOTE: Except for print-debug, this code is the same as in Windows.
+// Invokes the TBB stack-op callback registered on this fiber, if any, with
+// the operation `op`. After a CILK_TBB_STACK_RELEASE the callback and its
+// data are cleared from the fiber.
+void cilk_fiber::invoke_tbb_stack_op(__cilk_tbb_stack_op op)
+{
+    cilk_fiber_data *data = this->get_data();
+
+    if (0 != data->stack_op_routine)
+    {
+        // Call TBB to do its thing
+        DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - op %s data %p for cilk_fiber %p, fiber %p, thread id %04x\n",
+                       data->owner,
+                       NameStackOp(op),
+                       data->stack_op_data,
+                       data,
+                       this,
+                       cilkos_get_current_thread_id());
+
+        (*data->stack_op_routine)(op, data->stack_op_data);
+
+        // A release is the last operation for this registration, so drop
+        // the callback and its context afterwards.
+        if (CILK_TBB_STACK_RELEASE == op)
+        {
+            data->stack_op_routine = 0;
+            data->stack_op_data = 0;
+        }
+        return;
+    }
+
+    // No callback registered. Only log this for operations other than
+    // release.
+    if (CILK_TBB_STACK_RELEASE != op)
+        DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - %s (%d) for cilk_fiber %p, fiber %p, thread id %04x - No stack op routine\n",
+                       data->owner,
+                       NameStackOp(op),
+                       op,
+                       data,
+                       this,
+                       cilkos_get_current_thread_id());
+}
+
+
+
+#if NEED_FIBER_REF_COUNTS
+
+// Adds 1 to m_outstanding_references via cilkos_atomic_add().
+void cilk_fiber::atomic_inc_ref_count()
+{
+ cilkos_atomic_add(&m_outstanding_references, 1);
+}
+
+// Adds -1 to m_outstanding_references via cilkos_atomic_add() and returns
+// that call's result.
+// NOTE(review): presumably the result is the updated count -- confirm
+// against cilkos_atomic_add().
+long cilk_fiber::atomic_dec_ref_count()
+{
+ return cilkos_atomic_add(&m_outstanding_references, -1);
+}
+
+// Adds -v to m_outstanding_references via cilkos_atomic_add() and returns
+// that call's result.
+// NOTE(review): presumably the result is the updated count -- confirm
+// against cilkos_atomic_add().
+long cilk_fiber::atomic_sub_from_ref_count(long v)
+{
+ return cilkos_atomic_add(&m_outstanding_references, -v);
+}
+
+#endif // NEED_FIBER_REF_COUNTS
+
+/* End cilk_fibers.cpp */
diff --git a/libcilkrts/runtime/cilk_fiber.h b/libcilkrts/runtime/cilk_fiber.h
new file mode 100644
index 00000000000..e1d5f5b32a3
--- /dev/null
+++ b/libcilkrts/runtime/cilk_fiber.h
@@ -0,0 +1,877 @@
+/* cilk_fiber.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/**
+ * @file cilk_fiber.h
+ *
+ * @brief Abstraction of a "fiber": A coprocess-like stack and auxiliary data
+ */
+
+#ifndef INCLUDED_CILK_FIBER_DOT_H
+#define INCLUDED_CILK_FIBER_DOT_H
+
+#include <cilk/common.h>
+#ifdef __cplusplus
+# include <cstddef>
+#else
+# include <stddef.h>
+#endif
+
+#include "bug.h"
+#include "cilk-tbb-interop.h"
+#include "spin_mutex.h"
+#include "internal/abi.h" // Define __cilkrts_stack_frame
+
+/**
+ * @brief Debugging level for Cilk fiber code.
+ *
+ * A value of 0 means no debugging.
+ * Higher values generate more debugging output.
+ */
+#define FIBER_DEBUG 0
+
+/**
+ * @brief Flag for validating reference counts.
+ *
+ * Set to 1 to assert that fiber reference counts are reasonable.
+ */
+#define FIBER_CHECK_REF_COUNTS 1
+
+/**
+ * @brief Flag to determine whether fibers support reference counting.
+ * We require reference counting only on Windows, for exception
+ * processing. Unix does not need reference counting.
+ */
+#if defined(_WIN32)
+# define NEED_FIBER_REF_COUNTS 1
+#endif
+
+/**
+ * @brief Flag to enable support for the
+ * cilk_fiber_get_current_fiber() method.
+ *
+ * I'd like this flag to be 0. However, the cilk_fiber test depends
+ * on being able to call this method.
+ */
+#if !defined(SUPPORT_GET_CURRENT_FIBER)
+# define SUPPORT_GET_CURRENT_FIBER 0
+#endif
+
+/**
+ * @brief Switch for enabling "fast path" check for fibers, which
+ * doesn't go to the heap or OS until checking the ancestors first.
+ *
+ * Doing this check seems to make the stress test in
+ * cilk_fiber_pool.t.cpp run faster. But it doesn't seem to make much
+ * difference in other benchmarks, so it is disabled by default.
+ */
+#define USE_FIBER_TRY_ALLOCATE_FROM_POOL 0
+
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/// @brief Forward reference to fiber pool.
+typedef struct cilk_fiber_pool cilk_fiber_pool;
+
+/** @brief Opaque data structure representing a fiber */
+typedef struct cilk_fiber cilk_fiber;
+
+/** @brief Function pointer type for use as a fiber's "main" procedure */
+typedef void (*cilk_fiber_proc)(cilk_fiber*);
+
+/** @brief Data structure associated with each fiber. */
+typedef struct cilk_fiber_data
+{
+ __STDNS size_t stack_size; /**< Size of stack for fiber */
+ __cilkrts_worker* owner; /**< Worker using this fiber */
+ __cilkrts_stack_frame* resume_sf; /**< Stack frame to resume */
+ __cilk_tbb_pfn_stack_op stack_op_routine; /**< Cilk/TBB interop callback */
+ void* stack_op_data; /**< Data for Cilk/TBB callback */
+ void* client_data; /**< Data managed by client */
+
+#ifdef _WIN32
+ char *initial_sp; /**< Initialized in fiber_stub */
+# ifdef _WIN64
+ char *steal_frame_sp; /**< RSP for frame stealing work */
+ // Needed for exception handling so we can
+ // identify when about to unwind off stack
+# endif
+#endif
+
+} cilk_fiber_data;
+
+/** @brief Pool of cilk_fiber for fiber reuse
+ *
+ * Pools form a hierarchy, with each pool pointing to its parent. When the
+ * pool underflows, it gets a fiber from its parent. When a pool overflows,
+ * it returns some fibers to its parent. If the root pool underflows, it
+ * allocates and initializes a new fiber from the heap but only if the total
+ * is less than alloc_max; otherwise, fiber creation fails.
+ */
+struct cilk_fiber_pool
+{
+ spin_mutex* lock; ///< Mutual exclusion for pool operations
+ __STDNS size_t stack_size; ///< Size of stacks for fibers in this pool.
+ cilk_fiber_pool* parent; ///< @brief Parent pool.
+ ///< If this pool is empty, get from parent
+
+ // Describes inactive fibers stored in the pool.
+ cilk_fiber** fibers; ///< Array of max_size fiber pointers
+ unsigned max_size; ///< Limit on number of fibers in pool
+ unsigned size; ///< Number of fibers currently in the pool
+
+ // Statistics on active fibers that were allocated from this pool,
+ // but no longer in the pool.
+ int total; ///< @brief Fibers allocated - fibers deallocated from pool
+ ///< total may be negative for non-root pools.
+ int high_water; ///< High water mark of total fibers
+ int alloc_max; ///< Limit on number of fibers allocated from the heap/OS
+};
+
+/** @brief Initializes a cilk_fiber_pool structure
+ *
+ * @param pool - The address of the pool that is to be initialized
+ * @param parent - The address of this pool's parent, or NULL for root pool
+ * @param stack_size - Size of stacks for fibers allocated from this pool.
+ * @param buffer_size - The maximum number of fibers that may be pooled.
+ * @param alloc_max - Limit on # of fibers this pool can allocate from the heap.
+ * @param is_shared - True if accessing this pool needs a lock, false otherwise.
+ */
+void cilk_fiber_pool_init(cilk_fiber_pool* pool,
+ cilk_fiber_pool* parent,
+ size_t stack_size,
+ unsigned buffer_size,
+ int alloc_max,
+ int is_shared);
+
+/** @brief Sets the maximum number of fibers to allocate from a root pool.
+ *
+ * @param root_pool - A root fiber pool
+ * @param max_fibers_to_allocate - The limit on # of fibers to allocate.
+ *
+ * Sets the maximum number of fibers that can be allocated from this
+ * pool and all its descendants. This pool must be a root pool.
+ */
+void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool,
+ unsigned max_fibers_to_allocate);
+
+/** @brief De-initializes a cilk_fiber_pool
+ *
+ * @param pool - The address of the pool that is to be destroyed
+ */
+void cilk_fiber_pool_destroy(cilk_fiber_pool* pool);
+
+/** @brief Allocates a new cilk_fiber.
+ *
+ * If the specified pool is empty, this method may choose to either
+ * allocate a fiber from the heap (if pool->total < pool->alloc_max),
+ * or retrieve a fiber from the parent pool.
+ *
+ * @note If a non-null fiber is returned, @c cilk_fiber_reset_state
+ * should be called on this fiber before using it.
+ *
+ * An allocated fiber begins with a reference count of 1.
+ * This method may lock @c pool or one of its ancestors.
+ *
+ * @pre pool should not be NULL.
+ *
+ * @param pool The fiber pool from which to retrieve a fiber.
+ * @return An allocated fiber, or NULL if failed to allocate.
+ */
+cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool);
+
+/** @brief Allocate and initialize a new cilk_fiber using memory from
+ * the heap and/or OS.
+ *
+ * The allocated fiber begins with a reference count of 1.
+ *
+ * @param stack_size The size (in bytes) to be allocated for the fiber's
+ * stack.
+ * @return An initialized fiber. This method should not return NULL
+ * unless some exceptional condition has occurred.
+ */
+cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size);
+
+
+/** @brief Resets a fiber object just allocated from a pool with the
+ * specified proc.
+ *
+ * After this call, cilk_fiber_data object associated with this fiber
+ * is filled with zeros.
+ *
+ * This function can be called only on a fiber that has been allocated
+ * from a pool, but never used.
+ *
+ * @param fiber The fiber to reset and initialize.
+ * @param start_proc The function to run when switching to the fiber. If
+ * null, the fiber can be used with cilk_fiber_run_proc()
+ * but not with cilk_fiber_resume().
+ */
+void cilk_fiber_reset_state(cilk_fiber* fiber,
+ cilk_fiber_proc start_proc);
+
+/** @brief Remove a reference from this fiber, possibly deallocating it.
+ *
+ * This fiber is deallocated only when there are no other references
+ * to it. Deallocation happens either by returning the fiber to the
+ * specified pool, or returning it to the heap.
+ *
+ * A fiber that is currently executing should not remove the last
+ * reference to itself.
+ *
+ * When a fiber is deallocated, destructors are not called for the
+ * objects (if any) still on its stack. The fiber's stack and fiber
+ * data is returned to the stack pool but the client fiber data is not
+ * deallocated.
+ *
+ * If the pool overflows because of a deallocation, then some fibers
+ * will be returned to the parent pool. If the root pool overflows,
+ * then the fiber is returned to the heap.
+ *
+ * @param fiber The Cilk fiber to remove a reference to.
+ * @param pool The fiber pool to which the fiber should be returned. The
+ * caller is assumed to have exclusive access to the pool
+ * either because there is no contention for it or because
+ * its lock has been acquired. If pool is NULL, any
+ * deallocated fiber is destroyed and returned to the
+ * heap.
+ *
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to a pool or the heap.
+ */
+int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool);
+
+/** @brief Allocates and initializes this thread's main fiber
+ *
+ * Each thread has an "implicit" main fiber that controls the
+ * thread's initial stack. This function makes this fiber visible to
+ * the client and allocates the Cilk-specific aspects of the implicit
+ * fiber. A call to this function must be paired with a call to
+ * cilk_fiber_deallocate_from_thread()
+ * or a memory leak (or worse) will result.
+ *
+ * A fiber allocated from a thread begins with a reference count of 2.
+ * One is for being allocated, and one is for being active.
+ * (A fiber created from a thread is automatically currently executing.)
+ * The matching calls above each decrement the reference count by 1.
+ *
+ * @return A fiber for the currently executing thread.
+ */
+cilk_fiber* cilk_fiber_allocate_from_thread(void);
+
+/** @brief Remove a fiber created from a thread,
+ * possibly deallocating it.
+ *
+ * Same as cilk_fiber_remove_reference, except that it works on fibers
+ * created via cilk_fiber_allocate_from_thread().
+ *
+ * Fibers created from a thread are never returned to a pool.
+ *
+ * @param fiber The Cilk fiber to remove a reference from.
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber);
+
+/** @brief Deallocate a fiber created from a thread,
+ * possibly destroying it.
+ *
+ * This method decrements the reference count of the fiber by 2, and
+ * destroys the fiber struct if the reference count is 0.
+ *
+ * OS-specific cleanup for the fiber executes unconditionally with
+ * this method. The destruction of the actual object, however, does
+ * not occur unless the reference count is 0.
+ *
+ * @param fiber The cilk_fiber to deallocate from a thread.
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber);
+
+/** @brief Returns true if this fiber is allocated from a thread.
+ */
+int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber);
+
+
+/** @brief Suspends execution of the current fiber and resumes another fiber.
+ *
+ * Suspends the current fiber and transfers control to a new fiber. Execution
+ * on the new fiber resumes from the point at which fiber suspended itself to
+ * run a different fiber. If fiber was freshly allocated, then runs the
+ * start_proc function specified at allocation. This function returns when
+ * another fiber resumes the self fiber. Note that the state of the
+ * floating-point control register (i.e., the register that controls rounding
+ * mode, etc.) is valid but indeterminate on return -- different
+ * implementations will have different results.
+ *
+ * When the @c self fiber is resumed, execution proceeds as though
+ * this function call returns.
+ *
+ * This operation increments the reference count of @p other.
+ * This operation decrements the reference count of @p self.
+ *
+ * @param self Fiber to switch from. Must equal current fiber.
+ * @param other Fiber to switch to.
+ */
+void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber* other);
+
+/** @brief Removes a reference from the currently executing fiber and
+ * resumes other fiber.
+ *
+ * Removes a reference from @p self and transfer control to @p other
+ * fiber. Execution on @p other resumes from the point at which @p
+ * other suspended itself to run a different fiber. If @p other fiber
+ * was freshly allocated, then runs the function specified at
+ * creation.
+ *
+ *
+ * This operation increments the reference count of @p other.
+ *
+ * This operation conceptually decrements the reference count of
+ * @p self twice, once to suspend it, and once to remove a reference to
+ * it. Then, if the count is 0, it is returned to the specified pool
+ * or destroyed.
+ *
+ * @pre @p self is the currently executing fiber.
+ *
+ * @param self Fiber to remove a reference from and switch away from.
+ * @param self_pool Pool to which the current fiber should be returned
+ * @param other Fiber to switch to.
+ */
+NORETURN
+cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self,
+ cilk_fiber_pool* self_pool,
+ cilk_fiber* other);
+
+/** @brief Set the proc method to execute immediately after a switch
+ * to this fiber.
+ *
+ * The @c post_switch_proc method executes immediately after switching
+ * away from @p self fiber to some other fiber, but before @c self
+ * gets cleaned up.
+ *
+ * @note A fiber can have only one post_switch_proc method at a time.
+ * If this method is called multiple times before switching to the
+ * fiber, only the last proc method will execute.
+ *
+ * @param self Fiber.
+ * @param post_switch_proc Proc method to execute immediately after switching to this fiber.
+ */
+void cilk_fiber_set_post_switch_proc(cilk_fiber* self, cilk_fiber_proc post_switch_proc);
+
+/** @brief Invoke TBB stack op for this fiber.
+ *
+ * @param fiber Fiber to invoke stack op for.
+ * @param op The stack op to invoke
+ */
+void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber, __cilk_tbb_stack_op op);
+
+/** @brief Returns the fiber data associated with the specified fiber.
+ *
+ * The returned struct is owned by the fiber and is deallocated automatically
+ * when the fiber is destroyed. However, the client_data field is owned by
+ * the client and must be deallocated separately. When called for a
+ * newly-allocated fiber, the returned data is zero-filled.
+ *
+ * @param fiber The fiber for which data is being requested.
+ * @return The fiber data for the specified fiber
+ */
+cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber);
+
+/** @brief Retrieve the owner field from the fiber.
+ *
+ * This method is provided for convenience. One can also get the
+ * fiber data, and then get the owner field.
+ */
+__CILKRTS_INLINE
+__cilkrts_worker* cilk_fiber_get_owner(cilk_fiber* fiber)
+{
+ // TBD: We really want a static assert here, that this cast is
+ // doing the right thing.
+ // The cast reads the fiber pointer directly as a cilk_fiber_data
+ // pointer, so it is only valid if the data sits at the very start of
+ // the fiber object.
+ cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
+ return fdata->owner;
+}
+
+/** @brief Sets the owner field of a fiber.
+ *
+ * This method is provided for convenience. One can also get the
+ * fiber data, and then set the owner field.
+ *
+ * @param fiber The fiber whose owner is being changed.
+ * @param owner The worker to record as the fiber's owner.
+ */
+__CILKRTS_INLINE
+void cilk_fiber_set_owner(cilk_fiber* fiber, __cilkrts_worker* owner)
+{
+ // TBD: We really want a static assert here, that this cast is
+ // doing the right thing.
+ // The cast reads the fiber pointer directly as a cilk_fiber_data
+ // pointer, so it is only valid if the data sits at the very start of
+ // the fiber object.
+ cilk_fiber_data* fdata = (cilk_fiber_data*)fiber;
+ fdata->owner = owner;
+}
+
+/** @brief Returns true if this fiber is resumable.
+ *
+ * A fiber is considered resumable when it is not currently being
+ * executed.
+ *
+ * This function is used by Windows exception code.
+ * @param fiber The fiber to check.
+ * @return Nonzero value if fiber is resumable.
+ */
+int cilk_fiber_is_resumable(cilk_fiber* fiber);
+
+/**
+ * @brief Returns the base of this fiber's stack.
+ *
+ * On some platforms (e.g., Windows), the fiber must have started
+ * running before we can get this information.
+ *
+ * @param fiber The fiber to get the stack pointer from.
+ * @return The base of the stack, or NULL if this
+ * information is not available yet.
+ */
+char* cilk_fiber_get_stack_base(cilk_fiber* fiber);
+
+
+/****************************************************************************
+ * TBB interop functions
+ * **************************************************************************/
+/**
+ * @brief Set the TBB callback information for a stack
+ *
+ * @param fiber The fiber to set the TBB callback information for
+ * @param o The TBB callback thunk. Specifies the callback address and
+ * context value.
+ */
+void cilk_fiber_set_stack_op(cilk_fiber *fiber,
+ __cilk_tbb_stack_op_thunk o);
+
+/**
+ * @brief Save the TBB callback address and context value in
+ * thread-local storage.
+ *
+ * We'll use it later when the thread binds to a worker.
+ *
+ * @param o The TBB callback thunk which is to be saved.
+ */
+void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o);
+
+/**
+ * @brief Move TBB stack-op info from thread-local storage and store
+ * it into the fiber.
+ *
+ * Called when we bind a thread to the runtime. If there is any TBB
+ * interop information in thread-local storage, bind it to the stack
+ * now.
+ *
+ * @pre \c fiber should not be NULL.
+ * @param fiber The fiber that should take over the TBB interop information.
+ */
+void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber *fiber);
+
+/**
+ * @brief Free any TBB interop information saved in thread-local storage
+ */
+void cilk_fiber_tbb_interop_free_stack_op_info(void);
+
+/**
+ * @brief Migrate any TBB interop information from a cilk_fiber to
+ * thread-local storage.
+ *
+ * Returns immediately if no TBB interop information has been
+ * associated with the stack.
+ *
+ * @param fiber The cilk_fiber whose TBB interop information should be
+ * saved in thread-local storage.
+ */
+void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber* fiber);
+
+
+#if SUPPORT_GET_CURRENT_FIBER
+/** @brief Returns the fiber associated with the currently executing thread
+ *
+ * @note This function is currently used only for testing the Cilk
+ * runtime.
+ *
+ * @return Fiber associated with the currently executing thread or NULL if no
+ * fiber was associated with this thread.
+ */
+cilk_fiber* cilk_fiber_get_current_fiber(void);
+#endif
+
+
+#if NEED_FIBER_REF_COUNTS
+/** @brief Returns true if this fiber has reference count > 0.
+ *
+ * @param fiber The fiber to check for references.
+ * @return Nonzero value if the fiber has references.
+ */
+int cilk_fiber_has_references(cilk_fiber *fiber);
+
+/** @brief Returns the value of the reference count.
+ *
+ * @param fiber The fiber to check for references.
+ * @return The value of the reference count of fiber.
+ */
+int cilk_fiber_get_ref_count(cilk_fiber *fiber);
+
+/** @brief Adds a reference to this fiber.
+ *
+ * Increments the reference count of a current fiber. Fibers with
+ * nonzero reference count will not be freed or returned to a fiber
+ * pool.
+ *
+ * @param fiber The fiber to add a reference to.
+ */
+void cilk_fiber_add_reference(cilk_fiber *fiber);
+
+#endif // NEED_FIBER_REF_COUNTS
+
+__CILKRTS_END_EXTERN_C
+
+#ifdef __cplusplus
+// Some C++ implementation details
+
+/// Opaque declaration of a cilk_fiber_sysdep object.
+struct cilk_fiber_sysdep;
+
+/**
+ * cilk_fiber is a base-class for system-dependent fiber implementations.
+ */
+struct cilk_fiber : protected cilk_fiber_data
+{
+ protected:
+ // This is a rare acceptable use of protected inheritance and protected
+ // variable access: when the base class and derived class collaborate
+ // tightly to comprise a single component.
+
+ /// For overloading constructor of cilk_fiber.
+ enum from_thread_t { from_thread = 1 };
+
+ // Boolean flags capturing the status of the fiber.
+ // Each one can be set independently.
+ // A default fiber is constructed with a flag value of 0.
+ static const int RESUMABLE = 0x01; ///< True if the fiber is in a suspended state and can be resumed.
+ static const int ALLOCATED_FROM_THREAD = 0x02; ///< True if fiber was allocated from a thread.
+
+ cilk_fiber_proc m_start_proc; ///< Function to run on start up/reset
+ cilk_fiber_proc m_post_switch_proc; ///< Function that executes when we first switch to a new fiber from a different one.
+
+ cilk_fiber* m_pending_remove_ref;///< Fiber to possibly delete on start up or resume
+ cilk_fiber_pool* m_pending_pool; ///< Pool where m_pending_remove_ref should go if it is deleted.
+ unsigned m_flags; ///< Captures the status of this fiber.
+
+#if NEED_FIBER_REF_COUNTS
+ volatile long m_outstanding_references; ///< Counts references to this fiber.
+#endif
+
+ /// Creates a fiber with NULL data.
+ cilk_fiber();
+
+ /**
+ * @brief Creates a fiber with user-specified arguments.
+ *
+ * @param stack_size Size of stack to use for this fiber.
+ */
+ cilk_fiber(std::size_t stack_size);
+
+ /// Empty destructor.
+ ~cilk_fiber();
+
+ /**
+ * @brief Performs any actions that happen after switching from
+ * one fiber to another.
+ *
+ * These actions are:
+ * 1. Execute m_post_switch_proc on a fiber.
+ * 2. Do any pending deallocations from the previous fiber.
+ */
+ void do_post_switch_actions();
+
+ /**
+ *@brief Helper method that converts a @c cilk_fiber object into a
+ * @c cilk_fiber_sysdep object.
+ *
+ * The @c cilk_fiber_sysdep object contains the system-dependent parts
+ * of the implementation of a @c cilk_fiber.
+ *
+ * We could have @c cilk_fiber_sysdep inherit from @c cilk_fiber and
+ * then use virtual functions. But since a given platform only uses
+ * one definition of @c cilk_fiber_sysdep at a time, we statically
+ * cast between them.
+ */
+ inline cilk_fiber_sysdep* sysdep();
+
+    /**
+     * @brief Sets or clears the RESUMABLE bit of m_flags.
+     *
+     * @param state true to set the bit, false to clear it.
+     */
+    inline void set_resumable(bool state) {
+        if (state) {
+            m_flags |= RESUMABLE;
+        } else {
+            m_flags &= ~RESUMABLE;
+        }
+    }
+
+    /**
+     * @brief Sets or clears the ALLOCATED_FROM_THREAD bit of m_flags.
+     *
+     * @param state true to set the bit, false to clear it.
+     */
+    inline void set_allocated_from_thread(bool state) {
+        if (state) {
+            m_flags |= ALLOCATED_FROM_THREAD;
+        } else {
+            m_flags &= ~ALLOCATED_FROM_THREAD;
+        }
+    }
+
+ public:
+
+ /**
+ * @brief Allocates and initializes a new cilk_fiber, either from
+ * the specified pool or from the heap.
+ *
+ * @pre pool should not be NULL.
+ */
+ static cilk_fiber* allocate(cilk_fiber_pool* pool);
+
+ /**
+ * @brief Allocates a fiber from the heap.
+ */
+ static cilk_fiber* allocate_from_heap(size_t stack_size);
+
+ /**
+ * @brief Return a fiber to the heap.
+ */
+ void deallocate_to_heap();
+
+ /**
+ * @brief Reset the state of a fiber just allocated from a pool.
+ */
+ void reset_state(cilk_fiber_proc start_proc);
+
+ /**
+ * @brief Remove a reference from this fiber, possibly
+ * deallocating it if the reference count becomes 0.
+ *
+ * @param pool The fiber pool to which this fiber should be returned.
+ * @return The final reference count.
+ */
+ int remove_reference(cilk_fiber_pool* pool);
+
+ /**
+ * @brief Deallocate the fiber by returning it to the pool.
+ * @pre This method should only be called if the reference count
+ * is 0.
+ *
+ * @param pool The fiber pool to return this fiber to. If NULL,
+ * fiber is returned to the heap.
+ */
+ void deallocate_self(cilk_fiber_pool *pool);
+
+ /** @brief Allocates and initializes this thread's main fiber. */
+ static cilk_fiber* allocate_from_thread();
+
+ /** @brief Deallocate a fiber created from a thread,
+ * possibly destroying it.
+ *
+ * This method decrements the reference count of this fiber by 2,
+ * and destroys the fiber if the reference count is 0.
+ *
+ * OS-specific cleanup for the fiber executes unconditionally for
+ * this method. The destruction of the actual object, however, does
+ * not occur unless the reference count is 0.
+ *
+ * @return Final reference count. If the count is 0, the fiber was
+ * returned to the heap.
+ */
+ int deallocate_from_thread();
+
+ /** @brief Removes a reference from this fiber.
+ *
+ * This method deallocates this fiber if the reference count
+ * becomes 0.
+ *
+ * @pre This fiber must be allocated from a thread.
+ * @return The final reference count of this fiber.
+ */
+ int remove_reference_from_thread();
+
+#if SUPPORT_GET_CURRENT_FIBER
+ /** @brief Get the current fiber from TLS.
+ *
+ * @note This function is only used for testing the runtime.
+ */
+ static cilk_fiber* get_current_fiber();
+#endif
+
+ /** @brief Suspend execution on current fiber and resume other fiber.
+ *
+ * Control returns after resuming execution of the self fiber.
+ */
+ void suspend_self_and_resume_other(cilk_fiber* other);
+
+
+ /** @brief Removes a reference from the currently executing fiber
+ * and resumes other fiber.
+ *
+ * This fiber may be returned to a pool or deallocated.
+ */
+ NORETURN remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool,
+ cilk_fiber* other);
+
+    /** @brief Registers the proc to run immediately after a switch to
+     *  this fiber.
+     *
+     * @param post_switch_proc  Proc to execute immediately after
+     *                          switching to this fiber; stored in
+     *                          m_post_switch_proc.
+     */
+    inline void set_post_switch_proc(cilk_fiber_proc post_switch_proc) {
+        this->m_post_switch_proc = post_switch_proc;
+    }
+
+    /** @brief Reports whether the RESUMABLE flag is set on this fiber.
+     *
+     * A fiber is considered resumable when it is not currently being
+     * executed.
+     *
+     * @return true if the RESUMABLE bit of m_flags is set.
+     */
+    inline bool is_resumable(void) {
+        const unsigned resumable_bit = m_flags & RESUMABLE;
+        return resumable_bit != 0;
+    }
+
+    /** @brief Reports whether this fiber was allocated from a thread.
+     *
+     * @return true if the ALLOCATED_FROM_THREAD bit of m_flags is set.
+     */
+    inline bool is_allocated_from_thread(void) {
+        const unsigned from_thread_bit = m_flags & ALLOCATED_FROM_THREAD;
+        return from_thread_bit != 0;
+    }
+
+ /**
+ *@brief Get the address at the base of the stack for this fiber.
+ */
+ inline char* get_stack_base();
+
+    /** @brief Returns this fiber viewed as its cilk_fiber_data base. */
+    cilk_fiber_data* get_data() {
+        return static_cast<cilk_fiber_data*>(this);
+    }
+
+    /** @brief Const overload: returns this fiber viewed as its
+     *  cilk_fiber_data base. */
+    cilk_fiber_data const* get_data() const {
+        return static_cast<cilk_fiber_data const*>(this);
+    }
+
+
+#if NEED_FIBER_REF_COUNTS
+    /**
+     * @brief Verifies that this fiber's reference count equals v.
+     *
+     * The check is compiled in only when FIBER_CHECK_REF_COUNTS is
+     * nonzero; otherwise this method does nothing.
+     *
+     * @param v The exact reference count this fiber is expected to have.
+     */
+    inline void assert_ref_count_equals(long v) {
+        #if FIBER_CHECK_REF_COUNTS
+            // Exact match required: a ">=" comparison here would merely
+            // duplicate assert_ref_count_at_least() and let a count that
+            // is too high slip through.
+            CILK_ASSERT(m_outstanding_references == v);
+        #endif
+    }
+
+ /**
+ * @brief Verifies that this fiber's reference count is at least v.
+ *
+ * The check is compiled in only when FIBER_CHECK_REF_COUNTS is nonzero;
+ * otherwise this method does nothing.
+ *
+ * @param v Lower bound that the reference count is compared against.
+ */
+ inline void assert_ref_count_at_least(long v) {
+ #if FIBER_CHECK_REF_COUNTS
+ CILK_ASSERT(m_outstanding_references >= v);
+ #endif
+ }
+
+    /** @brief Reads the current value of the reference count.
+     *
+     * @return The value of m_outstanding_references.
+     */
+    inline long get_ref_count() {
+        return this->m_outstanding_references;
+    }
+
+    /** @brief Sets the reference count to v.
+     *
+     * Operation is not atomic.
+     *
+     * @param v Value to store in the reference count.
+     */
+    inline void init_ref_count(long v) {
+        this->m_outstanding_references = v;
+    }
+
+ // For Windows, updates to the fiber reference count need to be
+ // atomic, because exceptions can live on a stack that we are not
+ // currently executing on. Thus, we can update the reference
+ // count of a fiber we are not currently executing on.
+
+    /** @brief Increment reference count for this fiber [Windows].
+     *
+     * Forwards to atomic_inc_ref_count().
+     */
+    inline void inc_ref_count() {
+        this->atomic_inc_ref_count();
+    }
+
+    /** @brief Decrement reference count for this fiber [Windows].
+     *
+     * @return The value returned by atomic_dec_ref_count().
+     */
+    inline long dec_ref_count() {
+        const long result = this->atomic_dec_ref_count();
+        return result;
+    }
+
+    /** @brief Subtract v from the reference count for this fiber [Windows].
+     *
+     * @param v Value to subtract.
+     * @return The value returned by atomic_sub_from_ref_count(v).
+     */
+    inline long sub_from_ref_count(long v) {
+        const long result = this->atomic_sub_from_ref_count(v);
+        return result;
+    }
+#else // NEED_FIBER_REF_COUNTS
+
+ // Without reference counting, we have placeholder methods.
+ /// No-op: there is no reference count to initialize; v is ignored.
+ inline void init_ref_count(long v) { }
+
+ /// No-op: there is no reference count to increment.
+ inline void inc_ref_count() { }
+
+ // With no reference counting, dec_ref_count always returns 0.
+ // Thus, anyone checking is always the "last" one.
+ inline long dec_ref_count() { return 0; }
+ /// Always returns 0, like dec_ref_count; v is ignored.
+ inline long sub_from_ref_count(long v) { return 0; }
+
+ // The assert methods do nothing.
+ inline void assert_ref_count_equals(long v) { }
+ inline void assert_ref_count_at_least(long v) { }
+#endif
+
+ /**
+ * @brief Call TBB to tell it about an "interesting" event.
+ *
+ * @param op Value specifying the event to track.
+ */
+ void invoke_tbb_stack_op(__cilk_tbb_stack_op op);
+
+private:
+
+ /**
+ * @brief Helper method: try to allocate a fiber from this pool or
+ * its ancestors without going to the OS / heap.
+ *
+ * Returns the allocated fiber, or NULL if no fiber is found.
+ *
+ * If pool contains a suitable fiber, return it. Otherwise, try to
+ * recursively grab a fiber from the parent pool, if there is one.
+ *
+ * This method will not allocate a fiber from the heap.
+ */
+ static cilk_fiber* try_allocate_from_pool_recursive(cilk_fiber_pool* pool);
+
+
+#if NEED_FIBER_REF_COUNTS
+ /**
+ * @brief Atomic increment of reference count.
+ */
+ void atomic_inc_ref_count();
+
+ /**
+ * @brief Atomic decrement of reference count.
+ */
+ long atomic_dec_ref_count();
+
+ /**
+ * @brief Atomic subtract of v from reference count.
+ * @param v Value to subtract.
+ */
+ long atomic_sub_from_ref_count(long v);
+#endif // NEED_FIBER_REF_COUNTS
+
+};
+
+#endif // __cplusplus
+
+#endif // ! defined(INCLUDED_CILK_FIBER_DOT_H)
diff --git a/libcilkrts/runtime/cilk_malloc.c b/libcilkrts/runtime/cilk_malloc.c
index 89d6fc97ab3..2094b0335c9 100644
--- a/libcilkrts/runtime/cilk_malloc.c
+++ b/libcilkrts/runtime/cilk_malloc.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "cilk_malloc.h"
@@ -33,6 +38,10 @@
#include <malloc.h>
#define HAS_MEMALIGN 1
#endif
+#ifdef __VXWORKS__
+#define HAS_MEMALIGN 1
+#include <memLib.h>
+#endif
#define PREFERRED_ALIGNMENT 64 /* try to keep runtime system data
structures within one cache line */
diff --git a/libcilkrts/runtime/cilk_malloc.h b/libcilkrts/runtime/cilk_malloc.h
index f547aa9798f..2ccce8a4ae3 100644
--- a/libcilkrts/runtime/cilk_malloc.h
+++ b/libcilkrts/runtime/cilk_malloc.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
diff --git a/libcilkrts/runtime/component.h b/libcilkrts/runtime/component.h
index 869ea9bd79b..01aab3a6274 100644
--- a/libcilkrts/runtime/component.h
+++ b/libcilkrts/runtime/component.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifndef INCLUDED_COMPONENT_DOT_H
diff --git a/libcilkrts/runtime/doxygen-layout.xml b/libcilkrts/runtime/doxygen-layout.xml
index 5e75f563eda..8757667d829 100644
--- a/libcilkrts/runtime/doxygen-layout.xml
+++ b/libcilkrts/runtime/doxygen-layout.xml
@@ -1,28 +1,33 @@
<doxygenlayout version="1.0">
<!--
-# Copyright (C) 2011
-# Intel Corporation
-#
-# This file is part of the Intel Cilk Plus Library. This library is free
-# software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Under Section 7 of GPL version 3, you are granted additional
-# permissions described in the GCC Runtime Library Exception, version
-# 3.1, as published by the Free Software Foundation.
-#
-# You should have received a copy of the GNU General Public License and
-# a copy of the GCC Runtime Library Exception along with this program;
-# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-# <http://www.gnu.org/licenses/>.
+# @copyright
+# Copyright (C) 2011
+# Intel Corporation
+#
+# @copyright
+# This file is part of the Intel Cilk Plus Library. This library is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# @copyright
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# @copyright
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# @copyright
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
-->
<!-- Navigation index tabs for HTML output -->
diff --git a/libcilkrts/runtime/doxygen.cfg b/libcilkrts/runtime/doxygen.cfg
index 698cbfcd328..12048bc77bd 100644
--- a/libcilkrts/runtime/doxygen.cfg
+++ b/libcilkrts/runtime/doxygen.cfg
@@ -1,27 +1,32 @@
# Doxyfile 1.7.4
-# Copyright (C) 2011
-# Intel Corporation
-#
-# This file is part of the Intel Cilk Plus Library. This library is free
-# software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Under Section 7 of GPL version 3, you are granted additional
-# permissions described in the GCC Runtime Library Exception, version
-# 3.1, as published by the Free Software Foundation.
-#
-# You should have received a copy of the GNU General Public License and
-# a copy of the GCC Runtime Library Exception along with this program;
-# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-# <http://www.gnu.org/licenses/>.
+# @copyright
+# Copyright (C) 2011-2012
+# Intel Corporation
+#
+# @copyright
+# This file is part of the Intel Cilk Plus Library. This library is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# @copyright
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# @copyright
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# @copyright
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
# This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@@ -48,7 +53,7 @@ DOXYFILE_ENCODING = UTF-8
# The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
-PROJECT_NAME = "Cilk Runtime"
+PROJECT_NAME = "Intel Cilk Plus Runtime"
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
@@ -635,7 +640,9 @@ WARN_LOGFILE =
INPUT = ./ \
../include/internal/abi.h \
- ../include/cilk/cilk_api.h
+ ../include/cilk/cilk_api.h \
+ ../include/cilk/common.h \
+ ./readme.dox
# This tag can be used to specify the character encoding of the source files
@@ -1490,7 +1497,10 @@ PREDEFINED = _WIN32 \
CILK_API(t)=t \
CILK_ABI(t)=t \
CILK_ABI_THROWS(t)=t \
- CALLBACK=
+ CALLBACK= \
+ __CILKRTS_INLINE=inline \
+ __CILKRTS_ABI_VERSION=1 \
+ __cplusplus \
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
diff --git a/libcilkrts/runtime/except-gcc.cpp b/libcilkrts/runtime/except-gcc.cpp
index d577428e5a4..0d643c8d310 100644
--- a/libcilkrts/runtime/except-gcc.cpp
+++ b/libcilkrts/runtime/except-gcc.cpp
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "except-gcc.h"
@@ -34,6 +39,7 @@
#include "full_frame.h"
#include "scheduler.h"
#include "frame_malloc.h"
+#include "pedigrees.h"
#include <stdint.h>
#include <typeinfo>
@@ -159,8 +165,24 @@ __cilkrts_return_exception(__cilkrts_stack_frame *sf)
CILK_ASSERT(sf->flags & CILK_FRAME_DETACHED);
sf->flags &= ~CILK_FRAME_DETACHED;
+ /*
+ * If we are in replay mode, and a steal occurred during the recording
+ * phase, stall till a steal actually occurs.
+ */
+ replay_wait_for_steal_if_parent_was_stolen(w);
+
/* If this is to be an abnormal return, save the active exception. */
if (!__cilkrts_pop_tail(w)) {
+ /* Write a record to the replay log for an attempt to return to a
+ stolen parent. This must be done before the exception handler
+ invokes __cilkrts_leave_frame which will bump the pedigree so
+ the replay_wait_for_steal_if_parent_was_stolen() above will match on
+ replay */
+ replay_record_orphaned(w);
+
+ /* Now that the record/replay stuff is done, update the pedigree */
+ update_pedigree_on_leave_frame(w, sf);
+
/* Inline pop_frame; this may not be needed. */
w->current_stack_frame = sf->call_parent;
sf->call_parent = 0;
@@ -191,6 +213,10 @@ __cilkrts_return_exception(__cilkrts_stack_frame *sf)
the same stack and part of the same full frame. The caller is
cleaning up the Cilk frame during unwind and will reraise the
exception */
+
+ /* Now that the record/replay stuff is done, update the pedigree */
+ update_pedigree_on_leave_frame(w, sf);
+
#if DEBUG_EXCEPTIONS /* DEBUG ONLY */
{
__cxa_eh_globals *state = __cxa_get_globals();
@@ -247,7 +273,8 @@ NORETURN __cilkrts_c_sync_except (__cilkrts_worker *w, __cilkrts_stack_frame *sf
__cxa_eh_globals *state = __cxa_get_globals();
_Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data;
- CILK_ASSERT (sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING) == (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING));
+ CILK_ASSERT((sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)) ==
+ (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING));
sf->flags &= ~CILK_FRAME_EXCEPTING;
#if DEBUG_EXCEPTIONS
diff --git a/libcilkrts/runtime/except-gcc.h b/libcilkrts/runtime/except-gcc.h
index 14a174befee..fb2ae796d20 100644
--- a/libcilkrts/runtime/except-gcc.h
+++ b/libcilkrts/runtime/except-gcc.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
diff --git a/libcilkrts/runtime/except.h b/libcilkrts/runtime/except.h
index 94f5b1e3a24..c8739554e9b 100644
--- a/libcilkrts/runtime/except.h
+++ b/libcilkrts/runtime/except.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
diff --git a/libcilkrts/runtime/frame_malloc.c b/libcilkrts/runtime/frame_malloc.c
index 7f49b17cd8b..d9143034de9 100644
--- a/libcilkrts/runtime/frame_malloc.c
+++ b/libcilkrts/runtime/frame_malloc.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "frame_malloc.h"
@@ -31,7 +36,9 @@
#include "local_state.h"
#include "cilk_malloc.h"
+#ifndef __VXWORKS__
#include <memory.h>
+#endif
/* #define USE_MMAP 1 */
#if USE_MMAP
diff --git a/libcilkrts/runtime/frame_malloc.h b/libcilkrts/runtime/frame_malloc.h
index 95dffe46d4b..c414ae195c7 100644
--- a/libcilkrts/runtime/frame_malloc.h
+++ b/libcilkrts/runtime/frame_malloc.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
diff --git a/libcilkrts/runtime/full_frame.c b/libcilkrts/runtime/full_frame.c
index c6036f13acc..e51b9afab57 100644
--- a/libcilkrts/runtime/full_frame.c
+++ b/libcilkrts/runtime/full_frame.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -64,10 +69,8 @@ full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
ff->registration = 0;
#endif
ff->frame_size = 0;
-// ff->exception_sp_offset = 0;
-// ff->eh_kind = EH_NONE;
- ff->stack_self = 0;
- ff->stack_child = 0;
+ ff->fiber_self = 0;
+ ff->fiber_child = 0;
ff->sync_master = 0;
@@ -118,6 +121,26 @@ COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp)
__cilkrts_get_tls_worker()->self, ff, ff->sync_sp, sp);
}
+COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size)
+{
+ /* When resuming the parent after a steal, __cilkrts_take_stack is used to
+ * subtract the new stack pointer from the current stack pointer, storing
+ * the offset in ff->sync_sp. When resuming after a sync,
+ * __cilkrts_take_stack is used to subtract the new stack pointer from
+ * itself, leaving ff->sync_sp at zero (null). Although the pointers being
+ * subtracted are not part of the same contiguous chunk of memory, the
+ * flat memory model allows us to subtract them and get a usable offset.
+ *
+ * __cilkrts_adjust_stack() is used to deallocate a Variable Length Array
+ * of 'size' bytes by adding its size to the full frame's ff->sync_sp.
+ */
+ ff->sync_sp = ff->sync_sp + size;
+ /* "%x" consumes an unsigned int, so the size_t must be cast explicitly. */
+ DBGPRINTF("%d- __cilkrts_adjust_stack - adjust (+) sync "
+ "stack of full frame %p to %p (+ size: 0x%x)\n",
+ __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, (unsigned int)size);
+}
+
COMMON_PORTABLE
void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff)
{
diff --git a/libcilkrts/runtime/full_frame.h b/libcilkrts/runtime/full_frame.h
index cc2bde0a006..0ca75624490 100644
--- a/libcilkrts/runtime/full_frame.h
+++ b/libcilkrts/runtime/full_frame.h
@@ -2,51 +2,53 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifndef INCLUDED_FULL_FRAME_DOT_H
#define INCLUDED_FULL_FRAME_DOT_H
+
#include "rts-common.h"
#include "worker_mutex.h"
#include <cilk/common.h>
-
+#include <internal/abi.h>
#include <stddef.h>
+#include "cilk_fiber.h"
__CILKRTS_BEGIN_EXTERN_C
-// Forwarded declarations
-typedef struct __cilkrts_stack_frame __cilkrts_stack_frame;
-typedef struct __cilkrts_stack __cilkrts_stack;
-
/** Magic numbers for full_frame, used for debugging */
typedef unsigned long long ff_magic_t;
/* COMMON_SYSDEP */ struct pending_exception_info; /* opaque */
-/* COMMON_SYSDEP */ struct __cilkrts_stack; /* opaque */
/*************************************************************
Full frames
@@ -302,23 +304,23 @@ struct full_frame
ptrdiff_t frame_size;
/**
- * Allocated stacks that need to be freed. The stacks work
- * like a reducer. The leftmost frame may have stack_self
+ * Allocated fibers that need to be freed. The fibers work
+ * like a reducer. The leftmost frame may have @c fiber_self
* null and owner non-null.
*
* [local]
* TBD: verify exception code satisfies this requirement.
*/
- __cilkrts_stack *stack_self;
+ cilk_fiber *fiber_self;
/**
- * Allocated stacks that need to be freed. The stacks work
- * like a reducer. The leftmost frame may have stack_self
+ * Allocated fibers that need to be freed. The fibers work
+ * like a reducer. The leftmost frame may have @c fiber_self
* null and owner non-null.
*
* [self-locked]
*/
- __cilkrts_stack *stack_child;
+ cilk_fiber *fiber_child;
/**
* If the sync_master is set, this function can only be sync'd by the team
@@ -375,56 +377,62 @@ struct full_frame
*/
/**
- * Records the stack pointer within the 'sf' stack frame as the current stack
- * pointer at the point of suspending full frame 'ff'.
- *
- * Preconditions:
- * - ff->sync_sp must be either null or contain the result of a prior call to
- * __cilkrts_take_stack().
- * - If ff->sync_sp is not null, then SP(sf) must refer to the same stack as
- * the 'sp' argument to the prior call to __cilkrts_take_stack().
+ * @brief Records the stack pointer within the @c sf stack frame as the
+ * current stack pointer at the point of suspending full frame @c ff.
+ *
+ * @pre @c ff->sync_sp must be either null or contain the result of a prior call to
+ * @c __cilkrts_take_stack().
+ * @pre If @c ff->sync_sp is not null, then @c SP(sf) must refer to the same stack as
+ * the @c sp argument to the prior call to @c __cilkrts_take_stack().
*
- * Postconditions:
- * - If ff->sync_sp was null before the call, then ff->sync_sp will be set to
- * SP(sf).
- * - Otherwise, ff->sync_sp will be restored to the value it had just prior
- * to the last call to __cilkrts_take_stack(), except offset by any change
- * in the stack pointer between the call to __cilkrts_take_stack() and
- * this call to __cilkrts_put_stack().
+
+ * @post If @c ff->sync_sp was null before the call, then @c
+ * ff->sync_sp will be set to @c SP(sf).
+ * @post Otherwise, @c ff->sync_sp will be restored to the value it had just prior
+ * to the last call to @c __cilkrts_take_stack(), except offset by any change
+ * in the stack pointer between the call to @c __cilkrts_take_stack() and
+ * this call to @c __cilkrts_put_stack().
*
* @param ff The full frame that is being suspended.
- * @param sf The __cilkrts_stack_frame that is being suspended. The stack
+ * @param sf The @c __cilkrts_stack_frame that is being suspended. The stack
* pointer will be taken from the jmpbuf contained within this
- * __cilkrts_stack_frame.
+ * @c __cilkrts_stack_frame.
*/
COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff,
__cilkrts_stack_frame *sf);
/**
- * Records the stack pointer 'sp' as the stack pointer at the point of
- * resuming execution on full frame 'ff'. The value of 'sp' may be on a
- * different stack than the original value recorded for the stack pointer
- * using __cilkrts_put_stack().
+ * @brief Records the stack pointer @c sp as the stack pointer at the point of
+ * resuming execution on full frame @c ff.
+ *
+ * The value of @c sp may be on a different stack than the original
+ * value recorded for the stack pointer using __cilkrts_put_stack().
*
- * Precondition:
- * - ff->sync_sp must contain a value set by __cilkrts_put_stack().
+ * @pre @c ff->sync_sp must contain a value set by @c __cilkrts_put_stack().
*
- * Postcondition:
- * - ff->sync_sp contains an *integer* value used to compute a change in the
- * stack pointer upon the next call to __cilkrts_take_stack().
- * - If 'sp' equals ff->sync_sp, then ff->sync_sp is set to null.
+ * @post @c ff->sync_sp contains an *integer* value used to compute a change in the
+ * stack pointer upon the next call to @c __cilkrts_take_stack().
+ * @post If @c sp equals @c ff->sync_sp, then @c ff->sync_sp is set to null.
*
* @param ff The full frame that is being resumed.
* @param sp The stack pointer for the stack the function is being resumed on.
*/
COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp);
+/**
+ * @brief Adjusts the stack to deallocate a Variable Length Array.
+ *
+ * @param ff The full frame that is being adjusted.
+ * @param size The size of the array being deallocated from the stack.
+ */
+COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size);
+
/**
- * Allocates and initailizes a full_frame.
+ * @brief Allocates and initailizes a full_frame.
*
* @param w The memory for the full_frame will be allocated out of the
* worker's pool.
- * @param sf The __cilkrts_stack_frame which will be saved as the call_stack
+ * @param sf The @c __cilkrts_stack_frame which will be saved as the call_stack
* for this full_frame.
*
* @return The newly allocated and initialized full_frame.
@@ -434,7 +442,7 @@ full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w,
__cilkrts_stack_frame *sf);
/**
- * Deallocates a full_frame.
+ * @brief Deallocates a full_frame.
*
* @param w The memory for the full_frame will be returned to the worker's pool.
* @param ff The full_frame to be deallocated.
@@ -443,18 +451,18 @@ COMMON_PORTABLE
void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff);
/**
- * Performs sanity checks to check the integrity of a full_frame.
+ * @brief Performs sanity checks to check the integrity of a full_frame.
*
* @param ff The full_frame to be validated.
*/
COMMON_PORTABLE void validate_full_frame(full_frame *ff);
/**
- * Locks the mutex contained in a full_frame. The full_frame is validated
- * before the runtime attempts to lock it.
+ * @brief Locks the mutex contained in a full_frame.
+ *
+ * The full_frame is validated before the runtime attempts to lock it.
*
- * Postcondition:
- * - ff->lock will be owned by w.
+ * @post @c ff->lock will be owned by @c w.
*
* @param w The worker that will own the full_frame. If the runtime is
* collecting stats, the intervals will be attributed to the worker.
@@ -464,10 +472,9 @@ COMMON_PORTABLE void __cilkrts_frame_lock(__cilkrts_worker *w,
full_frame *ff);
/**
- * Unlocks the mutex contained in a full_frame.
+ * @brief Unlocks the mutex contained in a full_frame.
*
- * Precondition:
- * - ff->lock must must be owned by w.
+ * @pre @c ff->lock must be owned by @c w.
*
* @param w The worker that currently owns the full_frame.
* @param ff The full_frame containing the mutex to be unlocked.
diff --git a/libcilkrts/runtime/global_state.cpp b/libcilkrts/runtime/global_state.cpp
index 8bdef33556c..d772c74d073 100644
--- a/libcilkrts/runtime/global_state.cpp
+++ b/libcilkrts/runtime/global_state.cpp
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "global_state.h"
@@ -32,6 +37,8 @@
#include "metacall_impl.h"
#include "stats.h"
#include "cilk/cilk_api.h"
+#include "cilk_malloc.h"
+#include "record-replay.h"
#include <algorithm> // For max()
#include <cstring>
@@ -180,7 +187,6 @@ template <typename INT_T, typename CHAR_T>
int store_int(INT_T *out, const CHAR_T *val, INT_T min, INT_T max)
{
errno = 0;
- char *end = 0;
long val_as_long = to_long(val);
if (val_as_long == 0 && errno != 0)
return __CILKRTS_SET_PARAM_INVALID;
@@ -271,7 +277,7 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
//
// Number of stacks we'll hold in the per-worker stack cache. Maximum
// value is 42. See __cilkrts_make_global_state for details.
- return store_int(&g->stack_cache_size, value, 0, 42);
+ return store_int(&g->fiber_pool_size, value, 0, 42);
}
else if (strmatch(param, s_shared_stacks))
{
@@ -280,7 +286,7 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
// Maximum number of stacks we'll hold in the global stack
// cache. Maximum value is 42. See __cilkrts_make_global_state for
// details.
- return store_int(&g->global_stack_cache_size, value, 0, 42);
+ return store_int(&g->global_fiber_pool_size, value, 0, 42);
}
else if (strmatch(param, s_nstacks))
{
@@ -293,7 +299,9 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value)
// Undocumented at this time, though there are plans to expose it.
// The current implentation is for Linux debugging only and is not
// robust enough for users.
- return store_int<long>(&g->max_stacks, value, 0, INT_MAX);
+ if (cilkg_singleton_ptr)
+ return __CILKRTS_SET_PARAM_LATE;
+ return store_int<unsigned>(&g->max_stacks, value, 0, INT_MAX);
}
else if (strmatch(param, s_stack_size))
{
@@ -356,6 +364,8 @@ global_state_t* cilkg_get_user_settable_values()
// multiple threads from initializing this data.
if (! cilkg_user_settable_values_initialized)
{
+ size_t len;
+
// Preserve stealing disabled since it may have been set by the
// debugger
int stealing_disabled = g->stealing_disabled;
@@ -377,12 +387,43 @@ global_state_t* cilkg_get_user_settable_values()
g->force_reduce = 0; // Default Off
g->P = hardware_cpu_count; // Defaults to hardware CPU count
g->max_user_workers = 0; // 0 unless set by user
- g->stack_cache_size = 7; // Arbitrary default
- g->global_stack_cache_size = 3; // Arbitrary default
- g->max_stacks = 0; // 0 == unlimited
+ g->fiber_pool_size = 7; // Arbitrary default
+
+ g->global_fiber_pool_size = 3 * 3* g->P; // Arbitrary default
+ // 3*P was the default size of the worker array (including
+ // space for extra user workers). This parameter was chosen
+ // to match previous versions of the runtime.
+
+ if (4 == sizeof(void *))
+ g->max_stacks = 1200; // Only 1GB on 32-bit machines
+ else
+ g->max_stacks = 2400; // 2GB on 64-bit machines
+
+ // If we have 2400 1MB stacks, that is 2 GB. If we reach this
+ // limit on a single-socket machine, we may have other
+ // problems. Is 2400 too small for large multicore machines?
+
+ // TBD(jsukha, 11/27/2012): I set this limit on stacks to be a
+ // value independent of P. When running on a Xeon Phi with
+ // small values of P, I recall seeing a few microbenchmarks
+ // (e.g., fib) where a limit of 10*P seemed to be
+ // unnecessarily slowing things down.
+ //
+ // That being said, the code has changed sufficiently that
+ // this observation may no longer be true.
+ //
+ // Note: in general, the worst-case number of stacks required
+ // for a Cilk computation with spawn depth "d" on P workers is
+ // O(Pd). Code with unbalanced recursion may run into issues
+ // with this stack usage.
+
g->max_steal_failures = 128; // TBD: depend on max_workers?
g->stack_size = 0; // 0 unless set by the user
+ // Assume no record or replay log for now
+ g->record_replay_file_name = NULL;
+ g->record_or_replay = RECORD_REPLAY_NONE; // set by user
+
if (always_force_reduce())
g->force_reduce = true;
else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_FORCE_REDUCE"))
@@ -414,6 +455,33 @@ global_state_t* cilkg_get_user_settable_values()
// total_workers must be computed now to support __cilkrts_get_total_workers
g->total_workers = g->P + calc_max_user_workers(g) - 1;
+#ifdef CILK_RECORD_REPLAY
+ // RecordReplay: See if we've been asked to replay a log
+ len = cilkos_getenv(envstr, 0, "CILK_REPLAY_LOG");
+ if (len > 0)
+ {
+ len += 1; // Allow for trailing NUL
+ g->record_or_replay = REPLAY_LOG;
+ g->record_replay_file_name = (char *)__cilkrts_malloc(len);
+ cilkos_getenv(g->record_replay_file_name, len, "CILK_REPLAY_LOG");
+ }
+
+ // RecordReplay: See if we've been asked to record a log
+ len = cilkos_getenv(envstr, 0, "CILK_RECORD_LOG");
+ if (len > 0)
+ {
+ if (RECORD_REPLAY_NONE != g->record_or_replay)
+ cilkos_warning("CILK_RECORD_LOG ignored since CILK_REPLAY_LOG is defined.\n");
+ else
+ {
+ len += 1; // Allow for trailing NUL
+ g->record_or_replay = RECORD_LOG;
+ g->record_replay_file_name = (char *)__cilkrts_malloc(len);
+ cilkos_getenv(g->record_replay_file_name, len, "CILK_RECORD_LOG");
+ }
+ }
+#endif
+
cilkg_user_settable_values_initialized = true;
}
@@ -439,8 +507,6 @@ global_state_t* cilkg_init_global_state()
// Get partially-initialized global state.
global_state_t* g = cilkg_get_user_settable_values();
- int i, max_workers;
-
if (g->max_stacks > 0) {
// nstacks is currently honored on non-Windows systems only.
@@ -461,12 +527,19 @@ global_state_t* cilkg_init_global_state()
// interaction with the local stack cache is specifically to help out
// MIC.
- g->stack_cache_size = 1; // One stack per worker cache.
+ // About max_stacks / P stacks, except we require at least 1
+ // per pool.
+ if (((int)g->max_stacks / g->P) < g->fiber_pool_size)
+ g->fiber_pool_size = g->max_stacks / g->P;
- if (g->max_stacks < g->P)
+ if (g->fiber_pool_size <= 0) {
+ g->fiber_pool_size = 1;
+ }
+
+ if ((int)g->max_stacks < g->P)
g->max_stacks = g->P;
- g->global_stack_cache_size = g->max_stacks;
+ g->global_fiber_pool_size = g->P * (g->fiber_pool_size+1);
}
// Number of bytes/address - validation for debugger integration
@@ -483,7 +556,6 @@ global_state_t* cilkg_init_global_state()
g->workers_running = 0;
g->ltqsize = 1024; /* FIXME */
- g->stacks = 0;
g->stack_size = cilkos_validate_stack_size(g->stack_size);
g->failure_to_allocate_stack = 0;
diff --git a/libcilkrts/runtime/global_state.h b/libcilkrts/runtime/global_state.h
index 8409f0161cf..2ee02a572e3 100644
--- a/libcilkrts/runtime/global_state.h
+++ b/libcilkrts/runtime/global_state.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -41,39 +46,23 @@
#include "frame_malloc.h"
#include "stats.h"
#include "bug.h"
+#include "cilk_fiber.h"
__CILKRTS_BEGIN_EXTERN_C
-/** @brief Abstract, per-strand stack (system dependent) */
-typedef struct __cilkrts_stack __cilkrts_stack;
-
/**
* Non-null place-holder for a stack handle that has no meaningful value.
*/
-#define PLACEHOLDER_STACK ((__cilkrts_stack *) -2)
+#define PLACEHOLDER_FIBER ((cilk_fiber *) -2)
/**
- * Temporary place holder to use during a provably good steal, before the real
- * stack handle is known. Differs from PLACEHOLDER_STACK in that this value
- * is used in the case where the stack *is* assigned a meaningful value, but
- * that meaningful value is not known (yet).
+ * States for record_or_replay
*/
-#define BIND_PROVABLY_GOOD_STACK ((__cilkrts_stack *) -1)
-
-/** @brief Data structure for a cache of stack handles */
-typedef struct __cilkrts_stack_cache {
- /** Mutex used to secure exclusive access to the cache */
- mutex lock;
-
- /** Max for cached stacks */
- unsigned int size;
-
- /** Count of cached stacks */
- unsigned int n;
-
- /** Array to hold cached stacks */
- __cilkrts_stack **stacks;
-} __cilkrts_stack_cache;
+enum record_replay_t {
+ RECORD_REPLAY_NONE,
+ RECORD_LOG,
+ REPLAY_LOG
+};
/**
* @brief The global state is a structure that is shared by all workers in
@@ -101,7 +90,7 @@ typedef struct __cilkrts_stack_cache {
* initialization and after deinitialization.
*/
-typedef /* COMMON_PORTABLE */ struct global_state_t {
+struct global_state_t { /* COMMON_PORTABLE */
/* Fields described as "(fixed)" should not be changed after
* initialization.
@@ -115,52 +104,60 @@ typedef /* COMMON_PORTABLE */ struct global_state_t {
* debugger integration library will need to be changed to match!!!
*************************************************************************/
- int addr_size; /**< Number of bytes for an address, used by debugger (fixed)*/
+ int addr_size; ///< Number of bytes for an address, used by debugger (fixed)
- int system_workers; /**< Number of system workers (fixed) */
+ int system_workers; ///< Number of system workers (fixed)
/**
- * Maximum number of user workers that can be bound to cilk workers.
+ * @brief USER SETTING: Maximum number of user workers that can be
+ * bound to cilk workers.
+ *
* 0 unless set by user. Call cilkg_calc_max_user_workers to get
* the value.
*/
- int max_user_workers; /* USER SETTING - max Q (fixed) */
+ int max_user_workers;
- int total_workers; /**< Total number of worker threads allocated (fixed) */
+ int total_workers; ///< Total number of worker threads allocated (fixed)
- int workers_running; /**< True when system workers have beens started */
+ int workers_running; ///< True when system workers have been started
- /** Set by debugger to disable stealing (fixed) */
+ /// Set by debugger to disable stealing (fixed)
int stealing_disabled;
- /** System-dependent part of the global state */
+ /// System-dependent part of the global state
struct global_sysdep_state *sysdep;
- /** Array of worker structures. */
+ /// Array of worker structures.
__cilkrts_worker **workers;
/******* END OF DEBUGGER-INTEGRATION FIELDS ***************/
- /** Number of frames in each worker's lazy task queue */
+ /// Number of frames in each worker's lazy task queue
__STDNS size_t ltqsize;
/**
+ * @brief USER SETTING: Force all possible reductions.
+ *
* TRUE if running a p-tool that requires reducers to call the reduce()
- * method even if no actual stealing occurs
+ * method even if no actual stealing occurs.
+ *
+ * When set to TRUE, runtime will simulate steals, forcing calls to
+ * the reduce() methods of reducers.
+ *
*/
- int force_reduce; /* USER SETTING */
+ int force_reduce;
- /** Per-worker stack cache size */
- int stack_cache_size; /* USER SETTING */
+ /// USER SETTING: Per-worker fiber pool size
+ int fiber_pool_size;
- /** Global stack cache size */
- int global_stack_cache_size; /* USER SETTING */
+ /// USER SETTING: Global fiber pool size
+ int global_fiber_pool_size;
/**
- * TRUE when workers should exit scheduling loop so we can shut down the
- * runtime and free the global state.
+ * @brief TRUE when workers should exit scheduling loop so we can
+ * shut down the runtime and free the global state.
*
- * Note that work_done will be checked *FREQUENTLY* in the scheduling loop
+ * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop
* by idle workers. We need to ensure that it's not in a cache line which
* may be invalidated by other cores. The surrounding fields are either
* constant after initialization or not used until shutdown (stats) so we
@@ -168,66 +165,81 @@ typedef /* COMMON_PORTABLE */ struct global_state_t {
*/
volatile int work_done;
- int under_ptool; /**< True when running under a serial PIN tool */
-
- statistics stats; /**< Statistics on use of runtime */
+ int under_ptool; ///< True when running under a serial PIN tool
- /**
- * Number of allocated stacks. When the runtime is compiled with
- * profiling, workers use atomic operations to keep count. Otherwise
- * the counter is zero.
- */
- long stacks;
+ statistics stats; ///< Statistics on use of runtime
/**
- * Maximum number of stacks the runtime will allocate (apart from those
- * created by the OS when worker threads are created). If max_stacks <= 0,
- * there is no pre-defined maximum.
+ * @brief USER SETTING: Maximum number of stacks the runtime will
+ * allocate (apart from those created by the OS when worker
+ * threads are created).
+ *
+ * If max_stacks == 0, there is no pre-defined maximum.
*/
- long max_stacks; /* USER SETTING */
+ unsigned max_stacks;
- /** Size of each stack */
+ /// Size of each stack
size_t stack_size;
- /** Global cache for per-worker memory */
+ /// Global cache for per-worker memory
struct __cilkrts_frame_cache frame_malloc;
- /** Global cache of stacks */
- __cilkrts_stack_cache stack_cache;
+ /// Global fiber pool
+ cilk_fiber_pool fiber_pool;
/**
- * Track whether the runtime has failed to allocate a stack. This prevents
- * multiple warnings from being issued.
+ * @brief Track whether the runtime has failed to allocate a
+ * stack.
+ *
+ * Setting this flag prevents multiple warnings from being
+ * issued.
*/
int failure_to_allocate_stack;
/**
- * Buffer to force max_steal_failures to appear on a different cache line
- * from the previous member variables. This is because max_steal_failures
- * is read constantly and other modified values in the global state will
+ * @brief USER SETTING: name of the record or replay log file.
+ * Set to NULL if not used in this run.
+ */
+ char *record_replay_file_name;
+
+ /**
+ * @brief Record/replay state.
+ * Valid states are:
+ * RECORD_REPLAY_NONE - Not recording or replaying a log
+ * RECORD_LOG - Recording a log for replay later
+ * REPLAY_LOG - Replay a log recorded earlier
+ */
+ enum record_replay_t record_or_replay;
+
+ /**
+ * @brief Buffer to force max_steal_failures to appear on a
+ * different cache line from the previous member variables.
+ *
+ * This padding is needed because max_steal_failures is read
+ * constantly and other modified values in the global state will
* cause thrashing.
*/
char cache_buf[64];
/**
- * Maximum number of times a thread should fail to steal before checking
- * if Cilk is shutting down.
+ * @brief Maximum number of times a thread should fail to steal
+ * before checking if Cilk is shutting down.
*/
unsigned int max_steal_failures;
- /** Pointer to scheduler entry point */
+ /// Pointer to scheduler entry point
void (*scheduler)(__cilkrts_worker *w);
/**
- * Buffer to force P and Q to appear on a different cache line from the
- * previous member variables.
+ * @brief Buffer to force P and Q to appear on a different cache
+ * line from the previous member variables.
*/
char cache_buf_2[64];
- int P; /**< USER SETTING: number of system workers + 1 (fixed) */
- int Q; /**< Number of user threads currently bound to workers */
-} global_state_t;
+ int P; ///< USER SETTING: number of system workers + 1 (fixed)
+ int Q; ///< Number of user threads currently bound to workers
+};
/**
* @brief Initialize the global state object. This method must both
diff --git a/libcilkrts/runtime/jmpbuf.c b/libcilkrts/runtime/jmpbuf.c
index 5b34636daba..6c472240c1d 100644
--- a/libcilkrts/runtime/jmpbuf.c
+++ b/libcilkrts/runtime/jmpbuf.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "jmpbuf.h"
diff --git a/libcilkrts/runtime/jmpbuf.h b/libcilkrts/runtime/jmpbuf.h
index 8d93915d6ca..5ea6c2e0c8d 100644
--- a/libcilkrts/runtime/jmpbuf.h
+++ b/libcilkrts/runtime/jmpbuf.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -44,33 +49,49 @@
#include <setjmp.h>
#if 0 /* defined CILK_USE_C_SETJMP && defined JB_RSP */
-#define SP(SF) (SF)->ctx[0].__jmpbuf[JB_RSP]
-#define FP(SF) (SF)->ctx[0].__jmpbuf[JB_RBP]
-#define PC(SF) (SF)->ctx[0].__jmpbuf[JB_PC]
+# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_RSP]
+# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_RBP]
+# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
#elif 0 /* defined CILK_USE_C_SETJMP && defined JB_SP */
-#define SP(SF) (SF)->ctx[0].__jmpbuf[JB_SP]
-#define FP(SF) (SF)->ctx[0].__jmpbuf[JB_BP]
-#define PC(SF) (SF)->ctx[0].__jmpbuf[JB_PC]
+# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_SP]
+# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_BP]
+# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC]
#elif defined _WIN64
-#define SP(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rsp
-#define FP(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rbp
-#define PC(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rip
+# define JMPBUF_SP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rsp
+# define JMPBUF_FP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rbp
+# define JMPBUF_PC(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rip
#elif defined _WIN32
-/** Fetch stack pointer from a __cilkrts_stack_frame */
-#define SP(SF) SF->ctx.Esp
-/** Fetch frame pointer from a __cilkrts_stack_frame */
-#define FP(SF) SF->ctx.Ebp
-/** Fetch program counter from a __cilkrts_stack_frame */
-#define PC(SF) SF->ctx.Eip
+ /** Fetch stack pointer from the jump buffer of a __cilkrts_stack_frame */
+# define JMPBUF_SP(ctx) (ctx).Esp
+ /** Fetch frame pointer from the jump buffer of a __cilkrts_stack_frame */
+# define JMPBUF_FP(ctx) (ctx).Ebp
+ /** Fetch program counter from the jump buffer of a __cilkrts_stack_frame */
+# define JMPBUF_PC(ctx) (ctx).Eip
#else /* defined __GNUC__ || defined __ICC */
-/* word 0 is frame address
- word 1 is resume address
- word 2 is stack address */
-#define FP(SF) (SF)->ctx[0]
-#define PC(SF) (SF)->ctx[1]
-#define SP(SF) (SF)->ctx[2]
+ /* word 0 is frame address
+ * word 1 is resume address
+ * word 2 is stack address */
+# define JMPBUF_FP(ctx) (ctx)[0]
+# define JMPBUF_PC(ctx) (ctx)[1]
+# define JMPBUF_SP(ctx) (ctx)[2]
#endif
+/**
+ * @brief Get frame pointer from jump buffer in __cilkrts_stack_frame.
+ */
+#define FP(SF) JMPBUF_FP((SF)->ctx)
+
+/**
+ * @brief Get program counter from jump buffer in __cilkrts_stack_frame.
+ */
+#define PC(SF) JMPBUF_PC((SF)->ctx)
+
+/**
+ * @brief Get stack pointer from jump buffer in __cilkrts_stack_frame.
+ */
+#define SP(SF) JMPBUF_SP((SF)->ctx)
+
+
__CILKRTS_BEGIN_EXTERN_C
/**
diff --git a/libcilkrts/runtime/local_state.c b/libcilkrts/runtime/local_state.c
index a8cabff624f..bc835da6243 100644
--- a/libcilkrts/runtime/local_state.c
+++ b/libcilkrts/runtime/local_state.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -39,10 +44,20 @@ void run_scheduling_stack_fcn(__cilkrts_worker *w)
w->l->post_suspend = 0;
w->l->suspended_stack = 0;
+
+ // Conceptually, after clearing w->l->frame_ff,
+ // w no longer owns the full frame ff.
+ // The next time another (possibly different) worker takes
+ // ownership of ff will be at a provably_good_steal on ff.
+ w->l->frame_ff = NULL;
+
CILK_ASSERT(fcn);
CILK_ASSERT(ff2);
-
fcn(w, ff2, sf2);
+
+ // After we run the scheduling stack function, we should
+ // (still) not have a full frame.
+ CILK_ASSERT(NULL == w->l->frame_ff);
}
/* End local_state.c */
diff --git a/libcilkrts/runtime/local_state.h b/libcilkrts/runtime/local_state.h
index 1a1a9b2720f..92781e86883 100644
--- a/libcilkrts/runtime/local_state.h
+++ b/libcilkrts/runtime/local_state.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -40,9 +45,13 @@
#include <internal/abi.h>
#include "worker_mutex.h"
#include "global_state.h"
+#include "record-replay.h"
+#include "signal_node.h"
#include <setjmp.h>
#include <stddef.h>
+#include <stdio.h>
+
#ifndef _WIN32
# include <pthread.h>
@@ -51,18 +60,20 @@
__CILKRTS_BEGIN_EXTERN_C
/* Opaque types. */
-typedef struct signal_node_t signal_node_t;
+
struct full_frame;
struct free_list;
struct pending_exception_info;
+/// Opaque type for replay entry.
+typedef struct replay_entry_t replay_entry_t;
/**
- * Magic numbers for local_state, used for debugging
+ * @brief Magic numbers for local_state, used for debugging
*/
typedef unsigned long long ls_magic_t;
/**
- * Scheduling stack function: A function that is decided on the program stack,
+ * @brief Scheduling stack function: A function that is decided on the program stack,
* but that must be executed on the scheduling stack.
*/
typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w,
@@ -70,7 +81,7 @@ typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w,
__cilkrts_stack_frame *sf);
/**
- * Type of this worker.
+ * @brief Type of this worker.
**/
typedef enum cilk_worker_type
{
@@ -81,10 +92,12 @@ typedef enum cilk_worker_type
/**
- * The local_state structure contains additional OS-independent
+ * @brief The local_state structure contains additional OS-independent
* information that's associated with a worker, but doesn't need to be
- * visible to the compiler. No compiler-generated code should need to
- * know the layout of this structure.
+ * visible to the compiler.
+ *
+ * No compiler-generated code should need to know the layout of this
+ * structure.
*
* The fields of this struct can be classified as either local or
* shared.
@@ -111,8 +124,7 @@ typedef enum cilk_worker_type
* that are involved in synchronization protocols (i.e., the THE
* protocol).
*/
-/* COMMON_PORTABLE */
-typedef struct local_state
+struct local_state /* COMMON_PORTABLE */
{
/** This value should be in the first field in any local_state */
# define WORKER_MAGIC_0 ((ls_magic_t)0xe0831a4a940c60b8ULL)
@@ -175,19 +187,76 @@ typedef struct local_state
struct full_frame *next_frame_ff;
/**
+ * This is set iff this is a WORKER_USER and there has been a steal. It
+ * points to the first frame that was stolen since the team was last fully
+ * sync'd. Only this worker may continue past a sync in this function.
+ *
+ * This field is set by a thief for a victim that is a user
+ * thread, while holding the victim's lock.
+ * It can be cleared without a lock by the worker that will
+ * continue executing past the sync.
+ *
+ * [shared read/write]
+ */
+ struct full_frame *last_full_frame;
+
+ /**
+ * Team on which this worker is a participant. When a user worker enters,
+ * its team is its own worker struct and it can never change teams. When a
+ * system worker steals, it adopts the team of its victim.
+ *
+ * When a system worker w steals, it reads victim->l->team and
+ * joins this team. w->l->team is constant until the next time w
+ * returns control to the runtime.
+ * We must acquire the worker lock to change w->l->team.
+ *
+ * @note This field is 64-byte aligned because it is the first in
+ * the group of shared read-only fields. We want this group to
+ * fall on a different cache line from the previous group, which
+ * is shared read-write.
+ *
+ * [shared read-only]
+ */
+ __attribute__((aligned(64)))
+ __cilkrts_worker *team;
+
+ /**
+ * Type of this worker
+ *
+ * This field changes only when a worker binds or unbinds.
+ * Otherwise, the field is read-only while the worker is bound.
+ *
+ * [shared read-only]
+ */
+ cilk_worker_type type;
+
+ /**
* Lazy task queue of this worker - an array of pointers to stack frames.
*
* Read-only because deques are a fixed size in the current
* implementation.
+ *
+ * @note This field is 64-byte aligned because it is the first in
+ * the group of local fields. We want this group to fall on a
+ * different cache line from the previous group, which is shared
+ * read-only.
+ *
* [local read-only]
*/
+ __attribute__((aligned(64)))
__cilkrts_stack_frame **ltq;
/**
- * Stacks waiting to be reused
+ * Pool of fibers waiting to be reused.
* [local read/write]
*/
- __cilkrts_stack_cache stack_cache;
+ cilk_fiber_pool fiber_pool;
+
+ /**
+ * The fiber for the scheduling stacks.
+ * [local read/write]
+ */
+ cilk_fiber* scheduling_fiber;
/**
* Saved pointer to the leaf node in thread-local storage, when a
@@ -207,24 +276,6 @@ typedef struct local_state
unsigned rand_seed;
/**
- * Type of this worker
- *
- * This field changes only when a worker binds or unbinds.
- * Otherwise, the field is read-only while the worker is bound.
- *
- * [shared read-only]
- */
- cilk_worker_type type;
-
- /**
- * jmp_buf used to jump back into the runtime system after an
- * unsuccessful steal check or sync.
- *
- * [local read/write]
- */
- jmp_buf env;
-
- /**
* Function to execute after transferring onto the scheduling stack.
*
* [local read/write]
@@ -240,7 +291,7 @@ typedef struct local_state
__cilkrts_stack_frame *suspended_stack;
/**
- * __cilkrts_stack that should be freed after returning from a
+ * cilk_fiber that should be freed after returning from a
* spawn with a stolen parent or after stalling at a sync.
* We calculate the stack to free when executing a reduction on
@@ -252,7 +303,7 @@ typedef struct local_state
*
* [local read/write]
*/
- __cilkrts_stack* stack_to_free;
+ cilk_fiber* fiber_to_free;
/**
* Saved exception object for an exception that is being passed to
@@ -263,14 +314,6 @@ typedef struct local_state
struct pending_exception_info *pending_exception;
/**
- * Place to save return address so we can report it to Inspector
- *
- * Used only by Windows.
- * [local read/write]
- */
- void *sync_return_address;
-
- /**
* Buckets for the memory allocator
*
* [local read/write]
@@ -290,7 +333,7 @@ typedef struct local_state
* Useful only when CILK_PROFIlE is compiled in.
* [local read/write]
*/
- statistics stats;
+ statistics* stats;
/**
* Count indicates number of failures since last successful steal. This is
@@ -301,63 +344,39 @@ typedef struct local_state
unsigned int steal_failure_count;
/**
- * Team on which this worker is a participant. When a user worker enters,
- * its team is its own worker struct and it can never change teams. When a
- * system worker steals, it adopts the team of its victim.
- *
- * When a system worker w steals, it reads victim->l->team and
- * joins this team. w->l->team is constant until the next time w
- * returns control to the runtime.
- * We must acquire the worker lock to change w->l->team.
- *
- * [shared read-only]
- */
- __cilkrts_worker *team;
-
- /**
- * This is set iff this is a WORKER_USER and there has been a steal. It
- * points to the first frame that was stolen since the team was last fully
- * sync'd. Only this worker may continue past a sync in this function.
- *
- * This field is set by a thief for a victim that is a user
- * thread, while holding the victim's lock.
- * It can be cleared without a lock by the worker that will
- * continue exuecting past the sync.
+ * 1 if work was stolen from another worker. When true, this will flag
+ * setup_for_execution_pedigree to increment the pedigree when we resume
+ * execution to match the increment that would have been done on a return
+ * from a spawn helper.
*
- * [shared read/write]
+ * [local read/write]
*/
- struct full_frame *last_full_frame;
+ int work_stolen;
/**
- * NULL for WORKER_SYSTEMs (they are created on their scheduling stacks, so
- * they already know where their scheduling stacks are). A WORKER_USER can
- * jump to this stack when it returns to a stolen parent and wants to begin
- * stealing.
+ * File pointer for record or replay
+ * Does FILE * work on Windows?
+ * During record, the file will be opened in write-only mode.
+ * During replay, the file will be opened in read-only mode.
*
* [local read/write]
*/
- void *scheduler_stack;
+ FILE *record_replay_fptr;
/**
- * 0 if the user thread has not yet been imported. 1 if the user thread
- * has been imported. \"Imported\" means the user thread has returned to a
- * stolen parent and a scheduling stack or fiber has been created for it.
- * Ignored for system workers.
+ * Root of array of replay entries - NULL if we're not replaying a log
*
* [local read/write]
*/
- int user_thread_imported;
+ replay_entry_t *replay_list_root;
/**
- * 1 if work was stolen from another worker. When true, this will flag
- * setup_for_execution_pedigree to increment the pedigree when we resume
- * execution to match the increment that would have been done on a return
- * from a spawn helper.
+ * Current replay entry - NULL if we're not replaying a log
*
* [local read/write]
*/
- int work_stolen;
-
+ replay_entry_t *replay_list_entry;
+
/**
* Separate the signal_node from other things in the local_state by the
* sizeof a cache line for performance reasons.
@@ -383,7 +402,7 @@ typedef struct local_state
* [shared read-only]
*/
ls_magic_t worker_magic_1;
-} local_state;
+};
/**
* Perform cleanup according to the function set before the longjmp().
diff --git a/libcilkrts/runtime/metacall_impl.c b/libcilkrts/runtime/metacall_impl.c
index ae311dc103f..65a2aa02890 100644
--- a/libcilkrts/runtime/metacall_impl.c
+++ b/libcilkrts/runtime/metacall_impl.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "metacall_impl.h"
@@ -32,12 +37,12 @@ NOINLINE
CILK_API_VOID
__cilkrts_metacall(unsigned int tool, unsigned int code, void *data)
{
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
// The metacall type, code and data are packed together into a single
// struct which will be interpreted by the tool. This function is the
// one and only use of a "cilkscreen_metacall" annotation
metacall_data_t d = { tool, code, data };
-#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
// Note that Inspector uses probe mode, and is implementing the metacall
// interface to force the runtime to run with a single worker. So
// __cilkrts_metacall must use __notify_intrinsic instead of
diff --git a/libcilkrts/runtime/metacall_impl.h b/libcilkrts/runtime/metacall_impl.h
index d68645fe218..07917fddb52 100644
--- a/libcilkrts/runtime/metacall_impl.h
+++ b/libcilkrts/runtime/metacall_impl.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -91,8 +96,8 @@ int __cilkrts_running_under_sequential_ptool(void);
/**
* Notify Cilkscreen of the extent of the stack.
*
- * @param in begin Start (low address) of stack
- * @param in end One past high address of stack
+ * @param[in] begin Start (low address) of stack
+ * @param[in] end One past high address of stack
*/
void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end);
diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c
index 3fa50c88d89..9a8543a16ef 100644
--- a/libcilkrts/runtime/os-unix.c
+++ b/libcilkrts/runtime/os-unix.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifdef __linux__
@@ -45,8 +50,13 @@
# include <sys/sysctl.h>
// Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output
#elif defined __FreeBSD__
+// No additional include files
#elif defined __CYGWIN__
// Cygwin on Windows - no additional include files
+#elif defined __VXWORKS__
+# include <vxWorks.h>
+# include <vxCpuLib.h>
+# include <taskLib.h>
#else
# error "Unsupported OS"
#endif
@@ -74,7 +84,12 @@
#if !defined CILK_WORKER_TLS
static int cilk_keys_defined;
-static pthread_key_t worker_key, reducer_key, tbb_interop_key, pedigree_leaf_key;
+static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key;
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+static pthread_key_t fiber_key;
+#endif
+
static void *serial_worker;
@@ -88,8 +103,7 @@ static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr)
// Assert that we have either one or two nodes
// left in the pedigree chain.
// If we have more, then something is going wrong...
- CILK_ASSERT((!pedigree_tls->parent) ||
- (pedigree_tls->parent && (!pedigree_tls->parent->parent)));
+ CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent);
__cilkrts_free(pedigree_tls);
}
}
@@ -102,19 +116,29 @@ void __cilkrts_init_tls_variables(void)
on cilk_keys_defined. */
if (cilk_keys_defined)
return;
- status = pthread_key_create(&worker_key, 0);
- CILK_ASSERT (status == 0);
- status = pthread_key_create(&reducer_key, 0);
- CILK_ASSERT (status == 0);
- status = pthread_key_create(&tbb_interop_key, 0);
+ status = pthread_key_create(&worker_key, NULL);
CILK_ASSERT (status == 0);
status = pthread_key_create(&pedigree_leaf_key,
__cilkrts_pedigree_leaf_destructor);
CILK_ASSERT (status == 0);
+ status = pthread_key_create(&tbb_interop_key, NULL);
+ CILK_ASSERT (status == 0);
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+ status = pthread_key_create(&fiber_key, NULL);
+ CILK_ASSERT (status == 0);
+#endif
cilk_keys_defined = 1;
return;
}
+COMMON_SYSDEP
+void* cilkos_get_current_thread_id(void)
+{
+ return (void*)pthread_self();
+}
+
+
CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker()
{
if (__builtin_expect(cilk_keys_defined, 1))
@@ -129,14 +153,6 @@ CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast()
return (__cilkrts_worker *)pthread_getspecific(worker_key);
}
-COMMON_SYSDEP struct cilkred_map *__cilkrts_get_tls_reducer(void)
-{
- if (__builtin_expect(cilk_keys_defined, 1))
- return (struct cilkred_map *)pthread_getspecific(reducer_key);
- else
- return 0;
-}
-
COMMON_SYSDEP
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void)
{
@@ -188,6 +204,17 @@ __cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new)
return pedigree_tls;
}
+#if SUPPORT_GET_CURRENT_FIBER > 0
+COMMON_SYSDEP
+cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void)
+{
+ if (__builtin_expect(cilk_keys_defined, 1))
+ return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key);
+ else
+ return NULL;
+}
+#endif
+
COMMON_SYSDEP
void __cilkrts_set_tls_worker(__cilkrts_worker *w)
{
@@ -203,11 +230,12 @@ void __cilkrts_set_tls_worker(__cilkrts_worker *w)
}
}
-COMMON_SYSDEP void __cilkrts_set_tls_reducer(struct cilkred_map *r)
+COMMON_SYSDEP
+void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
{
if (__builtin_expect(cilk_keys_defined, 1)) {
int status;
- status = pthread_setspecific(reducer_key, r);
+ status = pthread_setspecific(tbb_interop_key, t);
CILK_ASSERT (status == 0);
return;
}
@@ -215,29 +243,30 @@ COMMON_SYSDEP void __cilkrts_set_tls_reducer(struct cilkred_map *r)
}
COMMON_SYSDEP
-void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t)
+void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
{
if (__builtin_expect(cilk_keys_defined, 1)) {
int status;
- status = pthread_setspecific(tbb_interop_key, t);
+ status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
CILK_ASSERT (status == 0);
return;
}
abort();
}
-
+#if SUPPORT_GET_CURRENT_FIBER > 0
COMMON_SYSDEP
-void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf)
+void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber)
{
if (__builtin_expect(cilk_keys_defined, 1)) {
int status;
- status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf);
+ status = pthread_setspecific(fiber_key, fiber);
CILK_ASSERT (status == 0);
return;
}
abort();
}
+#endif
#else
void __cilkrts_init_tls_variables(void)
@@ -245,7 +274,7 @@ void __cilkrts_init_tls_variables(void)
}
#endif
-#if defined __linux__
+#if defined (__linux__) && ! defined(ANDROID)
/*
* Get the thread id, rather than the pid. In the case of MIC offload, it's
* possible that we have multiple threads entering Cilk, and each has a
@@ -312,7 +341,14 @@ static int linux_get_affinity_count (int tid)
COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
{
-#if defined __linux__
+#if defined ANDROID
+ return sysconf (_SC_NPROCESSORS_ONLN);
+#elif defined __MIC__
+ /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial
+ /// on KNC. Also, ignore the last core.
+ int P = sysconf (_SC_NPROCESSORS_ONLN);
+ return P/2 - 2;
+#elif defined __linux__
int affinity_count = linux_get_affinity_count(linux_gettid());
return (0 != affinity_count) ? affinity_count : sysconf (_SC_NPROCESSORS_ONLN);
@@ -331,6 +367,8 @@ COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void)
return ncores;
// Just get the number of processors
// return sysconf(_SC_NPROCESSORS_ONLN);
+#elif defined __VXWORKS__
+ return __builtin_popcount( vxCpuEnabledGet() );
#else
#error "Unknown architecture"
#endif
@@ -360,7 +398,7 @@ COMMON_SYSDEP void __cilkrts_short_pause(void)
#elif defined __i386__ || defined __x86_64
__asm__("pause");
#else
-# warning __cilkrts_short_pause undefined
+# warning __cilkrts_short_pause empty
#endif
}
@@ -377,12 +415,16 @@ COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x)
COMMON_SYSDEP void __cilkrts_sleep(void)
{
+#ifdef __VXWORKS__
+ taskDelay(1);
+#else
usleep(1);
+#endif
}
COMMON_SYSDEP void __cilkrts_yield(void)
{
-#if __APPLE__ || __FreeBSD__
+#if __APPLE__ || __FreeBSD__ || __VXWORKS__
// On MacOS, call sched_yield to yield quantum. I'm not sure why we
// don't do this on Linux also.
sched_yield();
@@ -393,6 +435,10 @@ COMMON_SYSDEP void __cilkrts_yield(void)
// giving up the processor and latency starting up when work becomes
// available
_mm_delay_32(1024);
+#elif defined(ANDROID)
+ // On Android, call sched_yield to yield quantum. I'm not sure why we
+ // don't do this on Linux also.
+ sched_yield();
#else
// On Linux, call pthread_yield (which in turn will call sched_yield)
// to yield quantum.
@@ -488,4 +534,9 @@ size_t cilkos_validate_stack_size(size_t specified_stack_size) {
return specified_stack_size;
}
+long cilkos_atomic_add(volatile long* p, long x)
+{
+ return __sync_add_and_fetch(p, x);
+}
+
/* End os-unix.c */
diff --git a/libcilkrts/runtime/os.h b/libcilkrts/runtime/os.h
index 192b21a3e5e..9630de02b02 100644
--- a/libcilkrts/runtime/os.h
+++ b/libcilkrts/runtime/os.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -37,7 +42,8 @@
#define INCLUDED_OS_DOT_H
#include "rts-common.h"
-#include <cilk/common.h>
+#include "cilk/common.h"
+#include "cilk-tbb-interop.h"
#ifdef __cplusplus
# include <cstddef>
@@ -45,21 +51,8 @@
# include <stddef.h>
#endif
-// #ifndef _WIN32
-// # include <pthread.h> // For pthread_key_t
-// #endif
-
-// Forward declarations
-typedef struct __cilk_tbb_stack_op_thunk __cilk_tbb_stack_op_thunk;
-
__CILKRTS_BEGIN_EXTERN_C
-#ifdef _WIN32
-typedef unsigned cilkos_thread_id_t;
-#else
-typedef void* cilkos_thread_id_t;
-#endif
-
// /* Thread-local storage */
// #ifdef _WIN32
@@ -74,27 +67,28 @@ typedef void* cilkos_thread_id_t;
/* The RTS assumes that some thread-local state exists that stores the
worker and reducer map currently associated with a thread. These routines
manipulate this state. */
-typedef struct __cilkrts_worker __cilkrts_worker;
-typedef struct cilkred_map cilkred_map;
-typedef struct __cilkrts_pedigree __cilkrts_pedigree;
+/** @brief Thread-local state for cilk fibers. */
+typedef struct cilk_fiber_sysdep cilk_fiber_sysdep;
+
+/** @brief Initialize all TLS variables for Cilk. */
COMMON_SYSDEP void __cilkrts_init_tls_variables(void);
+/** @brief Set worker struct in TLS. */
COMMON_SYSDEP
void __cilkrts_set_tls_worker(__cilkrts_worker *w) cilk_nothrow;
-/* Likewise for reducer maps */
-COMMON_SYSDEP cilkred_map *__cilkrts_get_tls_reducer(void) cilk_nothrow;
-
-COMMON_SYSDEP void __cilkrts_set_tls_reducer(cilkred_map *) cilk_nothrow;
-
-/* Ditto for TBB-interop structures. */
+/** @brief Get stack_op for TBB-interop structures from TLS. */
COMMON_SYSDEP
__cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void);
+
+/** @brief Set stack_op for TBB-interop structures in TLS. */
COMMON_SYSDEP
void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t);
/**
+ * @brief Get the pointer to the pedigree leaf node from TLS.
+ *
* Function to get a pointer to the thread's pedigree leaf node. This
* pointer can be NULL.
*/
@@ -102,24 +96,53 @@ COMMON_SYSDEP
__cilkrts_pedigree * __cilkrts_get_tls_pedigree_leaf(int create_new);
/**
- * Set the pointer to the pedigree leaf node.
+ * @brief Sets the pointer to the pedigree leaf node in TLS.
*
* If the previous pointer value was not NULL, it is the caller's
* responsibility to ensure that previous pointer value is saved and
* freed.
+ *
+ * @param pedigree_leaf The leaf node to store into TLS.
*/
COMMON_SYSDEP
void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf);
-/* Return number of CPUs supported by this hardware, using whatever definition
+
+#if SUPPORT_GET_CURRENT_FIBER > 0
+/**
+ * @brief Get the cilk_fiber from TLS.
+ */
+COMMON_SYSDEP
+cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void);
+
+/**
+ * @brief Set the cilk_fiber in TLS.
+ *
+ * @param fiber The fiber to store into TLS.
+ */
+COMMON_SYSDEP
+void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber);
+#endif
+
+/**
+ * @brief Function for returning the current thread id.
+ * @warning This function is useful for debugging purposes only.
+ */
+COMMON_SYSDEP
+void* cilkos_get_current_thread_id(void);
+
+/** @brief Return number of CPUs supported by this hardware, using whatever definition
of CPU is considered appropriate. */
COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void);
-/* timer support */
+/** @brief Get current value of timer */
COMMON_SYSDEP unsigned long long __cilkrts_getticks(void);
/* Machine instructions */
+
+/// Stall execution for a few cycles.
COMMON_SYSDEP void __cilkrts_short_pause(void);
+/// Wrapper for xchg instruction
COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x);
/* gcc before 4.4 does not implement __sync_synchronize properly */
@@ -153,51 +176,67 @@ COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x);
// # pragma intrinsic(_ReadWriteBarrier)
// # define __cilkrts_fence() _ReadWriteBarrier()
#else
-COMMON_SYSDEP void __cilkrts_fence(void);
+COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction
#endif
-COMMON_SYSDEP void __cilkrts_sleep(void); /* Sleep briefly */
-COMMON_SYSDEP void __cilkrts_yield(void); /* Yield quantum */
+COMMON_SYSDEP void __cilkrts_sleep(void); ///< Sleep briefly
+COMMON_SYSDEP void __cilkrts_yield(void); ///< Yield quantum
-/*
- * Gets environment variable 'varname' and copy its value into 'value'.
+/**
+ * @brief Gets environment variable 'varname' and copies its value into 'value'.
+ *
* If the entire value, including the null terminator fits into 'vallen'
* bytes, then returns the length of the value excluding the null. Otherwise,
* leaves the contents of 'value' undefined and returns the number of
* characters needed to store the environment variable's value, *including*
* the null terminator.
+ *
+ * @param value Buffer to store value.
+ * @param vallen Length of value buffer
+ * @param varname Name of the environment variable.
+ * @return Length of the value (excluding the null) if it fits in 'vallen' bytes; otherwise the size needed, including the null.
*/
COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen,
const char* varname);
-/*
- * Unrecoverable error: Print an error message and abort execution.
+/**
+ * @brief Unrecoverable error: Print an error message and abort execution.
*/
COMMON_SYSDEP void cilkos_error(const char *fmt, ...);
-/*
- * Print a warning message and return.
+/**
+ * @brief Print a warning message and return.
*/
COMMON_SYSDEP void cilkos_warning(const char *fmt, ...);
-/*
- * Convert the user's specified stack size into a "reasonable" value
- * for the current OS.
+/**
+ * @brief Convert the user's specified stack size into a "reasonable"
+ * value for the current OS.
+ *
+ * @param specified_stack_size User-specified stack size.
+ * @return New stack size value, modified for the OS.
*/
COMMON_SYSDEP size_t cilkos_validate_stack_size(size_t specified_stack_size);
-#ifdef _WIN32
-/*
- * Windows-only low-level functions for processor groups.
+/**
+ * @brief Atomic addition: computes *p += x.
+ *
+ * @param p Pointer to value to update
+ * @param x Value to add to *p.
*/
+COMMON_SYSDEP long cilkos_atomic_add(volatile long* p, long x);
+#ifdef _WIN32
+
+/**
+ * @brief Windows-only low-level functions for processor groups.
+ */
typedef struct _GROUP_AFFINITY GROUP_AFFINITY;
-/*
- * init_processor_group_function_ptrs
- *
- * Probe the executing OS to see if it supports processor groups. These
- * functions are expected to be available in Windows 7 or later.
+/**
+ * @brief Probe the executing OS to see if it supports processor
+ * groups. These functions are expected to be available in Windows 7
+ * or later.
*/
void win_init_processor_groups(void);
@@ -208,8 +247,7 @@ int win_set_thread_group_affinity(/*HANDLE*/ void* hThread,
GROUP_AFFINITY* PreviousGroupAffinity);
/**
- * This method should be called to clean up any state it allocated in
- * TLS.
+ * @brief Cleans up any state allocated in TLS.
*
* Only defined for Windows because Linux calls destructors for each
* thread-local variable.
diff --git a/libcilkrts/runtime/os_mutex-unix.c b/libcilkrts/runtime/os_mutex-unix.c
index fce65c981ea..fe99cffc70a 100644
--- a/libcilkrts/runtime/os_mutex-unix.c
+++ b/libcilkrts/runtime/os_mutex-unix.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "os_mutex.h"
@@ -96,7 +101,6 @@ struct os_mutex *__cilkrts_os_mutex_create(void)
void __cilkrts_os_mutex_lock(struct os_mutex *p)
{
int status;
-
status = pthread_mutex_lock (&p->mutex);
ITT_SYNC_ACQUIRED(p);
if (__builtin_expect(status, 0) == 0)
@@ -109,20 +113,16 @@ void __cilkrts_os_mutex_lock(struct os_mutex *p)
status, p);
}
-#if 0
int __cilkrts_os_mutex_trylock(struct os_mutex *p)
{
int status;
-
status = pthread_mutex_trylock (&p->mutex);
return (status == 0);
}
-#endif
void __cilkrts_os_mutex_unlock(struct os_mutex *p)
{
int status;
-
ITT_SYNC_RELEASING(p);
status = pthread_mutex_unlock (&p->mutex);
CILK_ASSERT(status == 0);
diff --git a/libcilkrts/runtime/os_mutex.h b/libcilkrts/runtime/os_mutex.h
index 154fcd0b9cf..80f0ebc5725 100644
--- a/libcilkrts/runtime/os_mutex.h
+++ b/libcilkrts/runtime/os_mutex.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -42,7 +47,9 @@
__CILKRTS_BEGIN_EXTERN_C
+/// Opaque type
typedef struct os_mutex os_mutex;
+
/**
* Allocate and initialize an os_mutex
*
@@ -57,7 +64,14 @@ COMMON_SYSDEP os_mutex* __cilkrts_os_mutex_create(void);
*/
COMMON_SYSDEP void __cilkrts_os_mutex_lock(os_mutex *m);
-/*COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m);*/
+/**
+ * Try to acquire the os_mutex.
+ *
+ * @param m The os_mutex to try to acquire
+ * @return 0 if the lock acquire failed
+ * @return nonzero if the lock was acquired
+ */
+COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m);
/**
* Release the os_mutex
diff --git a/libcilkrts/runtime/pedigrees.c b/libcilkrts/runtime/pedigrees.c
index 4a66b4e9327..5d00f9aade3 100644
--- a/libcilkrts/runtime/pedigrees.c
+++ b/libcilkrts/runtime/pedigrees.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2007-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2007-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
diff --git a/libcilkrts/runtime/pedigrees.h b/libcilkrts/runtime/pedigrees.h
index 8b12e650145..a38d2a97a6d 100644
--- a/libcilkrts/runtime/pedigrees.h
+++ b/libcilkrts/runtime/pedigrees.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifndef INCLUDED_PEDIGREES_DOT_H
diff --git a/libcilkrts/runtime/record-replay.cpp b/libcilkrts/runtime/record-replay.cpp
new file mode 100644
index 00000000000..1c0ada6d13e
--- /dev/null
+++ b/libcilkrts/runtime/record-replay.cpp
@@ -0,0 +1,765 @@
+/* record-replay.cpp -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ **************************************************************************/
+
+/*
+ * Implementation of the record/replay functionality for Cilk Plus
+ */
+
+#include <cstring>
+#include <vector>
+#include <stdlib.h>
+
+// clang is really strict about printf formats, so use the annoying integer
+// printf macros. Unfortunately they're not available on Windows.
+#ifdef _WIN32
+#define PRIu64 "llu"
+#else
+#define __STDC_FORMAT_MACROS 1
+#include <inttypes.h>
+#endif
+
+#include "record-replay.h"
+#include "bug.h"
+#include "internal/abi.h"
+#include "local_state.h"
+#include "full_frame.h"
+#include "global_state.h"
+#include "cilk_malloc.h"
+#include "os.h" // for cilkos_error()
+
+#if RECORD_ON_REPLAY
+#pragma message ("*** Record on Replay is enabled!")
+#endif
+
+// Defined to write sequence number to the logs. Note that you cannot
+// diff logs with sequence numbers because the numbers may increment in
+// different orders.
+//#define INCLUDE_SEQUENCE_NUMBER 1
+
+const int PED_VERSION = 1; // Log recording version
+
+// Log types
+// Identifies the kind of record held by a replay_entry_t.
+enum ped_type_t
+{
+ // Set by replay_entry_t::load() when the record type string is not recognized
+ ped_type_unknown,
+ // A "Steal" record (PED_TYPE_STR_STEAL)
+ ped_type_steal,
+ // A "Sync" record (PED_TYPE_STR_SYNC)
+ ped_type_sync,
+ // An "Orphaned" record (PED_TYPE_STR_ORPHANED)
+ ped_type_orphaned,
+ ped_type_last // Flags end of the list
+};
+
+// Log type strings
+#define PED_TYPE_STR_STEAL "Steal"
+#define PED_TYPE_STR_SYNC "Sync"
+#define PED_TYPE_STR_WORKERS "Workers"
+#define PED_TYPE_STR_ORPHANED "Orphaned"
+
+#define PED_TYPE_SIZE 16 // Buffer size for the type of pedigree. Must
+ // hold largest pedigree record type string.
+#define PEDIGREE_BUFF_SIZE 512 // Buffer size for the string representation
+ // of a pedigree.
+
+/**
+ * Data we store for a replay log entry.
+ *
+ * Entries are kept in a contiguous array whose final element has the type
+ * ped_type_last; next_entry() relies on that terminator.
+ */
+typedef struct replay_entry_t
+{
+    uint64_t *m_reverse_pedigree; /**< Reverse pedigree for replay log entry */
+    ped_type_t m_type;            /**< Type of replay log entry */
+    int16_t m_pedigree_len;       /**< Number of terms in reverse pedigree */
+    int16_t m_value;              /**< Victim for STEALs, 0 if matching steal found for ORPHANs */
+
+    /**
+     * Load data read from the log into the entry.
+     *
+     * Every field is put into a defined state before anything can fail, so
+     * that a failed load leaves an entry which unload() can safely be
+     * called on (NULL pedigree, zero length, type ped_type_unknown).
+     *
+     * @param type The record type string read from the log.
+     * @param pedigee_str The pedigree as decimal terms separated by a single
+     * character (the log writer uses '_').
+     * @param value1 First integer from the record. Used as the victim ID
+     * for STEAL records, ignored otherwise.
+     * @param value2 Second integer from the record. Currently unused.
+     *
+     * @return true if the entry was loaded.
+     * @return false if the type is not recognized, the pedigree has more
+     * terms than fit in the temporary buffer, or memory could not be
+     * allocated.
+     */
+    bool load(const char *type, const char *pedigee_str, int32_t value1, int32_t value2)
+    {
+        // The entry may live in uninitialized memory, so give every field
+        // a known value up front.
+        m_reverse_pedigree = NULL;
+        m_pedigree_len = 0;
+        m_value = -1;   // Victim not valid
+
+        // Convert the type into an enum
+        if (0 == strcmp(type, PED_TYPE_STR_STEAL))
+        {
+            m_type = ped_type_steal;
+            m_value = (int16_t)value1;  // Victim
+        }
+        else if (0 == strcmp(type, PED_TYPE_STR_SYNC))
+            m_type = ped_type_sync;
+        else if (0 == strcmp(type, PED_TYPE_STR_ORPHANED))
+            m_type = ped_type_orphaned;
+        else
+        {
+            m_type = ped_type_unknown;
+            return false;
+        }
+
+        // Parse the pedigree into a bounded temporary buffer
+        const int max_terms = PEDIGREE_BUFF_SIZE/2;
+        uint64_t temp_pedigree[PEDIGREE_BUFF_SIZE/2];
+        int len = 0;
+
+        const char *p = pedigee_str;
+        char *end;
+
+        while(1)
+        {
+            // Refuse to write past the end of the temporary buffer; a
+            // malformed pedigree string can otherwise yield one term per
+            // character.
+            if (len >= max_terms)
+            {
+                m_type = ped_type_unknown;
+                return false;
+            }
+
+            // Ranks are 64-bit unsigned values, so parse them at full width
+            temp_pedigree[len++] = (uint64_t)strtoull(p, &end, 10);
+            if ('\0' == *end)
+                break;
+            p = end + 1;
+        }
+
+        // Allocate memory to hold the pedigree.
+        uint64_t *reversed =
+            (uint64_t *)__cilkrts_malloc(sizeof(uint64_t) * len);
+        if (NULL == reversed)
+        {
+            m_type = ped_type_unknown;
+            return false;
+        }
+
+        // Copy the pedigree in reverse order since that's the order we'll
+        // traverse it
+        for (int n = 0; n < len; n++)
+            reversed[n] = temp_pedigree[(len - 1) - n];
+
+        m_reverse_pedigree = reversed;
+        m_pedigree_len = (int16_t)len;
+
+        return true;
+    }
+
+    /**
+     * Match this entry against the data supplied. This includes walking the
+     * pedigree from the specified node.
+     *
+     * @param type The record type being sought.
+     * @param node The pedigree node to start walking from; the walk follows
+     * the parent links.
+     * @param victim The victim worker ID. Only compared for STEAL records.
+     *
+     * @return true if the type, the victim (for STEALs) and every pedigree
+     * term match and both chains have the same length.
+     */
+    bool match (ped_type_t type, const __cilkrts_pedigree *node, int victim = -1)
+    {
+        int i = 0;
+
+        // If the type isn't what they're seeking, we don't have a match
+        if (type != m_type)
+            return false;
+
+        // If we're looking for a STEAL, then the victim must match
+        if ((type == ped_type_steal) && (victim != m_value))
+            return false;
+
+        // Compare the current pedigree against what was recorded
+        while ((NULL != node) && (i < m_pedigree_len))
+        {
+            // If we've got a pedigree rank difference, then we don't have
+            // a match
+            if (node->rank != m_reverse_pedigree[i])
+                return false;
+            node = node->parent;
+            i++;
+        }
+
+        // Make sure we exhausted both the pedigree chain and the recorded
+        // pedigree
+        return ((NULL == node) && (i == m_pedigree_len));
+    }
+
+    /**
+     * Advance to the next entry, skipping any ORPHANED records we didn't see
+     * a matching STEAL for.
+     *
+     * @return The next entry to be replayed, or this entry if it is already
+     * the ped_type_last terminator.
+     */
+    replay_entry_t *next_entry()
+    {
+        replay_entry_t *entry = this;
+
+        // You can't go beyond the end
+        if (ped_type_last == entry->m_type)
+            return entry;
+
+        // Advance to the next entry
+        entry++;
+
+        // Skip any ORPHANED records that don't have a matching steal. The
+        // value field of an ORPHANED record is initialized to -1 when it is
+        // loaded and is set to 0 once a matching STEAL record has been
+        // found, so a value of -1 here means no STEAL matched it.
+        while ((ped_type_orphaned == entry->m_type) && (-1 == entry->m_value))
+        {
+            entry++;
+        }
+
+        return entry;
+    }
+
+    /**
+     * Release any allocated resources
+     */
+    void unload()
+    {
+        __cilkrts_free(m_reverse_pedigree);
+        m_reverse_pedigree = NULL;
+    }
+
+} replay_entry_t;
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Bounded worker for walk_pedigree_nodes(). Recurses to the root of the
+ * pedigree first so the terms are written in forward order.
+ *
+ * @param p Where the next character is to be written.
+ * @param end One past the last usable character of the buffer.
+ * @param pnode The pedigree term to be written after its ancestors.
+ *
+ * @return A pointer to the terminating NUL of the string written so far.
+ */
+static
+char * walk_pedigree_nodes_bounded(char *p, char *end,
+                                   const __cilkrts_pedigree *pnode)
+{
+    CILK_ASSERT(pnode);
+    if (pnode->parent)
+    {
+        p = walk_pedigree_nodes_bounded(p, end, pnode->parent);
+
+        // Need room for the separator and the terminating NUL
+        bool separator_fits = (end - p) > 1;
+        CILK_ASSERT(separator_fits);
+        if (! separator_fits)
+            return p;
+        *p++ = '_';
+        *p = '\0';
+    }
+
+    // A 64-bit value is at most 20 decimal digits, so format it into a
+    // scratch buffer first and copy it only if it fits.
+    char term[24];
+    int n = sprintf(term, "%" PRIu64, pnode->rank);
+    bool term_fits = (n > 0) && (n < (end - p));
+    CILK_ASSERT(term_fits);
+    if (! term_fits)
+        return p;
+
+    memcpy(p, term, (size_t)n + 1);     // Includes the terminating NUL
+    return p + n;
+}
+
+/**
+ * Walk the pedigree and generate a string representation with underscores
+ * between terms. Currently does a recursive walk to generate a forward
+ * pedigree.
+ *
+ * Writes never go beyond PEDIGREE_BUFF_SIZE characters from p. If the
+ * pedigree does not fit, an assertion fires and the string is left
+ * NUL-terminated at the last term that did fit.
+ *
+ * @param p The buffer that is to be filled. Assumed to be PEDIGREE_BUFF_SIZE
+ * characters long
+ * @param pnode The initial pedigree term to be written.
+ *
+ * @return A pointer into the pedigree string buffer after a term has been
+ * written.
+ */
+static
+char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode)
+{
+    // Guarantee a terminated string even if nothing can be written
+    p[0] = '\0';
+    return walk_pedigree_nodes_bounded(p, p + PEDIGREE_BUFF_SIZE, pnode);
+}
+
+/**
+ * Append one record to a worker's replay log and flush it.
+ *
+ * The record is a single text line: the type, the pedigree string and the
+ * two integers, separated by spaces. When INCLUDE_SEQUENCE_NUMBER is
+ * defined, a process-wide sequence number is appended as a final field.
+ *
+ * @param w The worker whose log file (w->l->record_replay_fptr) is written.
+ * @param type The type of the pedigree record, as a string.
+ * @param initial_node The initial pedigree node to be written, or NULL if
+ * there is no pedigree for this record type, in which case "0" is written
+ * in its place.
+ * @param i1 First integer value to be written to the record.
+ * @param i2 Second integer value to be written to the record. Defaults to
+ * -1 (unused). It is always written to make parsing easier.
+ */
+static
+void write_to_replay_log (__cilkrts_worker *w, const char *type,
+                          const __cilkrts_pedigree *initial_node,
+                          int i1 = -1, int i2 = -1)
+{
+    char pedigree[PEDIGREE_BUFF_SIZE];
+
+    // Build the pedigree field; "0" is the placeholder when there is none
+    if (NULL != initial_node)
+        walk_pedigree_nodes(pedigree, initial_node);
+    else
+        strcpy(pedigree, "0");
+
+#ifdef INCLUDE_SEQUENCE_NUMBER
+    // Write the record with a sequence number. The sequence number should
+    // always be the last term, and ignored on read
+
+    static long volatile seq_num = 0;
+    long write_num;
+
+    // Atomic increment functions are compiler/OS-specific
+#ifdef _WIN32
+    write_num = _InterlockedIncrement(&seq_num);
+#else /* GCC */
+    write_num = __sync_add_and_fetch(&seq_num, 1);
+#endif // _WIN32
+
+    fprintf(w->l->record_replay_fptr, "%s %s %d %d %ld\n",
+            type, pedigree, i1, i2, write_num);
+#else
+    // No sequence number: just the four basic fields
+    fprintf(w->l->record_replay_fptr, "%s %s %d %d\n",
+            type, pedigree, i1, i2);
+#endif // INCLUDE_SEQUENCE_NUMBER
+
+    // Push the record out immediately
+    fflush(w->l->record_replay_fptr);
+}
+
+/**
+ * Log a successful steal.
+ *
+ * The pedigree written for a STEAL record is the parent pedigree of the
+ * call stack of the frame the thief is about to run.
+ *
+ * @note It's assumed that replay_record_steal() has already checked that we're
+ * recording a log and that the record/replay functionality has not been
+ * compiled out.
+ *
+ * @param w The worker stealing a frame.
+ * @param victim_id The ID of the worker which had its frame stolen.
+ */
+void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id)
+{
+    // The stolen frame and its call stack must both be present
+    CILK_ASSERT(w->l->next_frame_ff);
+    CILK_ASSERT(w->l->next_frame_ff->call_stack);
+
+    const __cilkrts_pedigree *stolen_pedigree =
+        &(w->l->next_frame_ff->call_stack->parent_pedigree);
+
+    // Record layout: STEAL pedigree victim_id <unused second value>
+    write_to_replay_log (w, PED_TYPE_STR_STEAL, stolen_pedigree, victim_id);
+}
+
+/**
+ * Log that this worker is the one continuing from a sync.
+ *
+ * The pedigree written for a SYNC record is the worker's pedigree at the
+ * sync.
+ *
+ * @note It's assumed that replay_record_sync() has already checked that we're
+ * recording a log and that the record/replay functionality has not been
+ * compiled out.
+ *
+ * @param w The worker continuing from a sync.
+ */
+void replay_record_sync_internal(__cilkrts_worker *w)
+{
+    const __cilkrts_pedigree *sync_pedigree = &w->pedigree;
+
+    // Both integer fields are left at their defaults for SYNC records
+    write_to_replay_log (w, PED_TYPE_STR_SYNC, sync_pedigree);
+}
+
+/**
+ * Log an attempt to return to a parent that has been stolen.
+ *
+ * The pedigree written for an ORPHANED record is that of the worker's
+ * parent pedigree node.
+ *
+ * @note It's assumed that replay_record_orphaned() has already checked that
+ * we're recording a log and that the record/replay functionality has not
+ * been compiled out.
+ *
+ * @param w The worker noting that it has been orphaned.
+ */
+void replay_record_orphaned_internal(__cilkrts_worker *w)
+{
+    const __cilkrts_pedigree *parent_pedigree = w->pedigree.parent;
+
+    // Both integer fields are left at their defaults for ORPHANED records
+    write_to_replay_log (w, PED_TYPE_STR_ORPHANED, parent_pedigree);
+}
+
+/**
+ * Check whether the worker's current replay entry is a SYNC record whose
+ * pedigree equals the worker's current pedigree, i.e. whether this worker
+ * was recorded as the one that continued from this sync.
+ *
+ * If we find a match, the caller is expected to stall until it is the last
+ * worker to reach the sync, so that it will be the worker to continue from
+ * the sync.
+ *
+ * @note It's assumed that replay_match_sync_pedigree() has already returned
+ * if we're not replaying a log, or if record/replay functionality has
+ * been compiled out.
+ *
+ * @param w The worker we're checking to see if we've got a match
+ *
+ * @return 1 if the current entry matches, 0 otherwise.
+ */
+int replay_match_sync_pedigree_internal(__cilkrts_worker *w)
+{
+    replay_entry_t *current = w->l->replay_list_entry;
+
+    return current->match(ped_type_sync, &w->pedigree) ? 1 : 0;
+}
+
+/**
+ * Consume the SYNC record at the worker's current replay position and move
+ * the worker on to its next replay entry.
+ *
+ * @note It's assumed that replay_advance_from_sync() has already returned if
+ * we're not replaying a log, or if record/replay functionality has been
+ * compiled out.
+ *
+ * @param w The worker whose replay log we're advancing.
+ */
+void replay_advance_from_sync_internal (__cilkrts_worker *w)
+{
+    replay_entry_t *current = w->l->replay_list_entry;
+
+    // The current replay entry must be a SYNC
+    CILK_ASSERT(ped_type_sync == w->l->replay_list_entry->m_type);
+
+    // Step past it
+    w->l->replay_list_entry = current->next_entry();
+}
+
+/**
+ * Called from random_steal() to override the ID of the randomly chosen victim
+ * worker which this worker will attempt to steal from.
+ *
+ * @note This call does NOT attempt to match the pedigree. That will be done
+ * by replay_match_victim_pedigree() after random_steal() has locked the victim
+ * worker.
+ *
+ * @param w The __cilkrts_worker we're executing on. Its current replay
+ * entry is inspected.
+ *
+ * @return -1 if the current replay entry is not a STEAL
+ * @return the victim worker ID stored in the entry if it is a STEAL
+ */
+int replay_get_next_recorded_victim_internal(__cilkrts_worker *w)
+{
+    const replay_entry_t *current = w->l->replay_list_entry;
+
+    // Only a STEAL record carries a victim; anything else aborts the
+    // attempt to steal work. The pedigree is checked later, once the
+    // victim worker has been locked.
+    return (ped_type_steal == current->m_type) ? current->m_value : -1;
+}
+
+/**
+ * Called from random_steal() to determine if we have a STEAL record that
+ * matches the pedigree at the head of the victim worker. If we do have a
+ * match, the STEAL record is consumed.
+ *
+ * @note It's assumed that replay_match_victim_pedigree() has already returned if
+ * we're not replaying a log, or if record/replay functionality has been
+ * compiled out.
+ *
+ * @param w The worker attempting the steal; its current replay entry is
+ * examined and, on a match, advanced.
+ * @param victim The worker being stolen from.
+ *
+ * @return 1 if we have a match
+ * @return 0 if the current replay record isn't a STEAL record, or the victim
+ * isn't correct, or the pedigree doesn't match.
+ */
+int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+    replay_entry_t *current = w->l->replay_list_entry;
+
+    if (current->match(ped_type_steal,
+                       &((*victim->head)->parent_pedigree),
+                       victim->self))
+    {
+        // Matched: consume this entry and report success
+        w->l->replay_list_entry = current->next_entry();
+        return 1;
+    }
+
+    // No match
+    return 0;
+}
+
+/**
+ * If the frame we're about to return to was recorded as being stolen,
+ * stall until it is, then consume the ORPHANED record.
+ *
+ * @note It's assumed that replay_wait_for_steal_if_parent_was_stolen() has
+ * already returned if we're not replaying a log, or if record/replay
+ * functionality has been compiled out.
+ *
+ * @param w The worker we're executing on.
+ */
+void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w)
+{
+    // Only act when the current entry is an ORPHANED record for our parent
+    if (w->l->replay_list_entry->match (ped_type_orphaned,
+                                        w->pedigree.parent))
+    {
+        // Stall until our parent is stolen. Note that we're comparing head
+        // and tail, not head and exc. The steal is not completed until tail
+        // is modified.
+        while ((w->tail - 1) >= w->head)
+            __cilkrts_sleep();
+
+        // Consume the entry
+        w->l->replay_list_entry = w->l->replay_list_entry->next_entry();
+    }
+}
+
+/**
+ * Allocate memory for the list of logged events.
+ *
+ * This function reads through the file and counts the number of lines
+ * (other than the Workers record) so it knows how big an array of replay
+ * entries to allocate. Every allocated entry is initialized as an empty
+ * ped_type_last terminator, so the list is always well-terminated even if
+ * fewer records are later loaded than lines were counted. The file is then
+ * rewound to the beginning so it can be loaded into memory.
+ *
+ * @param w The worker we're loading the file for. Its replay_list_root is
+ * set to the new array.
+ * @param f The file of replay data we're scanning.
+ */
+static
+void allocate_replay_list(__cilkrts_worker *w, FILE *f)
+{
+    // Count the number of entries so the space can be allocated all at
+    // once instead of in chunks
+    char buf[1024];
+    int entries = 1;    // Include "LAST" node
+    bool at_line_start = true;
+
+    // Loop on the result of fgets() rather than feof(); feof() only becomes
+    // true after a read has already failed.
+    while (fgets(buf, sizeof(buf), f))
+    {
+        // A line longer than the buffer arrives in several pieces; only
+        // the first piece starts a record.
+        if (at_line_start)
+        {
+            // Skip the Workers record - should only be in file for Worker 0
+            if (0 != strncmp(PED_TYPE_STR_WORKERS, buf, sizeof(PED_TYPE_STR_WORKERS)-1))
+                entries++;
+        }
+
+        size_t len = strlen(buf);
+        at_line_start = (len > 0) && ('\n' == buf[len - 1]);
+    }
+
+    replay_entry_t *list =
+        (replay_entry_t *)__cilkrts_malloc(entries * sizeof(replay_entry_t));
+    CILK_ASSERT(NULL != list);
+
+    // Start with every slot as an empty terminator; loading a record
+    // overwrites the slot.
+    for (int i = 0; i < entries; i++)
+    {
+        list[i].m_reverse_pedigree = NULL;
+        list[i].m_type = ped_type_last;
+        list[i].m_pedigree_len = 0;
+        list[i].m_value = -1;
+    }
+
+    w->l->replay_list_root = list;
+
+    // Reset the file to the beginning
+    rewind(f);
+}
+
+/**
+ * Load the replay log for a worker into memory.
+ *
+ * The log is read from "<root><self>.cilklog", where <root> is
+ * w->g->record_replay_file_name. The Workers record is validated against
+ * the current number of workers and PED_VERSION; every other record is
+ * loaded into the worker's replay list. Any inconsistency is a fatal error.
+ *
+ * @param w The worker we're loading the replay for.
+ */
+static
+void load_recorded_log(__cilkrts_worker *w)
+{
+    char ped_type[PED_TYPE_SIZE];
+    char ped_str[PEDIGREE_BUFF_SIZE];
+    int32_t i1 = -1, i2 = -1;
+    int fret;
+    char local_replay_file_name[512];
+    FILE *f;
+
+    // The field widths in the fscanf() formats below are literals; make
+    // sure they still agree with the buffer sizes.
+    CILK_ASSERT(16 == sizeof(ped_type));
+    CILK_ASSERT(512 == sizeof(ped_str));
+
+    // Make sure the file name fits. Worst case for the "%d.cilklog" suffix
+    // is 11 characters for the integer, 8 for the extension and the NUL.
+    if (strlen(w->g->record_replay_file_name) + 20 > sizeof(local_replay_file_name))
+    {
+        // Fatal error - does not return
+        cilkos_error("Cannot continue replay: log file name \"%s\" is too long.\n",
+                     w->g->record_replay_file_name);
+    }
+
+    // Open the log for reading
+    sprintf(local_replay_file_name, "%s%d.cilklog", w->g->record_replay_file_name, w->self);
+    f = fopen(local_replay_file_name, "r");
+
+    // Make sure we found a log!
+    CILK_ASSERT (NULL != f);
+
+    // Initialize the replay_list
+    allocate_replay_list(w, f);
+    replay_entry_t *entry = w->l->replay_list_root;
+
+    // Read the data out and add it to our tables. fscanf() returns EOF
+    // once the input is exhausted.
+    while (1)
+    {
+#ifndef INCLUDE_SEQUENCE_NUMBER
+        // Widths keep the string fields inside ped_type and ped_str
+        fret = fscanf(f, "%15s %511s %d %d\n", ped_type, ped_str, &i1, &i2);
+        if(EOF == fret)
+            break;
+
+        // We must have read 4 fields
+        CILK_ASSERT(4 == fret);
+#else
+        int32_t write_num;
+        // Widths keep the string fields inside ped_type and ped_str
+        fret = fscanf(f, "%15s %511s %d %d %d\n", ped_type, ped_str,
+                      &i1, &i2, &write_num);
+        if(EOF == fret)
+            break;
+
+        // We must have read 5 fields
+        CILK_ASSERT(5 == fret);
+#endif // INCLUDE_SEQUENCE_NUMBER
+
+        // Load the data into the entry
+        if (0 == strcmp(ped_type, PED_TYPE_STR_WORKERS))
+        {
+            // Verify we're replaying with the same number of workers we recorded with
+            if (i1 != w->g->P)
+            {
+                // Fatal error - does not return
+                cilkos_error("Cannot continue replay: number of workers(%d) doesn't match "
+                             "that from the recording(%d).\n", w->g->P, i1);
+            }
+
+            // Verify that we understand this version of the pedigree file
+            if (PED_VERSION != i2)
+            {
+                // Fatal error - does not return
+                cilkos_error("Pedigree file version %d doesn't match current "
+                             "version %d - cannot continue.\n",
+                             i2, PED_VERSION);
+            }
+        }
+        else
+        {
+            // A record we cannot load can never be matched during replay,
+            // so stop here instead of carrying a half-initialized entry.
+            if (! entry->load(ped_type, ped_str, i1, i2))
+            {
+                // Fatal error - does not return
+                cilkos_error("Cannot continue replay: invalid record \"%s %s\" in %s.\n",
+                             ped_type, ped_str, local_replay_file_name);
+            }
+            entry++;
+        }
+    }
+
+    // Make sure we've filled the allocated memory. The last entry was
+    // marked as ped_type_last when the list was allocated.
+    CILK_ASSERT(ped_type_last == entry->m_type);
+    w->l->replay_list_entry = w->l->replay_list_root;
+
+    // Close the log and return
+    fclose(f);
+}
+
+/**
+ * Scan a recorded log to match STEALs against ORPHANED records.
+ *
+ * For every STEAL entry in the list, the victim worker's replay list is
+ * searched for the first ORPHANED entry with an identical pedigree, and
+ * that entry's value field is set to 0 to mark it as matched.
+ *
+ * @param g Cilk Runtime global state. Passed to access the worker array so
+ * we can scan a worker's ORPHANED entries for one that matches a STEAL entry.
+ * @param entry The root of a replay_list for a worker.
+ */
+static
+void scan_for_matching_steals(global_state_t *g, replay_entry_t *entry)
+{
+    // Iterate over all of the entries
+    for ( ; ped_type_last != entry->m_type; entry++)
+    {
+        // Only STEALs are of interest. They tell us which worker the frame
+        // was stolen from
+        if (ped_type_steal != entry->m_type)
+            continue;
+
+        // Validate the worker ID and make sure we've got a list
+        CILK_ASSERT((entry->m_value >= 0) && (entry->m_value < g->total_workers));
+        replay_entry_t *victim_entry = g->workers[entry->m_value]->l->replay_list_root;
+        CILK_ASSERT(NULL != victim_entry);
+
+        // Scan the victim's list for the matching ORPHANED record
+        for ( ; ped_type_last != victim_entry->m_type; victim_entry++)
+        {
+            if (ped_type_orphaned != victim_entry->m_type)
+                continue;
+
+            if (entry->m_pedigree_len != victim_entry->m_pedigree_len)
+                continue;
+
+            if (0 != memcmp(entry->m_reverse_pedigree,
+                            victim_entry->m_reverse_pedigree,
+                            entry->m_pedigree_len * sizeof(int64_t)))
+                continue;
+
+            // Note that this ORPHANED record has a matching steal, and
+            // stop looking
+            victim_entry->m_value = 0;
+            break;
+        }
+    }
+}
+
+
+/**
+ * Create a record log for every worker and write the Workers record
+ * (number of workers, file version) to Worker 0's file.
+ *
+ * @param g Cilk Runtime global state.
+ * @param root Prefix of the log file names; each file is named
+ * "<root><self>.cilklog".
+ */
+static
+void open_record_logs(global_state_t *g, const char *root)
+{
+    char worker_file_name[512];
+
+    // Make sure the file name fits. Worst case for the "%d.cilklog" suffix
+    // is 11 characters for the integer, 8 for the extension and the NUL.
+    if (strlen(root) + 20 > sizeof(worker_file_name))
+    {
+        // Fatal error - does not return
+        cilkos_error("Cannot record log: log file name \"%s\" is too long.\n",
+                     root);
+    }
+
+    for (int i = 0; i < g->total_workers; ++i)
+    {
+        __cilkrts_worker *w = g->workers[i];
+        sprintf(worker_file_name, "%s%d.cilklog", root, w->self);
+        w->l->record_replay_fptr = fopen(worker_file_name, "w+");
+        CILK_ASSERT(NULL != w->l->record_replay_fptr);
+    }
+
+    // Record the number of workers, file version in Worker 0's file
+    write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION);
+}
+
+/*
+ * Initialize per-worker data for record or replay - See record-replay.h
+ * for full routine header.
+ */
+void replay_init_workers(global_state_t *g)
+{
+    int i;
+
+    // If we're not recording or replaying a log, we're done. All of the
+    // fields in the global_state_t or local_state_t are already initialized
+    // to default values.
+    if (RECORD_REPLAY_NONE == g->record_or_replay)
+        return;
+
+    // If we're replaying a log, read each worker's log and construct the
+    // in-memory log
+    if (REPLAY_LOG == g->record_or_replay)
+    {
+        // Read all of the data
+        for (i = 0; i < g->total_workers; ++i)
+        {
+            // This function will also initialize and fill the worker's
+            // replay list
+            load_recorded_log(g->workers[i]);
+        }
+
+        // Scan for orphans with no matching steal. Mark them so they'll be
+        // skipped as we advance through the log.
+        for (i = 0; i < g->total_workers; ++i)
+        {
+            scan_for_matching_steals(g, g->workers[i]->l->replay_list_root);
+        }
+
+        // If we're recording the logs while replaying, create the log files.
+        // This will only be used for debugging. Create the logs in the
+        // current directory. It should be as good a place as any...
+#if RECORD_ON_REPLAY
+        open_record_logs(g, "replay_log_");
+#endif // RECORD_ON_REPLAY
+    }
+
+    // If we're recording, create the log files
+    if (RECORD_LOG == g->record_or_replay)
+        open_record_logs(g, g->record_replay_file_name);
+}
+
+/*
+ * Do any necessary cleanup for the logs - See record-replay.h for full
+ * routine header.
+ *
+ * Every pointer released here is reset to NULL afterwards so that nothing
+ * is left dangling.
+ */
+void replay_term(global_state_t *g)
+{
+    // Free memory for the record/replay log file name, if we've got one
+    if (g->record_replay_file_name)
+    {
+        __cilkrts_free(g->record_replay_file_name);
+        g->record_replay_file_name = NULL;
+    }
+
+    // Per-worker cleanup
+    for(int i = 0; i < g->total_workers; ++i)
+    {
+        __cilkrts_worker *w = g->workers[i];
+
+        // Close the log files, if we've opened them
+        if(w->l->record_replay_fptr)
+        {
+            fclose(w->l->record_replay_fptr);
+            w->l->record_replay_fptr = NULL;
+        }
+
+        if (w->l->replay_list_root)
+        {
+            // We should have consumed the entire list
+            CILK_ASSERT(ped_type_last == w->l->replay_list_entry->m_type);
+
+            replay_entry_t *entry = w->l->replay_list_root;
+            while (ped_type_last != entry->m_type)
+            {
+                // Free the pedigree memory for each entry
+                entry->unload();
+                entry++;
+            }
+            __cilkrts_free(w->l->replay_list_root);
+            w->l->replay_list_root = NULL;
+            w->l->replay_list_entry = NULL;
+        }
+    }
+}
+
+__CILKRTS_END_EXTERN_C
diff --git a/libcilkrts/runtime/record-replay.h b/libcilkrts/runtime/record-replay.h
new file mode 100644
index 00000000000..f65e667a8e1
--- /dev/null
+++ b/libcilkrts/runtime/record-replay.h
@@ -0,0 +1,427 @@
+/* record_replay.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ *
+ **************************************************************************/
+
+/**
+ * @file record-replay.h
+ *
+ * @brief record-replay.h and .cpp encapsulate most of the functionality to
+ * record and play back a Cilk Plus application.
+ *
+ * Recording is directed by the setting of the CILK_RECORD_LOG environment
+ * variable. If it's defined, the value specifies the root we'll use to
+ * generate files for each worker using the following format string:
+ * "%s%d.cilklog", where the integer is the value of w->self.
+ *
+ * Replay is directed by the setting of the CILK_REPLAY_LOG environment
+ * variable, interpreted the same way as CILK_RECORD_LOG. If both
+ * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given
+ * and the attempt to record a log will be ignored.
+ *
+ * Recording is relatively straightforward. We write all information about a
+ * worker to a per-worker file.
+ *
+ * Each pedigree record consists of the following fields. All fields must be
+ * present in every record to make parsing easy.
+ * - Type - A string identifying the pedigree record. See the PED_TYPE_STR_
+ * macros for the currently defined values.
+ * - Pedigree - A string of pedigree values, with underscores between
+ * adjacent values.
+ * - i1 - Record type-specific value. -1 if not used.
+ * - i2 - Record type-specific value. -1 if not used.
+ *
+ * WORKERS record - only written to the file for worker 0. Note that this is
+ * the first worker in the workers array. Worker 0 is the first system worker,
+ * *NOT* a user worker.
+ * - Type: "Workers"
+ * - Pedigree: Always "0" - ignored
+ * - i1: Number of workers (g->P) when we recorded the log. A mismatch when
+ * we attempt to replay the log will result in aborting the execution.
+ * - i2: Log version number - Specified by PED_VERSION in record-replay.cpp
+ *
+ * STEAL record - written after a successful steal.
+ * - Type: "Steal"
+ * - Pedigree: Pedigree of stolen frame
+ * - i1: Worker the frame was stolen from
+ * - i2: -1
+ *
+ * SYNC record - written after a worker continues from a sync.
+ * - Type: "Sync"
+ * - Pedigree: Pedigree of sync. Note that this is the pedigree *before*
+ * the pedigree is incremented in setup_for_execution_pedigree().
+ * - i1: -1
+ * - i2: -1
+ *
+ * ORPHANED record - saved on a return to a stolen parent.
+ * - Type: "Orphaned"
+ * - Pedigree: Pedigree of the parent frame *before* the pedigree is
+ * incremented by the return
+ * - i1: -1
+ * - i2: -1
+ *
+ * On replay, the data is loaded into a per-worker array, and the data is
+ * consumed in order as needed.
+ */
+
+#ifndef INCLUDED_RECORD_REPLAY_DOT_H
+#define INCLUDED_RECORD_REPLAY_DOT_H
+
+#include "cilk/common.h"
+#include "global_state.h"
+
+/**
+ * Define CILK_RECORD_REPLAY to enable record/replay functionality. If
+ * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in
+ * record-replay.h will be stubbed out. Since they're declared as inline
+ * functions, the resulting build should have no performance impact due to
+ * the implementation of record/replay.
+ */
+ #define CILK_RECORD_REPLAY 1
+
+/**
+ * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This
+ * should only be needed when debugging the replay functionality. This should
+ * always be defined as 0 when record-replay.h is checked in.
+ */
+#define RECORD_ON_REPLAY 0
+
+__CILKRTS_BEGIN_EXTERN_C
+
+#ifdef CILK_RECORD_REPLAY
+// Declarations of internal record/replay functions. The inlined versions
+// further down do some preliminary testing (like if we're not recording or
+// replaying) and will stub out the functionality if we've compiled out the
+// record/replay feature
+
+// Called by replay_match_sync_pedigree() only while replaying a log
+int replay_match_sync_pedigree_internal(__cilkrts_worker *w);
+// Called by replay_wait_for_steal_if_parent_was_stolen() only while
+// replaying a log
+void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w);
+// Called by replay_record_steal() when a STEAL record should be written
+void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id);
+// Called by replay_record_sync() when a SYNC record should be written
+void replay_record_sync_internal(__cilkrts_worker *w);
+// Called by replay_record_orphaned() when an ORPHANED record should be
+// written
+void replay_record_orphaned_internal(__cilkrts_worker *w);
+// Called by replay_match_victim_pedigree() only while replaying a log
+int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim);
+// Called by replay_advance_from_sync() only while replaying a log, when a
+// match was found and the worker is continuing from the sync
+void replay_advance_from_sync_internal (__cilkrts_worker *w);
+// Called by replay_get_next_recorded_victim() only while replaying a log
+int replay_get_next_recorded_victim_internal(__cilkrts_worker *w);
+#endif // CILK_RECORD_REPLAY
+
+// Publicly defined record/replay API
+
+/**
+ * When replaying a log, wait for our parent to be stolen if it was stolen
+ * when the log was recorded. If record/replay is compiled out, this is a
+ * noop.
+ *
+ * @param w The __cilkrts_worker we're executing on. The worker's replay
+ * list will be checked for an ORPHANED record with a matching pedigree. If
+ * there is a match, the ORPHANED record will be consumed.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
+{
+    // Nothing to wait for unless a log is being replayed
+    if (REPLAY_LOG != w->g->record_or_replay)
+        return;
+
+    replay_wait_for_steal_if_parent_was_stolen_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w)
+{
+    // Record/replay is compiled out, so there is never anything to wait for
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Called from random_steal() to override the ID of the randomly chosen victim
+ * worker which this worker will attempt to steal from. Returns the worker id
+ * of the next victim this worker was recorded stealing from, or -1 if the
+ * next record in the log is not a STEAL.
+ *
+ * @note This call does NOT attempt to match the pedigree. That will be done
+ * by replay_match_victim_pedigree() after random_steal() has locked the victim
+ * worker.
+ *
+ * @param w The __cilkrts_worker we're executing on. The worker's replay log
+ * is checked for a STEAL record. If we've got one, the stolen worker ID is
+ * returned.
+ * @param id The randomly chosen victim worker ID. If we're not replaying a
+ * log, or if record/replay has been compiled out, this is the value that
+ * will be returned.
+ *
+ * @return id if we're not replaying a log
+ * @return -1 if the next record is not a STEAL
+ * @return recorded stolen worker ID if we've got a matching STEAL record
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
+{
+    // Outside of replay, the randomly chosen victim stands
+    if (REPLAY_LOG != w->g->record_or_replay)
+        return id;
+
+    // Replaying: the log decides which victim (if any) comes next
+    return replay_get_next_recorded_victim_internal(w);
+}
+#else
+__CILKRTS_INLINE
+int replay_get_next_recorded_victim(__cilkrts_worker *w, int id)
+{
+    // Record/replay is compiled out; hand back the caller's choice untouched
+    return id;
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Initialize per-worker data for record/replay. A noop if record/replay
+ * is disabled, or if we're not recording or replaying anything.
+ *
+ * If we're recording a log, this will ready us to create the per-worker
+ * logs.
+ *
+ * If we're replaying a log, this will read the logs into the per-worker
+ * structures.
+ *
+ * @param g Cilk runtime global state
+ */
+void replay_init_workers(global_state_t *g);
+
+/**
+ * Write a STEAL record after a successful steal. A noop if record/replay is
+ * disabled, or if we're not recording anything.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of
+ * the stolen frame will be walked to generate the STEAL record.
+ *
+ * @param victim_id The worker ID of the worker w stole from.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
+{
+#if RECORD_ON_REPLAY
+    // Recording on replay: any mode other than "none" produces a record
+    const int should_record = (RECORD_REPLAY_NONE != w->g->record_or_replay);
+#else
+    // Normal build: only an explicit recording run produces a record
+    const int should_record = (RECORD_LOG == w->g->record_or_replay);
+#endif
+
+    if (should_record)
+        replay_record_steal_internal(w, victim_id);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_steal(__cilkrts_worker *w, int32_t victim_id)
+{
+    // Record/replay is compiled out; nothing to write
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Write a SYNC record when continuing after a sync. A noop if record/replay
+ * is disabled, or if we're not recording anything, or if the sync was
+ * abandoned, meaning this isn't the worker that continues from the sync.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of
+ * the sync-ing frame will be walked to generate the SYNC record.
+ *
+ * @param continuing True if this worker will be continuing from the
+ * cilk_sync. A SYNC record will only be generated if continuing is true.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_sync(__cilkrts_worker *w, int continuing)
+{
+    // If this was not the last worker to reach the sync, return
+    if (! continuing)
+        return;
+
+#if RECORD_ON_REPLAY
+    // If we're recording on replay, write the record if we're recording or
+    // replaying
+    if (RECORD_REPLAY_NONE == w->g->record_or_replay)
+        return;
+#else
+    // Only write the record if we're recording
+    if (RECORD_LOG != w->g->record_or_replay)
+        return;
+#endif
+
+    replay_record_sync_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_sync(__cilkrts_worker *w, int continuing)
+{
+    // Record/replay is compiled out; nothing to write. The parameter is
+    // named 'continuing' to match the enabled variant and the documentation
+    // above.
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Write an ORPHANED record on a return to a stolen parent. A noop if
+ * record/replay is disabled, or if we're not recording anything.
+ *
+ * @param w The __cilkrts_worker we're executing on. The pedigree of the
+ * frame that has discovered that its parent has been stolen will be walked
+ * to generate the ORPHANED record.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_record_orphaned(__cilkrts_worker *w)
+{
+#if RECORD_ON_REPLAY
+    // Recording on replay: any mode other than "none" produces a record
+    const int should_record = (RECORD_REPLAY_NONE != w->g->record_or_replay);
+#else
+    // Normal build: only an explicit recording run produces a record
+    const int should_record = (RECORD_LOG == w->g->record_or_replay);
+#endif
+
+    if (should_record)
+        replay_record_orphaned_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_record_orphaned(__cilkrts_worker *w)
+{
+    // Record/replay is compiled out; nothing to write
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Test whether the frame at the head of the victim matches the pedigree of
+ * the frame that was recorded being stolen. Called in random steal to verify
+ * that we're about to steal the correct frame.
+ *
+ * @param w The __cilkrts_worker we're executing on. The current worker
+ * is needed to find the replay entry to be checked.
+ *
+ * @param victim The __cilkrts_worker we're proposing to steal a frame
+ * from. The frame at the victim's head is the one whose pedigree is tested
+ * against the replay entry.
+ *
+ * @return 0 if we're replaying a log and the victim's pedigree does NOT match
+ * the next frame the worker is expected to steal.
+ *
+ * @return 1 in all other cases to indicate that the steal attempt should
+ * continue
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+    // While replaying, the victim is acceptable only if its pedigree matches
+    // the frame this worker stole when the log was recorded
+    if (REPLAY_LOG == w->g->record_or_replay)
+        return replay_match_victim_pedigree_internal(w, victim);
+
+    // Not replaying a log: every victim is acceptable
+    return 1;
+}
+#else
+__CILKRTS_INLINE
+int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim)
+{
+    // Record/replay is compiled out; every victim is acceptable
+    return 1;
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Test whether the current replay entry is a sync record matching the
+ * worker's pedigree.
+ *
+ * @param w The __cilkrts_worker we're executing on.
+ *
+ * @return 1 if the current replay entry matches the current pedigree.
+ * @return 0 if there's no match, or if we're not replaying a log.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+int replay_match_sync_pedigree(__cilkrts_worker *w)
+{
+    // Only a replay run has a log entry to compare against
+    if (REPLAY_LOG == w->g->record_or_replay)
+        return replay_match_sync_pedigree_internal(w);
+
+    // Not replaying: report "no match"
+    return 0;
+}
+#else
+__CILKRTS_INLINE
+int replay_match_sync_pedigree(__cilkrts_worker *w)
+{
+    // Record/replay is compiled out; report "no match"
+    return 0;
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Marks a sync record seen, advancing to the next record in the replay list.
+ *
+ * This function will only advance to the next record if:
+ * - Record/replay hasn't been compiled out AND
+ * - We're replaying a log AND
+ * - A match was found AND
+ * - The sync is not being abandoned
+ *
+ * @param w The __cilkrts_worker we're executing on.
+ * @param match_found The value returned by replay_match_sync_pedigree(). If
+ * match_found is false, nothing is done.
+ * @param continuing Flag indicating whether this worker will continue from
+ * the sync (it's the last worker to the sync) or if it will abandon the work
+ * and go to the scheduling loop to look for more work it can steal.
+ */
+#ifdef CILK_RECORD_REPLAY
+__CILKRTS_INLINE
+void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
+{
+    // Not replaying a log: there is no record to advance past
+    if (REPLAY_LOG != w->g->record_or_replay)
+        return;
+
+    // No matching sync record, or the sync is being abandoned: leave the
+    // replay position where it is
+    if (! match_found || ! continuing)
+        return;
+
+    // Mark the sync record seen
+    replay_advance_from_sync_internal(w);
+}
+#else
+__CILKRTS_INLINE
+void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing)
+{
+    // Record/replay is compiled out; there is no replay list to advance
+}
+#endif // CILK_RECORD_REPLAY
+
+/**
+ * Release any resources used to read or write a replay log.
+ *
+ * @param g Cilk runtime global state
+ */
+void replay_term(global_state_t *g);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H)
diff --git a/libcilkrts/runtime/reducer_impl.cpp b/libcilkrts/runtime/reducer_impl.cpp
index be749c5072c..ec5a1e4037c 100644
--- a/libcilkrts/runtime/reducer_impl.cpp
+++ b/libcilkrts/runtime/reducer_impl.cpp
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
* Patents Pending, Intel Corporation.
**************************************************************************/
@@ -41,6 +46,7 @@
#include "reducer_impl.h"
#include "scheduler.h"
#include "bug.h"
+#include "os.h"
#include "global_state.h"
#include "frame_malloc.h"
@@ -71,6 +77,11 @@ static inline void verify_current_wkr(__cilkrts_worker *w)
#endif
}
+// Suppress clang warning that the expression result is unused
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wunused-value"
+#endif // __clang__
/// Helper class to disable and re-enable Cilkscreen
struct DisableCilkscreen
@@ -86,13 +97,23 @@ struct EnableCilkscreen
~EnableCilkscreen () { __cilkscreen_disable_checking(); }
};
-/** Element for a hyperobject */
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic pop
+#endif // __clang__
+
+/**
+ * @brief Element for a hyperobject
+ */
struct elem {
- void *key; // Shared key for this hyperobject
- __cilkrts_hyperobject_base *hb; // Base of the hyperobject.
- void *val; // Strand-private view of this hyperobject
- /// Destructor for an instance of this hyperobject
+ void *key; ///< Shared key for this hyperobject
+ __cilkrts_hyperobject_base *hb; ///< Base of the hyperobject.
+ void *view; ///< Strand-private view of this hyperobject
+ /// Destroy and deallocate the view object for this element and set view to
+ /// null.
void destroy();
+
+ /// Returns true if this element contains a leftmost view.
+ bool is_leftmost() const;
};
/** Bucket containing at most NMAX elements */
@@ -132,36 +153,40 @@ struct cilkred_map {
/** Set true for leftmost reducer map */
bool is_leftmost;
- /* Return element mapped to 'key' or null if not found. */
+ /** @brief Return element mapped to 'key' or null if not found. */
elem *lookup(void *key);
- /* Insert key/value element into hash map without rehashing. Does not
- * check for duplicate key. */
+ /**
+ * @brief Insert key/value element into hash map without rehashing.
+ * Does not check for duplicate key.
+ */
elem *insert_no_rehash(__cilkrts_worker *w,
void *key,
__cilkrts_hyperobject_base *hb,
void *value);
- /* Insert key/value element into hash map, rehashing if necessary. Does not
- * check for duplicate key. */
+ /**
+ * @brief Insert key/value element into hash map, rehashing if necessary.
+ * Does not check for duplicate key.
+ */
inline elem *rehash_and_insert(__cilkrts_worker *w,
void *key,
__cilkrts_hyperobject_base *hb,
void *value);
- /** Grow bucket by one element, reallocating bucket if necessary */
+ /** @brief Grow bucket by one element, reallocating bucket if necessary */
static elem *grow(__cilkrts_worker *w, bucket **bp);
- /** Rehash a worker's reducer map */
+ /** @brief Rehash a worker's reducer map */
void rehash(__cilkrts_worker *);
/**
- * Returns true if a rehash is needed due to the number of elements that
+ * @brief Returns true if a rehash is needed due to the number of elements that
* have been inserted.
*/
inline bool need_rehash_p() const;
- /** Allocate and initialize the buckets */
+ /** @brief Allocate and initialize the buckets */
void make_buckets(__cilkrts_worker *w, size_t nbuckets);
/**
@@ -176,17 +201,17 @@ struct cilkred_map {
};
/**
- * Merge another reducer map into this one, destroying the other map in
+ * @brief Merge another reducer map into this one, destroying the other map in
* the process.
*/
__cilkrts_worker* merge(__cilkrts_worker *current_wkr,
cilkred_map *other_map,
enum merge_kind kind);
- /** check consistency of a reducer map */
- void check(bool allow_null_val);
+ /** @brief check consistency of a reducer map */
+ void check(bool allow_null_view);
- /** Test whether the cilkred_map is empty */
+ /** @brief Test whether the cilkred_map is empty */
bool is_empty() { return nelem == 0; }
};
@@ -313,24 +338,25 @@ static inline size_t hashfun(const cilkred_map *h, void *key)
return k & (h->nbuckets - 1);
}
-// Given a __cilkrts_hyperobject_base, return a pointer to the leftmost view
-// object.
-static inline void* get_leftmost_view(__cilkrts_hyperobject_base *hb)
+// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in
+// the reducer map.
+static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb)
{
+ // The current implementation uses the address of the leftmost view as the
+ // key.
return reinterpret_cast<char*>(hb) + hb->__view_offset;
}
// Given a hyperobject key, return a pointer to the leftmost object. In the
// current implementation, the address of the leftmost object IS the key, so
-// this function is an effective noop. The key is passed by reference so that
-// conversion of arbitrary pointers to 'void*' are supressed.
-static inline void* get_leftmost_view(void *&key)
+// this function is an effective noop.
+static inline void* get_leftmost_view(void *key)
{
return key;
}
/* debugging support: check consistency of a reducer map */
-void cilkred_map::check(bool allow_null_val)
+void cilkred_map::check(bool allow_null_view)
{
size_t count = 0;
@@ -339,7 +365,7 @@ void cilkred_map::check(bool allow_null_val)
bucket *b = buckets[i];
if (b)
for (elem *el = b->el; el->key; ++el) {
- CILK_ASSERT(allow_null_val || el->val);
+ CILK_ASSERT(allow_null_view || el->view);
++count;
}
}
@@ -391,7 +417,7 @@ elem *cilkred_map::grow(__cilkrts_worker *w,
elem *cilkred_map::insert_no_rehash(__cilkrts_worker *w,
void *key,
__cilkrts_hyperobject_base *hb,
- void *val)
+ void *view)
{
#if REDPAR_DEBUG >= 2
@@ -402,18 +428,18 @@ elem *cilkred_map::insert_no_rehash(__cilkrts_worker *w,
CILK_ASSERT((w == 0 && g == 0) || w->g == g);
CILK_ASSERT(key != 0);
- CILK_ASSERT(val != 0);
+ CILK_ASSERT(view != 0);
elem *el = grow(w, &(buckets[hashfun(this, key)]));
#if REDPAR_DEBUG >= 3
- fprintf(stderr, "[W=%d, this=%p, inserting key=%p, val=%p, el = %p]\n",
- w->self, this, key, val, el);
+ fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n",
+ w->self, this, key, view, el);
#endif
el->key = key;
el->hb = hb;
- el->val = val;
+ el->view = view;
++nelem;
return el;
@@ -441,7 +467,7 @@ void cilkred_map::rehash(__cilkrts_worker *w)
if (b) {
elem *oel;
for (oel = b->el; oel->key; ++oel)
- insert_no_rehash(w, oel->key, oel->hb, oel->val);
+ insert_no_rehash(w, oel->key, oel->hb, oel->view);
}
}
@@ -453,19 +479,19 @@ void cilkred_map::rehash(__cilkrts_worker *w)
elem *cilkred_map::rehash_and_insert(__cilkrts_worker *w,
void *key,
__cilkrts_hyperobject_base *hb,
- void *val)
+ void *view)
{
#if REDPAR_DEBUG >= 1
- fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, val=%p\n",
- w->self, this, key, val);
+ fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, view=%p\n",
+ w->self, this, key, view);
verify_current_wkr(w);
#endif
if (need_rehash_p())
rehash(w);
- return insert_no_rehash(w, key, hb, val);
+ return insert_no_rehash(w, key, hb, view);
}
@@ -477,7 +503,7 @@ elem *cilkred_map::lookup(void *key)
elem *el;
for (el = b->el; el->key; ++el) {
if (el->key == key) {
- CILK_ASSERT(el->val);
+ CILK_ASSERT(el->view);
return el;
}
}
@@ -488,17 +514,27 @@ elem *cilkred_map::lookup(void *key)
void elem::destroy()
{
- // Call destroy_fn and deallocate_fn on all but the leftmost value
- if (val != key)
- {
- cilk_c_monoid *monoid = &(hb->__c_monoid);
+ if (! is_leftmost()) {
+
+ // Call destroy_fn and deallocate_fn on the view, but not if it's the
+ // leftmost view.
+ cilk_c_monoid *monoid = &(hb->__c_monoid);
cilk_c_reducer_destroy_fn_t destroy_fn = monoid->destroy_fn;
cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn;
- destroy_fn((void*)hb, val);
- deallocate_fn((void*)hb, val);
+ destroy_fn((void*)hb, view);
+ deallocate_fn((void*)hb, view);
}
- val = 0;
+
+ view = 0;
+}
+
+inline
+bool elem::is_leftmost() const
+{
+ // implementation uses the address of the leftmost view as the key, so if
+ // key == view, then this element refers to the leftmost view.
+ return key == view;
}
/* remove the reducer from the current reducer map. If the reducer
@@ -521,35 +557,42 @@ CILK_EXPORT void __CILKRTS_STRAND_STALE(
return;
}
+const char *UNSYNCED_REDUCER_MSG =
+ "Destroying a reducer while it is visible to unsynced child tasks, or\n"
+ "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n"
+ "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?";
+
cilkred_map* h = w->reducer_map;
- CILK_ASSERT(h);
+ if (NULL == h)
+ cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return
if (h->merging) {
verify_current_wkr(w);
__cilkrts_bug("User error: hyperobject used by another hyperobject");
}
- void* key = get_leftmost_view(hb);
+ void* key = get_hyperobject_key(hb);
elem *el = h->lookup(key);
- if (el) {
- /* found. */
+
+ // Verify that the reducer is being destroyed from the leftmost strand for
+ // which the reducer is defined.
+ if (! (el && el->is_leftmost()))
+ cilkos_error(UNSYNCED_REDUCER_MSG);
#if REDPAR_DEBUG >= 3
- fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
- w->self, key, h, el);
+ fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n",
+ w->self, key, h, el);
#endif
- /* Destroy view and remove element from bucket. */
- el->destroy();
-
- /* Shift all subsequent elements. Do not bother
- shrinking the bucket */
- do {
- el[0] = el[1];
- ++el;
- } while (el->key);
- --h->nelem;
- }
+ // Remove the element from the hash bucket. Do not bother shrinking
+ // the bucket. Note that the destroy() function does not actually
+ // call the destructor for the leftmost view.
+ el->destroy();
+ do {
+ el[0] = el[1];
+ ++el;
+ } while (el->key);
+ --h->nelem;
#if REDPAR_DEBUG >= 2
fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n",
@@ -577,29 +620,30 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
// will prevent Cilkscreen from reporting apparent races in reducers
DisableCilkscreen x;
- void* val = get_leftmost_view(hb);
+ void* key = get_hyperobject_key(hb);
+ void* view = get_leftmost_view(key);
cilkred_map *h = w->reducer_map;
if (__builtin_expect(!h, 0)) {
h = install_new_reducer_map(w);
#if REDPAR_DEBUG >= 2
- fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, val=%p]\n",
- w->self, hb, h, val);
+ fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, view=%p]\n",
+ w->self, hb, h, view);
#endif
}
/* Must not exist. */
- CILK_ASSERT(h->lookup(val) == NULL);
+ CILK_ASSERT(h->lookup(key) == NULL);
#if REDPAR_DEBUG >= 3
verify_current_wkr(w);
- fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of val %p, should be null]\n",
- w->self, hb, h, val);
- fprintf(stderr, "W=%d, h=%p, inserting key %p, val%p\n",
+ fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n",
+ w->self, hb, h, view);
+ fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
w->self,
h,
&(hb->__c_monoid),
- val);
+ view);
#endif
if (h->merging)
@@ -607,7 +651,7 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb)
CILK_ASSERT(w->reducer_map == h);
// The address of the leftmost value is the same as the key for lookup.
- (void) h->rehash_and_insert(w, val, hb, val);
+ (void) h->rehash_and_insert(w, view, hb, view);
}
extern "C"
@@ -615,7 +659,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE(
__cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb))
{
__cilkrts_worker* w = __cilkrts_get_tls_worker_fast();
- void* key = get_leftmost_view(hb);
+ void* key = get_hyperobject_key(hb);
if (! w)
return get_leftmost_view(key);
@@ -658,7 +702,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE(
}
#if REDPAR_DEBUG >= 3
- fprintf(stderr, "W=%d, h=%p, inserting key %p, val%p\n",
+ fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n",
w->self,
h,
&(hb->__c_monoid),
@@ -668,7 +712,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE(
el = h->rehash_and_insert(w, key, hb, rep);
}
- return el->val;
+ return el->view;
}
extern "C" CILK_EXPORT
@@ -718,10 +762,10 @@ void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g);
verify_current_wkr(w);
- /* the reducer map is allowed to contain el->val == NULL here (and
- only here). We set el->val == NULL only when we know that the
+ /* the reducer map is allowed to contain el->view == NULL here (and
+ only here). We set el->view == NULL only when we know that the
map will be destroyed immediately afterwards. */
- DBG h->check(/*allow_null_val=*/true);
+ DBG h->check(/*allow_null_view=*/true);
bucket *b;
size_t i;
@@ -731,7 +775,7 @@ void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h)
if (b) {
elem *el;
for (el = b->el; el->key; ++el) {
- if (el->val)
+ if (el->view)
el->destroy();
}
}
@@ -783,8 +827,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
bool merge_to_leftmost = (this->is_leftmost
/* && !other_map->is_leftmost */);
- DBG check(/*allow_null_val=*/false);
- DBG other_map->check(/*allow_null_val=*/false);
+ DBG check(/*allow_null_view=*/false);
+ DBG other_map->check(/*allow_null_view=*/false);
for (size_t i = 0; i < other_map->nbuckets; ++i) {
bucket *b = other_map->buckets[i];
@@ -792,8 +836,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
for (elem *other_el = b->el; other_el->key; ++other_el) {
/* Steal the value from the other map, which will be
destroyed at the end of this operation. */
- void *other_val = other_el->val;
- CILK_ASSERT(other_val);
+ void *other_view = other_el->view;
+ CILK_ASSERT(other_view);
void *key = other_el->key;
__cilkrts_hyperobject_base *hb = other_el->hb;
@@ -802,7 +846,7 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
if (this_el == 0 && merge_to_leftmost) {
/* Initialize leftmost view before merging. */
void* leftmost = get_leftmost_view(key);
- // leftmost == other_val can be true if the initial view
+ // leftmost == other_view can be true if the initial view
// was created in other than the leftmost strand of the
// spawn tree, but then made visible to subsequent strands
// (E.g., the reducer was allocated on the heap and the
@@ -811,17 +855,17 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
// strands will always result in 'this_el' being null,
// thus propagating the initial view up the spawn tree
// until it reaches the leftmost strand. When synching
- // with the leftmost strand, leftmost == other_val will be
+ // with the leftmost strand, leftmost == other_view will be
// true and we must avoid reducing the initial view with
// itself.
- if (leftmost != other_val)
+ if (leftmost != other_view)
this_el = rehash_and_insert(w, key, hb, leftmost);
}
if (this_el == 0) {
/* move object from other map into this one */
- rehash_and_insert(w, key, hb, other_val);
- other_el->val = 0;
+ rehash_and_insert(w, key, hb, other_view);
+ other_el->view = 0;
continue; /* No element-level merge necessary */
}
@@ -835,8 +879,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
case MERGE_INTO_RIGHT:
/* Swap elements in order to preserve object
identity */
- other_el->val = this_el->val;
- this_el->val = other_val;
+ other_el->view = this_el->view;
+ this_el->view = other_view;
/* FALL THROUGH */
case MERGE_INTO_LEFT: {
/* Stealing should be disabled during reduce
@@ -854,8 +898,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w,
/* TBD: if reduce throws an exception we need to stop it
here. */
hb->__c_monoid.reduce_fn((void*)hb,
- this_el->val,
- other_el->val);
+ this_el->view,
+ other_el->view);
w = current_sf->worker;
#if REDPAR_DEBUG >= 2
diff --git a/libcilkrts/runtime/reducer_impl.h b/libcilkrts/runtime/reducer_impl.h
index f088b969293..8e51da0dd0b 100644
--- a/libcilkrts/runtime/reducer_impl.h
+++ b/libcilkrts/runtime/reducer_impl.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -41,8 +46,6 @@
__CILKRTS_BEGIN_EXTERN_C
-typedef struct cilkred_map cilkred_map;
-
/**
* Construct an empty reducer map from the memory pool associated with the
* given worker. This reducer map must be destroyed before the worker's
diff --git a/libcilkrts/runtime/rts-common.h b/libcilkrts/runtime/rts-common.h
index 837c22546ab..f8e33d6c1fd 100644
--- a/libcilkrts/runtime/rts-common.h
+++ b/libcilkrts/runtime/rts-common.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#ifndef INCLUDED_RTS_COMMON_DOT_H
@@ -89,9 +94,27 @@
# define inline __inline
#endif
-/* Compilers that build the Cilk runtime are assumed to know about
- zero-cost intrinsics. For those that don't, comment out the
- following definition: */
-#define ENABLE_NOTIFY_ZC_INTRINSIC
+/* Compilers that build the Cilk runtime are assumed to know about zero-cost
+ * intrinsics (__notify_zc_intrinsic()). For those that don't, #undef the
+ * following definition:
+ */
+#define ENABLE_NOTIFY_ZC_INTRINSIC 1
+
+#if defined(__INTEL_COMPILER)
+/* The notify intrinsic was introduced in ICC 12.0. */
+# if __INTEL_COMPILER <= 1200
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+# endif
+#elif defined(__VXWORKS__)
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+#elif defined(__clang__)
+# if !defined(__has_extension) || !__has_extension(notify_zc_intrinsic)
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+# endif
+#elif defined(__arm__)
+// __notify_zc_intrinsic not yet supported by gcc for ARM
+# undef ENABLE_NOTIFY_ZC_INTRINSIC
+#endif
+
#endif // ! defined(INCLUDED_RTS_COMMON_DOT_H)
diff --git a/libcilkrts/runtime/scheduler.c b/libcilkrts/runtime/scheduler.c
index 0a19aea1933..54bec2cf9f5 100644
--- a/libcilkrts/runtime/scheduler.c
+++ b/libcilkrts/runtime/scheduler.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2007-2012
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2007-2012
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -38,12 +43,13 @@
#include "local_state.h"
#include "signal_node.h"
#include "full_frame.h"
-#include "stacks.h"
#include "sysdep.h"
#include "except.h"
#include "cilk_malloc.h"
#include "pedigrees.h"
+#include "record-replay.h"
+#include <limits.h>
#include <string.h> /* memcpy */
#include <stdio.h> // sprintf
#include <stdlib.h> // malloc, free, abort
@@ -51,6 +57,7 @@
#ifdef _WIN32
# pragma warning(disable:1786) // disable warning: sprintf is deprecated
# include "sysdep-win.h"
+# include "except-win32.h"
#endif // _WIN32
// ICL: Don't complain about conversion from pointer to same-sized integral
@@ -66,6 +73,7 @@
#include "cilk-tbb-interop.h"
#include "cilk-ittnotify.h"
#include "stats.h"
+
// ICL: Don't complain about loss of precision in myrand
// I tried restoring the warning after the function, but it didn't
// suppress it
@@ -77,6 +85,12 @@
# include <unistd.h>
#endif
+#ifdef __VXWORKS__
+// redeclare longjmp() with noreturn to stop warnings
+extern __attribute__((noreturn))
+ void longjmp(jmp_buf, int);
+#endif
+
//#define DEBUG_LOCKS 1
#ifdef DEBUG_LOCKS
// The currently executing worker must own this worker's lock
@@ -94,12 +108,22 @@ enum schedule_t { SCHEDULE_RUN,
SCHEDULE_WAIT,
SCHEDULE_EXIT };
+// Return values for provably_good_steal()
+enum provably_good_steal_t
+{
+ ABANDON_EXECUTION, // Not the last child to the sync - attempt to steal work
+ CONTINUE_EXECUTION, // Last child to the sync - continue executing on this worker
+ WAIT_FOR_CONTINUE // The replay log indicates that this was the worker
+ // which continued. Loop until we are the last worker
+ // to the sync.
+};
+
// Verify that "w" is the worker we are currently executing on.
// Because this check is expensive, this method is usually a no-op.
static inline void verify_current_wkr(__cilkrts_worker *w)
{
-#if REDPAR_DEBUG >= 3
+#if ((REDPAR_DEBUG >= 3) || (FIBER_DEBUG >= 1))
// Lookup the worker from TLS and compare to w.
__cilkrts_worker* tmp = __cilkrts_get_tls_worker();
if (w != tmp) {
@@ -113,7 +137,7 @@ static inline void verify_current_wkr(__cilkrts_worker *w)
static enum schedule_t worker_runnable(__cilkrts_worker *w);
-// Scheduling-stack functions:
+// Scheduling-fiber functions:
static void do_return_from_spawn (__cilkrts_worker *w,
full_frame *ff,
__cilkrts_stack_frame *sf);
@@ -121,7 +145,8 @@ static void do_sync (__cilkrts_worker *w,
full_frame *ff,
__cilkrts_stack_frame *sf);
-#ifndef _WIN32
+// max is defined on Windows and VxWorks
+#if (! defined(_WIN32)) && (! defined(__VXWORKS__))
// TBD: definition of max() for Linux.
# define max(a, b) ((a) < (b) ? (b) : (a))
#endif
@@ -130,8 +155,15 @@ void __cilkrts_dump_stats_to_stderr(global_state_t *g)
{
#ifdef CILK_PROFILE
int i;
- for (i = 0; i < g->total_workers; ++i)
- __cilkrts_accum_stats(&g->stats, &g->workers[i]->l->stats);
+ for (i = 0; i < g->total_workers; ++i) {
+ // Print out statistics for each worker. We collected them,
+ // so why not print them out?
+ fprintf(stderr, "Stats for worker %d\n", i);
+ dump_stats_to_file(stderr, g->workers[i]->l->stats);
+ __cilkrts_accum_stats(&g->stats, g->workers[i]->l->stats);
+ }
+
+ // Also print out aggregate statistics.
dump_stats_to_file(stderr, &g->stats);
#endif
fprintf(stderr,
@@ -196,6 +228,12 @@ static int decjoin(full_frame *ff)
return (--ff->join_counter);
}
+static int simulate_decjoin(full_frame *ff)
+{
+ CILK_ASSERT(ff->join_counter > 0);
+ return (ff->join_counter - 1);
+}
+
/*
* Pseudo-random generator defined by the congruence S' = 69070 * S
* mod (2^32 - 5). Marsaglia (CACM July 1993) says on page 107 that
@@ -338,7 +376,7 @@ static void make_runnable(__cilkrts_worker *w, full_frame *ff)
static void make_unrunnable(__cilkrts_worker *w,
full_frame *ff,
__cilkrts_stack_frame *sf,
- int state_valid,
+ int is_loot,
const char *why)
{
/* CALL_STACK becomes valid again */
@@ -352,12 +390,12 @@ static void make_unrunnable(__cilkrts_worker *w,
sf->flags |= CILK_FRAME_STOLEN | CILK_FRAME_SUSPENDED;
sf->worker = 0;
- if (state_valid)
+ if (is_loot)
__cilkrts_put_stack(ff, sf);
/* perform any system-dependent action, such as saving the
state of the stack */
- __cilkrts_make_unrunnable_sysdep(w, ff, sf, state_valid, why);
+ __cilkrts_make_unrunnable_sysdep(w, ff, sf, is_loot, why);
}
}
@@ -435,34 +473,38 @@ static void unset_sync_master(__cilkrts_worker *w, full_frame *ff)
w->l->last_full_frame = NULL;
}
-/*************************************************************
- THE protocol:
-*************************************************************/
+/********************************************************************
+ * THE protocol:
+ ********************************************************************/
/*
- This is a protocol for work stealing that minimize the
- overhead on the victim.
-
- The protocol uses three shared pointes into the victim's deque: T
- (the ``tail''), H (the ``head'') and E (the ``exception''),
- with H <= E, H <= T. (NB: "exception," in this case has nothing to do with
- C++ throw-catch exceptions -- it refers only to a non-normal return, i.e., a
- steal or similar scheduling exception.)
-
- Stack frames P, where H <= E < T, are available for stealing.
-
- The victim operates on the T end of the stack. The frame being
- worked on by the victim is not on the stack. To push, the victim
- stores *T++=frame. To pop, it obtains frame=*--T.
-
- After decrementing T, the condition E > T signals to the victim that
- it should invoke the runtime system ``THE'' exception handler. The
- pointer E can become INFINITY, in which case the victim must invoke
- the THE exception handler as soon as possible.
-
- See "The implementation of the Cilk-5 multithreaded language", PLDI 1998,
- http://portal.acm.org/citation.cfm?doid=277652.277725, for more information
- on the THE protocol.
-*/
+ * This is a protocol for work stealing that minimizes the overhead on
+ * the victim.
+ *
+ * The protocol uses three shared pointers into the worker's deque:
+ * - T - the "tail"
+ * - H - the "head"
+ * - E - the "exception" NB: In this case, "exception" has nothing to do
+ * with C++ throw-catch exceptions -- it refers only to a non-normal return,
+ * i.e., a steal or similar scheduling exception.
+ *
+ * with H <= E, H <= T.
+ *
+ * Stack frames SF, where H <= E < T, are available for stealing.
+ *
+ * The worker operates on the T end of the stack. The frame being
+ * worked on is not on the stack. To make a continuation available for
+ * stealing the worker pushes a frame onto the stack: stores *T++ = SF.
+ * To return, it pops the frame off the stack: obtains SF = *--T.
+ *
+ * After decrementing T, the condition E > T signals to the victim that
+ * it should invoke the runtime system's "THE" exception handler. The
+ * pointer E can become INFINITY, in which case the victim must invoke
+ * the THE exception handler as soon as possible.
+ *
+ * See "The implementation of the Cilk-5 multithreaded language", PLDI 1998,
+ * http://portal.acm.org/citation.cfm?doid=277652.277725, for more information
+ * on the THE protocol.
+ */
/* the infinity value of E */
#define EXC_INFINITY ((__cilkrts_stack_frame **) (-1))
@@ -553,11 +595,12 @@ static int dekker_protocol(__cilkrts_worker *victim)
}
}
+
/* Link PARENT and CHILD in the spawn tree */
static full_frame *make_child(__cilkrts_worker *w,
full_frame *parent_ff,
__cilkrts_stack_frame *child_sf,
- __cilkrts_stack *sd)
+ cilk_fiber *fiber)
{
full_frame *child_ff = __cilkrts_make_full_frame(w, child_sf);
@@ -570,15 +613,14 @@ static full_frame *make_child(__cilkrts_worker *w,
// w->self, child, parent, child_sf,
// parent->parent, parent->left_sibling, parent->right_sibling, parent->rightmost_child,
// child->parent, child->left_sibling, child->right_sibling, child->rightmost_child);
-
CILK_ASSERT(parent_ff->call_stack);
- child_ff->is_call_child = (sd == NULL);
+ child_ff->is_call_child = (fiber == NULL);
- /* PLACEHOLDER_STACK is used as non-null marker indicating that
+ /* PLACEHOLDER_FIBER is used as non-null marker indicating that
child should be treated as a spawn child even though we have not
- yet assigned a real stack to its parent. */
- if (sd == PLACEHOLDER_STACK)
- sd = NULL; /* Parent actually gets a null stack, for now */
+ yet assigned a real fiber to its parent. */
+ if (fiber == PLACEHOLDER_FIBER)
+ fiber = NULL; /* Parent actually gets a null fiber, for now */
/* perform any system-dependent actions, such as capturing
parameter passing information */
@@ -586,19 +628,15 @@ static full_frame *make_child(__cilkrts_worker *w,
/* Child gets reducer map and stack of parent.
Parent gets a new map and new stack. */
- child_ff->stack_self = parent_ff->stack_self;
+ child_ff->fiber_self = parent_ff->fiber_self;
child_ff->sync_master = NULL;
if (child_ff->is_call_child) {
/* Cause segfault on any attempted access. The parent gets
the child map and stack when the child completes. */
- parent_ff->stack_self = 0;
+ parent_ff->fiber_self = 0;
} else {
- parent_ff->stack_self = sd;
- __cilkrts_bind_stack(parent_ff,
- __cilkrts_stack_to_pointer(parent_ff->stack_self, child_sf),
- child_ff->stack_self,
- child_ff->sync_master);
+ parent_ff->fiber_self = fiber;
}
incjoin(parent_ff);
@@ -677,7 +715,7 @@ static full_frame *unroll_call_stack(__cilkrts_worker *w,
CHILD frame in its place */
static void detach_for_steal(__cilkrts_worker *w,
__cilkrts_worker *victim,
- __cilkrts_stack *sd)
+ cilk_fiber* fiber)
{
/* ASSERT: we own victim->lock */
@@ -741,7 +779,7 @@ static void detach_for_steal(__cilkrts_worker *w,
__cilkrts_push_next_frame(w, loot_ff);
// After this "push_next_frame" call, w now owns loot_ff.
- child_ff = make_child(w, loot_ff, 0, sd);
+ child_ff = make_child(w, loot_ff, 0, fiber);
BEGIN_WITH_FRAME_LOCK(w, child_ff) {
/* install child in the victim's work queue, taking
@@ -764,12 +802,90 @@ static void detach_for_steal(__cilkrts_worker *w,
} END_WITH_FRAME_LOCK(w, parent_ff);
}
+/**
+ * @brief cilk_fiber_proc that resumes user code after a successful
+ * random steal.
+ *
+ * This function longjmps back into the user code whose state is
+ * stored in cilk_fiber_get_data(fiber)->resume_sf. The stack pointer
+ * is adjusted so that the code resumes on the specified fiber stack
+ * instead of its original stack.
+ *
+ * This method gets executed only on a fiber freshly allocated from a
+ * pool.
+ *
+ * @param fiber The fiber being used to resume user code.
+ * @param arg Unused.
+ */
+static
+void fiber_proc_to_resume_user_code_for_random_steal(cilk_fiber *fiber)
+{
+ cilk_fiber_data *data = cilk_fiber_get_data(fiber);
+ __cilkrts_stack_frame* sf = data->resume_sf;
+ full_frame *ff;
+
+ CILK_ASSERT(sf);
+
+ // When we pull the resume_sf out of the fiber to resume it, clear
+ // the old value.
+ data->resume_sf = NULL;
+ CILK_ASSERT(sf->worker == data->owner);
+ ff = sf->worker->l->frame_ff;
+
+ // For Win32, we need to overwrite the default exception handler
+ // in this function, so that when the OS exception handling code
+ // walks off the top of the current Cilk stack, it reaches our stub
+ // handler.
+
+ // Also, this function needs to be wrapped into a try-catch block
+ // so the compiler generates the appropriate exception information
+ // in this frame.
+
+ // TBD: IS THIS HANDLER IN THE WRONG PLACE? Can we longjmp out of
+ // this function (and does it matter?)
+#if defined(_WIN32) && !defined(_WIN64)
+ install_exception_stub_handler();
+ __try
+#endif
+ {
+ char* new_sp = sysdep_reset_jump_buffers_for_resume(fiber, ff, sf);
+
+ // Notify the Intel tools that we're stealing code
+ ITT_SYNC_ACQUIRED(sf->worker);
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
+ __notify_zc_intrinsic("cilk_continue", sf);
+#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
+
+ // TBD: We'd like to move TBB-interop methods into the fiber
+ // eventually.
+ cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
+
+ sf->flags &= ~CILK_FRAME_SUSPENDED;
+
+ // longjmp to user code. Don't process exceptions here,
+ // because we are resuming a stolen frame.
+ sysdep_longjmp_to_sf(new_sp, sf, NULL);
+ /*NOTREACHED*/
+ // Intel's C compiler respects the preceding lint pragma
+ }
+#if defined(_WIN32) && !defined(_WIN64)
+ __except (CILK_ASSERT(!"should not execute the the stub filter"),
+ EXCEPTION_EXECUTE_HANDLER)
+ {
+ // If we are here, that means something very wrong
+ // has happened in our exception processing...
+ CILK_ASSERT(! "should not be here!");
+ }
+#endif
+}
+
static void random_steal(__cilkrts_worker *w)
{
- __cilkrts_worker *victim;
- __cilkrts_stack *sd;
+ __cilkrts_worker *victim = NULL;
+ cilk_fiber *fiber = NULL;
int n;
int success = 0;
+ int32_t victim_id;
// Nothing's been stolen yet. When true, this will flag
// setup_for_execution_pedigree to increment the pedigree
@@ -785,16 +901,35 @@ static void random_steal(__cilkrts_worker *w)
There must be only one worker to prevent stealing. */
CILK_ASSERT(w->g->total_workers > 1);
- /* Verify that we can get a stack. If not, no need to continue. */
- sd = __cilkrts_get_stack(w);
- if (NULL == sd) {
+ /* pick random *other* victim */
+ n = myrand(w) % (w->g->total_workers - 1);
+ if (n >= w->self)
+ ++n;
+
+ // If we're replaying a log, override the victim. -1 indicates that
+ // we've exhausted the list of things this worker stole when we recorded
+ // the log so just return. If we're not replaying a log,
+ // replay_get_next_recorded_victim() just returns the victim ID passed in.
+ n = replay_get_next_recorded_victim(w, n);
+ if (-1 == n)
return;
- }
- /* pick random *other* victim */
- n = myrand(w) % (w->g->total_workers - 1); if (n >= w->self) ++n;
victim = w->g->workers[n];
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) {
+ /* Verify that we can get a stack. If not, no need to continue. */
+ fiber = cilk_fiber_allocate(&w->l->fiber_pool);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE);
+
+
+ if (NULL == fiber) {
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "w=%d: failed steal because we could not get a fiber\n",
+ w->self);
+#endif
+ return;
+ }
+
/* do not steal from self */
CILK_ASSERT (victim != w);
@@ -802,10 +937,15 @@ static void random_steal(__cilkrts_worker *w)
Avoid grabbing locks if there is nothing to steal. */
if (!can_steal_from(victim)) {
NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ);
- __cilkrts_release_stack(w, sd);
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
+ // Fibers we use when trying to steal should not be active,
+ // and thus should not have any other references.
+ CILK_ASSERT(0 == ref_count);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
return;
}
-
+
/* Attempt to steal work from the victim */
if (worker_trylock_other(w, victim)) {
if (w->l->type == WORKER_USER && victim->l->team != w) {
@@ -828,24 +968,42 @@ static void random_steal(__cilkrts_worker *w)
// though the victim may be executing. Thus, the lock on
// the victim's deque is also protecting victim->frame_ff.
if (dekker_protocol(victim)) {
- START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) {
- success = 1;
- detach_for_steal(w, victim, sd);
- #if REDPAR_DEBUG >= 1
- fprintf(stderr, "Wkr %d stole from victim %d, sd = %p\n",
- w->self, victim->self, sd);
- #endif
-
- // The use of victim->self contradicts our
- // classification of the "self" field as
- // local. But since this code is only for
- // debugging, it is ok.
- DBGPRINTF ("%d-%p: Stealing work from worker %d\n"
- " sf: %p, call parent: %p\n",
- w->self, GetCurrentFiber(), victim->self,
- w->l->next_frame_ff->call_stack,
- w->l->next_frame_ff->call_stack->call_parent);
- } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS);
+ int proceed_with_steal = 1; // optimistic
+
+ // If we're replaying a log, verify that this is the correct frame
+ // to steal from the victim
+ if (! replay_match_victim_pedigree(w, victim))
+ {
+ // Abort the steal attempt. decrement_E(victim) to
+ // counter the increment_E(victim) done by the
+ // dekker protocol
+ decrement_E(victim);
+ proceed_with_steal = 0;
+ }
+
+ if (proceed_with_steal)
+ {
+ START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) {
+ success = 1;
+ detach_for_steal(w, victim, fiber);
+ victim_id = victim->self;
+
+ #if REDPAR_DEBUG >= 1
+ fprintf(stderr, "Wkr %d stole from victim %d, fiber = %p\n",
+ w->self, victim->self, fiber);
+ #endif
+
+ // The use of victim->self contradicts our
+ // classification of the "self" field as
+ // local. But since this code is only for
+ // debugging, it is ok.
+ DBGPRINTF ("%d-%p: Stealing work from worker %d\n"
+ " sf: %p, call parent: %p\n",
+ w->self, GetCurrentFiber(), victim->self,
+ w->l->next_frame_ff->call_stack,
+ w->l->next_frame_ff->call_stack->call_parent);
+ } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS);
+ } // end if(proceed_with_steal)
} else {
NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_DEKKER);
}
@@ -862,11 +1020,28 @@ static void random_steal(__cilkrts_worker *w)
w->l->work_stolen = success;
if (0 == success) {
- // failed to steal work. Return the stack to the pool.
- __cilkrts_release_stack(w, sd);
+ // failed to steal work. Return the fiber to the pool.
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool);
+ // Fibers we use when trying to steal should not be active,
+ // and thus should not have any other references.
+ CILK_ASSERT(0 == ref_count);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
+ }
+ else
+ {
+ // Since our steal was successful, finish initialization of
+ // the fiber.
+ cilk_fiber_reset_state(fiber,
+ fiber_proc_to_resume_user_code_for_random_steal);
+ // Record the pedigree of the frame that w has stolen.
+ // record only if CILK_RECORD_LOG is set
+ replay_record_steal(w, victim_id);
}
}
+
+
/**
* At a provably good steal, we need to transfer the child reducer map
* from ff->children_reducer_map into v->reducer_map, where v is the
@@ -903,17 +1078,9 @@ static void provably_good_steal_exceptions(__cilkrts_worker *w,
/* At sync discard the frame's old stack and take the leftmost child's. */
static void provably_good_steal_stacks(__cilkrts_worker *w, full_frame *ff)
{
- __cilkrts_stack *s;
- s = ff->stack_self;
- ff->stack_self = ff->stack_child;
- ff->stack_child = NULL;
- if (s) {
- __cilkrts_release_stack(w, s);
- }
-
- /* We don't have a stack to bind right now, so use the
- BIND_PROVABLY_GOOD_STACK magic number, instead */
- __cilkrts_bind_stack(ff, ff->sync_sp, BIND_PROVABLY_GOOD_STACK, NULL);
+ CILK_ASSERT(NULL == ff->fiber_self);
+ ff->fiber_self = ff->fiber_child;
+ ff->fiber_child = NULL;
}
static void __cilkrts_mark_synched(full_frame *ff)
@@ -922,14 +1089,21 @@ static void __cilkrts_mark_synched(full_frame *ff)
ff->simulated_stolen = 0;
}
-static int provably_good_steal(__cilkrts_worker *w,
- full_frame *ff)
+static
+enum provably_good_steal_t provably_good_steal(__cilkrts_worker *w,
+ full_frame *ff)
{
// ASSERT: we hold w->lock and ff->lock
- int abandoned = 1; // True if we can't make any more progress on this
- // thread and are going to attempt to steal work from
- // someone else
+ enum provably_good_steal_t result = ABANDON_EXECUTION;
+
+ // If the current replay entry is a sync record matching the worker's
+ // pedigree, AND this isn't the last child to the sync, return
+ // WAIT_FOR_CONTINUE to indicate that the caller should loop until
+ // we find the right frame to steal and CONTINUE_EXECUTION is returned.
+ int match_found = replay_match_sync_pedigree(w);
+ if (match_found && (0 != simulate_decjoin(ff)))
+ return WAIT_FOR_CONTINUE;
START_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL) {
if (decjoin(ff) == 0) {
@@ -953,10 +1127,10 @@ static int provably_good_steal(__cilkrts_worker *w,
// If this is the team leader we're not abandoning the work
if (w == w->l->team)
- abandoned = 0;
+ result = CONTINUE_EXECUTION;
} else {
__cilkrts_push_next_frame(w, ff);
- abandoned = 0; // Continue working on this thread
+ result = CONTINUE_EXECUTION; // Continue working on this thread
}
// The __cilkrts_push_next_frame() call changes ownership
@@ -964,7 +1138,16 @@ static int provably_good_steal(__cilkrts_worker *w,
}
} STOP_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL);
- return abandoned;
+ // Only write a SYNC record if:
+ // - We're recording a log *AND*
+ // - We're the worker continuing from this sync
+ replay_record_sync(w, result == CONTINUE_EXECUTION);
+
+ // If we're replaying a log, and matched a sync from the log, mark the
+ // sync record seen if the sync isn't going to be abandoned.
+ replay_advance_from_sync (w, match_found, result == CONTINUE_EXECUTION);
+
+ return result;
}
static void unconditional_steal(__cilkrts_worker *w,
@@ -1038,15 +1221,15 @@ static inline void splice_stacks_for_call(__cilkrts_worker *w,
#endif
/* A synched frame does not have accumulated child reducers. */
- CILK_ASSERT(!child_ff->stack_child);
+ CILK_ASSERT(!child_ff->fiber_child);
CILK_ASSERT(child_ff->is_call_child);
- /* An attached parent has no self stack. It may have
- accumulated child stacks or child owners, which should be
+ /* An attached parent has no self fiber. It may have
+ accumulated child fibers or child owners, which should be
ignored until sync. */
- CILK_ASSERT(!parent_ff->stack_self);
- parent_ff->stack_self = child_ff->stack_self;
- child_ff->stack_self = NULL;
+ CILK_ASSERT(!parent_ff->fiber_self);
+ parent_ff->fiber_self = child_ff->fiber_self;
+ child_ff->fiber_self = NULL;
}
static void finalize_child_for_call(__cilkrts_worker *w,
@@ -1221,15 +1404,178 @@ static void setup_for_execution(__cilkrts_worker *w,
make_runnable(w, ff);
}
+
+/*
+ * Called by the scheduling fiber, right before
+ * resuming a sf/ff for user code.
+ *
+ * This method associates the specified sf with the worker.
+ *
+ * It also asserts that w, ff, and sf all have the expected properties
+ * for resuming user code.
+ */
+void scheduling_fiber_prepare_to_resume_user_code(__cilkrts_worker *w,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+ w->current_stack_frame = sf;
+ sf->worker = w;
+
+ // Lots of debugging checks on the state of the fiber we might be
+ // resuming.
+#if FIBER_DEBUG >= 1
+# if FIBER_DEBUG >= 3
+ {
+ fprintf(stderr, "w=%d: ff=%p, sf=%p. about to resume user code\n",
+ w->self, ff, sf);
+ }
+# endif
+
+ const int flags = sf->flags;
+ CILK_ASSERT(flags & CILK_FRAME_SUSPENDED);
+ CILK_ASSERT(!sf->call_parent);
+ CILK_ASSERT(w->head == w->tail);
+
+ /* A frame can not be resumed unless it was suspended. */
+ CILK_ASSERT(ff->sync_sp != NULL);
+
+ /* The leftmost frame has no allocated stack */
+ if (ff->simulated_stolen)
+ CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED);
+ else if (flags & CILK_FRAME_UNSYNCHED)
+ /* XXX By coincidence sync_sp could be null. */
+ CILK_ASSERT(ff->fiber_self != NULL);
+ else
+ /* XXX This frame could be resumed unsynched on the leftmost stack */
+ CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w));
+ CILK_ASSERT(w->l->frame_ff == ff);
+#endif
+}
+
+
+/**
+ * This method is the first method that should execute after we've
+ * switched to a scheduling fiber from user code.
+ *
+ * @param fiber The scheduling fiber for the current worker. The worker
+ * itself is obtained from the fiber's owner.
+ */
+static void enter_runtime_transition_proc(cilk_fiber *fiber)
+{
+ // We can execute this method for one of three reasons:
+ // 1. Undo-detach finds parent stolen.
+ // 2. Sync suspends frame.
+ // 3. Return from Cilk entry point.
+ //
+ //
+ // In cases 1 and 2, the frame may be truly suspended or
+ // may be immediately executed by this worker after provably_good_steal.
+ //
+ //
+ // There is a fourth case, which can, but does not need to execute
+ // this function:
+ // 4. Starting up the scheduling loop on a user or
+ // system worker. In this case, we won't have
+ // a scheduling stack function to run.
+ __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
+ if (w->l->post_suspend) {
+ // Run the continuation function passed to longjmp_into_runtime
+ run_scheduling_stack_fcn(w);
+
+ // After we have jumped into the runtime and run the
+ // scheduling function, any reducer map the worker had before entering the runtime
+ // should have already been saved into the appropriate full
+ // frame.
+ CILK_ASSERT(NULL == w->reducer_map);
+
+ // There shouldn't be any uncaught exceptions.
+ //
+ // In Windows, the OS catches any exceptions not caught by the
+ // user code. Thus, we are omitting the check on Windows.
+ //
+ // On Android, calling std::uncaught_exception with the stlport
+ // library causes a seg fault. Since we're not supporting
+ // exceptions there at this point, just don't do the check
+ //
+ // TBD: Is this check also safe to do on Windows?
+ CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
+ }
+}
+
+
+/**
+ * Method called to jump back to executing user code.
+ *
+ * A normal return from the runtime back to resuming user code calls
+ * this method. A computation executed using force_reduce also calls
+ * this method to return to user code.
+ *
+ * This function should not contain any code that depends on a fiber.
+ * In a force-reduce case, the user worker may not have a fiber. In
+ * the force-reduce case, we call this method directly instead of
+ * calling @c user_code_resume_after_switch_into_runtime.
+ */
+static inline NORETURN
+cilkrts_resume(__cilkrts_stack_frame *sf, full_frame *ff)
+{
+ // Save the sync stack pointer, and do the bookkeeping
+ char* sync_sp = ff->sync_sp;
+ __cilkrts_take_stack(ff, sync_sp); // leaves ff->sync_sp null
+
+ sf->flags &= ~CILK_FRAME_SUSPENDED;
+ // Actually longjmp to the user code.
+ // We may have exceptions to deal with, since we are resuming
+ // a previously suspended frame.
+ sysdep_longjmp_to_sf(sync_sp, sf, ff);
+}
+
+
+/**
+ * Called by the user-code fiber right before resuming a full frame
+ * (sf/ff).
+ *
+ * This method pulls sf/ff out of the worker, and then calls
+ * cilkrts_resume to jump to user code.
+ */
+static NORETURN
+user_code_resume_after_switch_into_runtime(cilk_fiber *fiber)
+{
+ __cilkrts_worker *w = cilk_fiber_get_owner(fiber);
+ __cilkrts_stack_frame *sf;
+ full_frame *ff;
+ sf = w->current_stack_frame;
+ ff = sf->worker->l->frame_ff;
+
+#if FIBER_DEBUG >= 1
+ CILK_ASSERT(ff->fiber_self == fiber);
+ cilk_fiber_data *fdata = cilk_fiber_get_data(fiber);
+ DBGPRINTF ("%d-%p: resume_after_switch_into_runtime, fiber=%p\n",
+ w->self, w, fiber);
+ CILK_ASSERT(sf == fdata->resume_sf);
+#endif
+
+ // Notify the Intel tools that we're stealing code
+ ITT_SYNC_ACQUIRED(sf->worker);
+#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
+ __notify_zc_intrinsic("cilk_continue", sf);
+#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
+ cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT);
+
+ // Actually jump to user code.
+ cilkrts_resume(sf, ff);
+ }
+
+
/* The current stack is about to either be suspended or destroyed. This
* function will switch to the stack on which the scheduler is suspended and
* resume running the scheduler within function do_work(). Upon waking up,
* the scheduler will run the 'cont' function, using the supplied worker and
* frame.
*/
-static NORETURN longjmp_into_runtime(__cilkrts_worker *w,
- scheduling_stack_fcn_t fcn,
- __cilkrts_stack_frame *sf)
+static NORETURN
+longjmp_into_runtime(__cilkrts_worker *w,
+ scheduling_stack_fcn_t fcn,
+ __cilkrts_stack_frame *sf)
{
full_frame *ff, *ff2;
@@ -1237,7 +1583,6 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w,
ff = w->l->frame_ff;
// If we've got only one worker, stealing shouldn't be possible.
- //
// Assume that this is a steal or return from spawn in a force-reduce case.
// We don't have a scheduling stack to switch to, so call the continuation
// function directly.
@@ -1251,8 +1596,19 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w,
ff2 = pop_next_frame(w);
setup_for_execution(w, ff2, 0);
- __cilkrts_resume(w, ff2, w->current_stack_frame); /* no return */
- CILK_ASSERT(("returned from __cilkrts_resume", 0));
+ scheduling_fiber_prepare_to_resume_user_code(w, ff2, w->current_stack_frame);
+ cilkrts_resume(w->current_stack_frame, ff2);
+
+// Suppress clang warning that the expression result is unused
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic push
+# pragma clang diagnostic ignored "-Wunused-value"
+#endif // __clang__
+ /* no return */
+ CILK_ASSERT(((void)"returned from __cilkrts_resume", 0));
+#if defined(__clang__) && (! defined(__INTEL_COMPILER))
+# pragma clang diagnostic pop
+#endif // __clang__
}
w->l->post_suspend = fcn;
@@ -1261,43 +1617,91 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w,
ITT_SYNC_RELEASING(w);
ITT_SYNC_PREPARE(w);
- // If this is a user worker, and it's the first time that it's returned to
- // a stolen parent, we need to import the thread. This will create a
- // scheduling stack or fiber, switch to that, and run the scheduling loop
- // on it
- if ((WORKER_USER == w->l->type) && (0 == w->l->user_thread_imported))
- {
- // We're importing the thread
- w->l->user_thread_imported = 1;
- __cilkrts_sysdep_import_user_thread(w);
- CILK_ASSERT(0); // Should never reach this point.
- }
-
-
-#ifndef _WIN32
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime... w->l->frame_ff = %p, sf=%p\n",
+ cilkos_get_current_thread_id(),
+ w->self, w->l->frame_ff,
+ sf);
+#endif
- // Jump to this thread's scheduling stack.
- longjmp(w->l->env, 1);
-#else
- DBGPRINTF ("%d-%p: longjmp_into_runtime - "
- "Switching to scheduling fiber - %p\n"
- " continuation routine: %p, sf: %p\n",
- w->self, GetWorkerFiber(w), w->sysdep->scheduling_fiber,
- fcn, sf);
-#ifdef _DEBUG
- SetWorkerThreadName(w, NULL);
+ // Current fiber is either (1) the one we are about to free,
+ // or (2) it has been passed up to the parent.
+ cilk_fiber *current_fiber = ( w->l->fiber_to_free ?
+ w->l->fiber_to_free :
+ w->l->frame_ff->parent->fiber_child );
+ cilk_fiber_data* fdata = cilk_fiber_get_data(current_fiber);
+ CILK_ASSERT(NULL == w->l->frame_ff->fiber_self);
+
+ // Clear the sf in the current fiber for cleanliness, to prevent
+ // us from accidentally resuming a bad sf.
+ // Technically, resume_sf gets overwritten for a fiber when
+ // we are about to resume it anyway.
+ fdata->resume_sf = NULL;
+ CILK_ASSERT(fdata->owner == w);
+
+ // Set the function to execute immediately after switching to the
+ // scheduling fiber, but before freeing any fibers.
+ cilk_fiber_set_post_switch_proc(w->l->scheduling_fiber,
+ enter_runtime_transition_proc);
+ cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_ORPHAN);
+
+ if (w->l->fiber_to_free) {
+ // Case 1: we are freeing this fiber. We never
+ // resume this fiber again after jumping into the runtime.
+ w->l->fiber_to_free = NULL;
+
+ // Extra check. Normally, the fiber we are about to switch to
+ // should have a NULL owner.
+ CILK_ASSERT(NULL == cilk_fiber_get_data(w->l->scheduling_fiber)->owner);
+#if FIBER_DEBUG >= 4
+ fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime.. current_fiber = %p, deallcoate, switch to fiber %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, w->l->scheduling_fiber);
#endif
- SwitchToFiber(w->sysdep->scheduling_fiber);
-
- /* Since we switched away from the fiber on which this function was
- * entered, we will not get here until either the initial fiber is
- * resumed. If the initial fiber belonged to a thief at a sync, then
- * the longjmp below will re-initialize the fiber for another steal.
- * If this fiber belonged to a victim, then the longjmp below will
- * resume the victim after the sync.
- */
- __cilkrts_resume_after_longjmp_into_runtime();
+ cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_RELEASE);
+ NOTE_INTERVAL(w, INTERVAL_DEALLOCATE_RESUME_OTHER);
+ cilk_fiber_remove_reference_from_self_and_resume_other(current_fiber,
+ &w->l->fiber_pool,
+ w->l->scheduling_fiber);
+ // We should never come back here!
+ CILK_ASSERT(0);
+ }
+ else {
+ // Case 2: We are passing the fiber to our parent because we
+ // are leftmost. We should come back later to
+ // resume execution of user code.
+ //
+ // If we are not freeing a fiber, then we must be
+ // returning from a spawn or processing an exception. The
+ // "sync" path always frees a fiber.
+ //
+ // We must be the leftmost child, and by left holder logic, we
+ // have already moved the current fiber into our parent full
+ // frame.
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "ThreadId=%p, W=%d: about to suspend self into runtime.. current_fiber = %p, deallcoate, switch to fiber %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, w->l->scheduling_fiber);
+#endif
+
+ NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
+
+ cilk_fiber_suspend_self_and_resume_other(current_fiber,
+ w->l->scheduling_fiber);
+ // Resuming this fiber returns control back to
+ // this function because our implementation uses OS fibers.
+ //
+ // On Unix, we could have the choice of passing the
+ // user_code_resume_after_switch_into_runtime as an extra "resume_proc"
+ // that resumes execution of user code instead of
+ // jumping back here, and then jumping back to user code.
+#if FIBER_DEBUG >= 2
+ CILK_ASSERT(fdata->owner == __cilkrts_get_tls_worker());
#endif
+ user_code_resume_after_switch_into_runtime(current_fiber);
+ }
}
/*
@@ -1340,73 +1744,15 @@ static void notify_children_run(__cilkrts_worker *w)
notify_children(w, 1);
}
-static void do_work(__cilkrts_worker *w, full_frame *ff)
-{
- __cilkrts_stack_frame *sf;
-
-#ifndef _WIN32
- cilkbug_assert_no_uncaught_exception();
-#endif
-
- BEGIN_WITH_WORKER_LOCK(w) {
- CILK_ASSERT(!w->l->frame_ff);
- BEGIN_WITH_FRAME_LOCK(w, ff) {
- sf = ff->call_stack;
- CILK_ASSERT(sf && !sf->call_parent);
- setup_for_execution(w, ff, 0);
- } END_WITH_FRAME_LOCK(w, ff);
- } END_WITH_WORKER_LOCK(w);
-
-#if CILK_LIB_DEBUG
- if (!(sf->flags & CILK_FRAME_UNSYNCHED))
- CILK_ASSERT(!ff->stack_child);
- if (sf->flags & CILK_FRAME_EXITING) {
- __cilkrts_bug("W%d: resuming frame %p/%p suspended in exit\n",
- w->self, ff, sf);
- }
-#endif
-
- /* run it */
- if (setjmp(w->l->env) == 0) {
- __cilkrts_resume(w, ff, sf);
-
- /* unreached---the call to cilk_resume exits through longjmp */
- CILK_ASSERT(0);
- }
-
- /* This point is reached for three reasons:
-
- 1. Undo-detach finds parent stolen.
-
- 2. Sync suspends frame.
-
- 3. Return from Cilk entry point.
-
- In the first two cases the frame may be truly suspended or
- may be immediately executed by this worker after provably_good_steal.
-
- The active frame and call_stack may have changed since _resume. */
- run_scheduling_stack_fcn(w);
-
- /* The worker borrowed the full frame's reducer map.
- Clear the extra reference. Bookkeeping uses the
- copy in the frame, not the worker. */
- w->reducer_map = 0;
-
-#ifndef _WIN32
- cilkbug_assert_no_uncaught_exception();
-#endif
-}
-
-/*
- * Try to do work. If there is none available, try to steal some and do it.
+/**
+ * A single "check" to find work, either on our queue or through a
+ * steal attempt. This method checks our local queue once, and
+ * performs one steal attempt.
*/
-static void schedule_work(__cilkrts_worker *w)
+static full_frame* check_for_work(__cilkrts_worker *w)
{
- full_frame *ff;
-
+ full_frame *ff = NULL;
ff = pop_next_frame(w);
-
// If there is no work on the queue, try to steal some.
if (NULL == ff) {
START_INTERVAL(w, INTERVAL_STEALING) {
@@ -1418,6 +1764,10 @@ static void schedule_work(__cilkrts_worker *w)
w->l->team = NULL;
__cilkrts_worker_unlock(w);
}
+
+ // If we are about to do a random steal, we should have no
+ // full frame...
+ CILK_ASSERT(NULL == w->l->frame_ff);
random_steal(w);
} STOP_INTERVAL(w, INTERVAL_STEALING);
@@ -1429,82 +1779,315 @@ static void schedule_work(__cilkrts_worker *w)
// No quantum for you!
__cilkrts_yield();
w->l->steal_failure_count++;
- return;
} else {
// Reset steal_failure_count since there is obviously still work to
// be done.
w->l->steal_failure_count = 0;
}
}
- CILK_ASSERT(ff);
+ return ff;
+}
- // Do the work that was on the queue or was stolen.
- START_INTERVAL(w, INTERVAL_WORKING) {
- do_work(w, ff);
- ITT_SYNC_SET_NAME_AND_PREPARE(w, w->l->sync_return_address);
- } STOP_INTERVAL(w, INTERVAL_WORKING);
+/**
+ * Keep stealing or looking on our queue.
+ *
+ * Returns either when a full frame is found, or NULL if the
+ * computation is done.
+ */
+static full_frame* search_until_work_found_or_done(__cilkrts_worker *w)
+{
+ full_frame *ff = NULL;
+ // Find a full frame to execute (either through random stealing,
+ // or because we pull it off w's 1-element queue).
+ while (!ff) {
+ // Check worker state to figure out our next action.
+ switch (worker_runnable(w))
+ {
+ case SCHEDULE_RUN: // One attempt at checking for work.
+ ff = check_for_work(w);
+ break;
+ case SCHEDULE_WAIT: // go into wait-mode.
+ CILK_ASSERT(WORKER_SYSTEM == w->l->type);
+ // If we are about to wait, then we better not have
+ // a frame that we should execute...
+ CILK_ASSERT(NULL == w->l->next_frame_ff);
+ notify_children_wait(w);
+ signal_node_wait(w->l->signal_node);
+ // ...
+ // Runtime is waking up.
+ notify_children_run(w);
+ w->l->steal_failure_count = 0;
+ break;
+ case SCHEDULE_EXIT: // exit the scheduler.
+ CILK_ASSERT(WORKER_USER != w->l->type);
+ return NULL;
+ default:
+ CILK_ASSERT(0);
+ abort();
+ }
+ }
+ return ff;
}
-static void __cilkrts_scheduler(__cilkrts_worker *w)
+/**
+ * The proc method for a scheduling fiber on a user worker.
+ *
+ * When a user worker jumps into the runtime, it jumps into this
+ * method by either starting it if the scheduling fiber has never run
+ * before, or resuming the fiber if it was previously suspended.
+ */
+COMMON_PORTABLE
+void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber)
{
- ITT_SYNC_PREPARE(w);
+ __cilkrts_worker* w = cilk_fiber_get_owner(fiber);
+ CILK_ASSERT(w);
- START_INTERVAL(w, INTERVAL_IN_SCHEDULER) {
+ // This must be a user worker
+ CILK_ASSERT(WORKER_USER == w->l->type);
- /* this thread now becomes a worker---associate the thread
- with the worker state */
- __cilkrts_set_tls_worker(w);
+ // If we aren't the current worker, then something is very wrong
+ // here..
+ verify_current_wkr(w);
- /* Notify tools about the new worker. Inspector needs this, but we
- don't want to confuse Cilkscreen with system threads. User threads
- do this notification in bind_thread */
- if (! w->g->under_ptool)
- __cilkrts_cilkscreen_establish_worker(w);
+ __cilkrts_run_scheduler_with_exceptions(w);
+}
- mysrand(w, (w->self + 1));
- if (WORKER_SYSTEM == w->l->type) {
- // Runtime begins in a wait-state and is woken up by the first user
- // worker when the runtime is ready.
- signal_node_wait(w->l->signal_node);
- // ...
- // Runtime is waking up.
- notify_children_run(w);
- w->l->steal_failure_count = 0;
+/**
+ * The body of the runtime scheduling loop. This function executes in
+ * 4 stages:
+ *
+ * 1. Transitions from the user code into the runtime by
+ * executing any scheduling-stack functions.
+ * 2. Looks for a full frame enqueued from a successful provably
+ * good steal.
+ * 3. If no full frame is found in step 2, steal until
+ * a frame is found or we are done. If we are done, finish
+ * the scheduling loop.
+ * 4. When a frame is found, setup to resume user code.
+ * In particular, suspend the current fiber and resume the
+ * user fiber to execute the frame.
+ *
+ * Returns a fiber object that we should switch to after completing
+ * the body of the loop, or NULL if we should continue executing on
+ * this fiber.
+ *
+ * @pre @c current_fiber should equal @c wptr->l->scheduling_fiber
+ *
+ * @param current_fiber The currently executing (scheduling) fiber.
+ * @param wptr The currently executing worker.
+ * @return The next fiber we should switch to, or NULL if there is none.
+ */
+static cilk_fiber* worker_scheduling_loop_body(cilk_fiber* current_fiber,
+ void* wptr)
+{
+ __cilkrts_worker *w = (__cilkrts_worker*) wptr;
+ CILK_ASSERT(current_fiber == w->l->scheduling_fiber);
+
+ // Stage 1: Transition from executing user code to the runtime code.
+ // We don't need to do this call here any more, because
+ // every switch to the scheduling fiber should make this call
+ // using a post_switch_proc on the fiber.
+ //
+ // enter_runtime_transition_proc(w->l->scheduling_fiber, wptr);
+
+ // After Stage 1 is complete, w should no longer have
+ // an associated full frame.
+ CILK_ASSERT(NULL == w->l->frame_ff);
+
+ // Stage 2. First do a quick check of our 1-element queue.
+ full_frame *ff = pop_next_frame(w);
+
+ if (!ff) {
+ // Stage 3. We didn't find anything from our 1-element
+ // queue. Now go through the steal loop to find work.
+ ff = search_until_work_found_or_done(w);
+ if (!ff) {
+ CILK_ASSERT(w->g->work_done);
+ return NULL;
}
+ }
- while (!w->g->work_done) {
+ // Stage 4. Now that we have found a full frame to work on,
+ // actually execute it.
+ __cilkrts_stack_frame *sf;
- switch (worker_runnable(w))
- {
- case SCHEDULE_RUN: // do some work.
- schedule_work(w);
- break;
-
- case SCHEDULE_WAIT: // go into wait-mode.
- CILK_ASSERT(WORKER_SYSTEM == w->l->type);
- notify_children_wait(w);
- signal_node_wait(w->l->signal_node);
- // ...
- // Runtime is waking up.
- notify_children_run(w);
- w->l->steal_failure_count = 0;
- break;
+ // There shouldn't be any uncaught exceptions.
+ //
+ // In Windows, the OS catches any exceptions not caught by the
+ // user code. Thus, we are omitting the check on Windows.
+ //
+ // On Android, calling std::uncaught_exception with the stlport
+ // library causes a seg fault. Since we're not supporting
+ // exceptions there at this point, just don't do the check
+ CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION();
+
+ BEGIN_WITH_WORKER_LOCK(w) {
+ CILK_ASSERT(!w->l->frame_ff);
+ BEGIN_WITH_FRAME_LOCK(w, ff) {
+ sf = ff->call_stack;
+ CILK_ASSERT(sf && !sf->call_parent);
+ setup_for_execution(w, ff, 0);
+ } END_WITH_FRAME_LOCK(w, ff);
+ } END_WITH_WORKER_LOCK(w);
- case SCHEDULE_EXIT: // exit the scheduler.
- CILK_ASSERT(WORKER_USER != w->l->type);
- break;
+ /* run it */
+ //
+ // Prepare to run the full frame. To do so, we need to:
+ // (a) Execute some code on this fiber (the scheduling
+ // fiber) to set up data structures, and
+ // (b) Suspend the scheduling fiber, and resume the
+ // user-code fiber.
- default:
- CILK_ASSERT(0);
- abort();
- }
+ // Part (a). Set up data structures.
+ scheduling_fiber_prepare_to_resume_user_code(w, ff, sf);
- } // while (!w->g->work_done)
+ cilk_fiber *other = w->l->frame_ff->fiber_self;
+ cilk_fiber_data* other_data = cilk_fiber_get_data(other);
+ cilk_fiber_data* current_fiber_data = cilk_fiber_get_data(current_fiber);
- } STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+ // I believe two cases are possible here, both of which
+ // should have other_data->resume_sf as NULL.
+ //
+ // 1. Resuming a fiber that was previously executing
+ // user code (i.e., a provably-good-steal).
+ // In this case, resume_sf should have been
+ // set to NULL when it was suspended.
+ //
+ // 2. Resuming code on a steal. In this case, since we
+ // grabbed a new fiber, resume_sf should be NULL.
+ CILK_ASSERT(NULL == other_data->resume_sf);
+
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "W=%d: other fiber=%p, setting resume_sf to %p\n",
+ w->self, other, other_data->resume_sf);
+#endif
+ // Update our own fiber's data.
+ current_fiber_data->resume_sf = NULL;
+ // The scheduling fiber should have the right owner from before.
+ CILK_ASSERT(current_fiber_data->owner == w);
+ other_data->resume_sf = sf;
+
- CILK_ASSERT(WORKER_SYSTEM == w->l->type);
+#if FIBER_DEBUG >= 3
+ fprintf(stderr, "ThreadId=%p (about to suspend self resume other), W=%d: current_fiber=%p, other=%p, current_fiber->resume_sf = %p, other->resume_sf = %p\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ current_fiber, other,
+ current_fiber_data->resume_sf,
+ other_data->resume_sf);
+#endif
+ return other;
+}
+
+
+/**
+ * This function is executed once by each worker, to initialize its
+ * scheduling loop.
+ */
+static void worker_scheduler_init_function(__cilkrts_worker *w)
+{
+ // First, execute the startup tasks that must happen for all
+ // worker types.
+ ITT_SYNC_PREPARE(w);
+ /* Notify tools about the new worker. Inspector needs this, but we
+ don't want to confuse Cilkscreen with system threads. User threads
+ do this notification in bind_thread */
+ if (! w->g->under_ptool)
+ __cilkrts_cilkscreen_establish_worker(w);
+
+ // Seed the initial random number generator.
+ // If we forget to do this, then the worker always steals from 0.
+ // Programs will still execute correctly, but
+ // you may see a subtle performance bug...
+ mysrand(w, (w->self + 1));
+
+ // The startup work varies, depending on the worker type.
+ switch (w->l->type) {
+ case WORKER_USER:
+ // Stop working once we've entered the scheduler.
+ // For user workers, INTERVAL_IN_SCHEDULER counts the time
+ // since we called bind_thread.
+ break;
+
+ case WORKER_SYSTEM:
+ // If a system worker is starting, we must also be starting
+ // the runtime.
+
+ // Runtime begins in a wait-state and is woken up by the first user
+ // worker when the runtime is ready.
+ signal_node_wait(w->l->signal_node);
+ // ...
+ // Runtime is waking up.
+ notify_children_run(w);
+ w->l->steal_failure_count = 0;
+
+ // For system threads, count all the time this thread is
+ // alive in the scheduling loop.
+ START_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+ START_INTERVAL(w, INTERVAL_WORKING);
+ break;
+ default:
+ __cilkrts_bug("Unknown worker %p of type %d entering scheduling loop\n",
+ w, w->l->type);
+ }
+}
+
+/**
+ * This function is executed once by each worker, to finish its
+ * scheduling loop.
+ *
+ * @note Currently, only system workers finish their loops. User
+ * workers will jump away to user code without exiting their
+ * scheduling loop.
+ */
+static void worker_scheduler_terminate_function(__cilkrts_worker *w)
+{
+ // A user worker should never finish by falling through the
+ // scheduling loop.
+ CILK_ASSERT(WORKER_USER != w->l->type);
+ STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
+}
+
+/**
+ * The main scheduler function executed by a worker's scheduling
+ * fiber.
+ *
+ * This method is started by either a new system worker, or a user
+ * worker that has stalled and just been imported into the runtime.
+ */
+static void worker_scheduler_function(__cilkrts_worker *w)
+{
+ worker_scheduler_init_function(w);
+
+ // The main scheduling loop body.
+
+ while (!w->g->work_done) {
+ // Set intervals. Now we are in the runtime instead of working.
+ START_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ STOP_INTERVAL(w, INTERVAL_WORKING);
+
+ // Execute the "body" of the scheduling loop, and figure
+ // out the fiber to jump to next.
+ cilk_fiber* fiber_to_resume
+ = worker_scheduling_loop_body(w->l->scheduling_fiber, w);
+
+ if (fiber_to_resume) {
+ // Suspend the current fiber and resume next one.
+ NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER);
+ STOP_INTERVAL(w, INTERVAL_IN_RUNTIME);
+ START_INTERVAL(w, INTERVAL_WORKING);
+ cilk_fiber_suspend_self_and_resume_other(w->l->scheduling_fiber,
+ fiber_to_resume);
+
+ // Return here only when this (scheduling) fiber is
+ // resumed (i.e., this worker wants to reenter the runtime).
+ }
+ }
+
+ // Finish the scheduling loop.
+ worker_scheduler_terminate_function(w);
}
@@ -1617,44 +2200,35 @@ NORETURN __cilkrts_c_sync(__cilkrts_worker *w,
w = execute_reductions_for_sync(w, ff, sf_at_sync);
+#if FIBER_DEBUG >= 3
+ fprintf(stderr, "ThreadId=%p, w->self = %d. about to longjmp_into_runtim[c_sync] with ff=%p\n",
+ cilkos_get_current_thread_id(), w->self, ff);
+#endif
+
longjmp_into_runtime(w, do_sync, sf_at_sync);
}
static void do_sync(__cilkrts_worker *w, full_frame *ff,
__cilkrts_stack_frame *sf)
{
- int abandoned = 1;
+ //int abandoned = 1;
+ enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
+
START_INTERVAL(w, INTERVAL_SYNC_CHECK) {
BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
- ff = w->l->frame_ff;
- w->l->frame_ff = NULL;
- // Conceptually, after clearing w->l->frame_ff,
- // w no longer owns the full frame ff.
- // The next time another (possibly different) worker takes
- // ownership of ff will be at a provably_good_steal on ff.
CILK_ASSERT(ff);
BEGIN_WITH_FRAME_LOCK(w, ff) {
CILK_ASSERT(sf->call_parent == 0);
CILK_ASSERT(sf->flags & CILK_FRAME_UNSYNCHED);
- /* A frame entering a nontrivial sync always has a
- stack_self. A topmost frame after a sync does
- not; it is back on the caller's stack. */
- CILK_ASSERT(ff->stack_self || ff->simulated_stolen);
-
- // Notify TBB that we're orphaning the stack. We'll reclaim it
- // again if we continue
- __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ORPHAN, ff->stack_self);
+ // Before switching into the scheduling fiber, we should have
+ // already taken care of deallocating the current
+ // fiber.
+ CILK_ASSERT(NULL == ff->fiber_self);
- /* if (ff->stack_self) see above comment */ {
- __cilkrts_stack *s = ff->stack_self;
- ff->stack_self = NULL;
- __cilkrts_release_stack(w, s);
- }
-
- // Update the frame's pedigree information if this is an ABI 1 or later
- // frame
+ // Update the frame's pedigree information if this is an ABI 1
+ // or later frame
if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
{
sf->parent_pedigree.rank = w->pedigree.rank;
@@ -1666,16 +2240,43 @@ static void do_sync(__cilkrts_worker *w, full_frame *ff,
}
/* the decjoin() occurs in provably_good_steal() */
- abandoned = provably_good_steal(w, ff);
-
+ steal_result = provably_good_steal(w, ff);
+
} END_WITH_FRAME_LOCK(w, ff);
+ // Clear w->l->frame_ff unless the steal result is WAIT_FOR_CONTINUE
+ if (WAIT_FOR_CONTINUE != steal_result) {
+ w->l->frame_ff = NULL;
+ }
} END_WITH_WORKER_LOCK_OPTIONAL(w);
} STOP_INTERVAL(w, INTERVAL_SYNC_CHECK);
+ // Now, if we are in a replay situation and provably_good_steal() returned
+ // WAIT_FOR_CONTINUE, we should sleep, reacquire locks, call
+ // provably_good_steal(), and release locks until we get a value other
+ // than WAIT_FOR_CONTINUE from the function.
+#ifdef CILK_RECORD_REPLAY
+ // We don't have to explicitly check for REPLAY_LOG below because
+ // steal_result can only be set to WAIT_FOR_CONTINUE during replay
+ while(WAIT_FOR_CONTINUE == steal_result)
+ {
+ __cilkrts_sleep();
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
+ {
+ ff = w->l->frame_ff;
+ BEGIN_WITH_FRAME_LOCK(w, ff)
+ {
+ steal_result = provably_good_steal(w, ff);
+ } END_WITH_FRAME_LOCK(w, ff);
+ if (WAIT_FOR_CONTINUE != steal_result)
+ w->l->frame_ff = NULL;
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+ }
+#endif // CILK_RECORD_REPLAY
+
#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
// If we can't make any further progress on this thread, tell Inspector
// that we're abandoning the work and will go find something else to do.
- if (abandoned)
+ if (ABANDON_EXECUTION == steal_result)
{
__notify_zc_intrinsic("cilk_sync_abandon", 0);
}
@@ -1690,15 +2291,38 @@ static void do_sync(__cilkrts_worker *w, full_frame *ff,
purposes. */
void __cilkrts_promote_own_deque(__cilkrts_worker *w)
{
+ // Remember the fiber we start this method on.
+ CILK_ASSERT(w->l->frame_ff);
+ cilk_fiber* starting_fiber = w->l->frame_ff->fiber_self;
+
BEGIN_WITH_WORKER_LOCK(w) {
while (dekker_protocol(w)) {
- /* PLACEHOLDER_STACK is used as non-null marker to tell detach()
+ /* PLACEHOLDER_FIBER is used as non-null marker to tell detach()
and make_child() that this frame should be treated as a spawn
parent, even though we have not assigned it a stack. */
- detach_for_steal(w, w, PLACEHOLDER_STACK);
-
+ detach_for_steal(w, w, PLACEHOLDER_FIBER);
}
} END_WITH_WORKER_LOCK(w);
+
+
+ // TBD: The management of full frames and fibers is a bit
+ // sketchy here. We are promoting stack frames into full frames,
+ // and pretending they are stolen away, but no other worker is
+ // actually working on them. Some runtime invariants
+ // may be broken here.
+ //
+ // Technically, if we are simulating a steal from w,
+ // w should get a new full frame, but
+ // keep the same fiber. A real thief would take the
+ // loot frame away, get a new fiber, and start executing the
+ // loot frame.
+ //
+ // What should a fake thief do? Where does the frame go?
+
+ // In any case, we should be finishing the promotion process with
+ // the same fiber we started with.
+ CILK_ASSERT(w->l->frame_ff);
+ CILK_ASSERT(w->l->frame_ff->fiber_self == starting_fiber);
}
@@ -1717,6 +2341,7 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
full_frame *ff;
int stolen_p;
__cilkrts_stack_frame *saved_sf = NULL;
+
START_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK);
BEGIN_WITH_WORKER_LOCK(w) {
@@ -1761,6 +2386,13 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
{
w = execute_reductions_for_spawn_return(w, ff, returning_sf);
+ // "Mr. Policeman? My parent always told me that if I was in trouble
+ // I should ask a nice policeman for help. I can't find my parent
+ // anywhere..."
+ //
+ // Write a record to the replay log for an attempt to return to a stolen parent
+ replay_record_orphaned(w);
+
// Update the pedigree only after we've finished the
// reductions.
update_pedigree_on_leave_frame(w, returning_sf);
@@ -1772,9 +2404,9 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
__notify_zc_intrinsic("cilk_leave_stolen", saved_sf);
#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
- DBGPRINTF ("%d-%p: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self, GetWorkerFiber(w));
+ DBGPRINTF ("%d: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self);
longjmp_into_runtime(w, do_return_from_spawn, 0);
- DBGPRINTF ("%d-%p: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self, GetWorkerFiber(w));
+ DBGPRINTF ("%d: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self);
}
else
{
@@ -1803,44 +2435,47 @@ static void do_return_from_spawn(__cilkrts_worker *w,
__cilkrts_stack_frame *sf)
{
full_frame *parent_ff;
+ enum provably_good_steal_t steal_result = ABANDON_EXECUTION;
+
BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) {
CILK_ASSERT(ff);
CILK_ASSERT(!ff->is_call_child);
- CILK_ASSERT(ff == w->l->frame_ff);
CILK_ASSERT(sf == NULL);
parent_ff = ff->parent;
BEGIN_WITH_FRAME_LOCK(w, ff) {
- if( ff->stack_self )
- {
- // Notify TBB that we're returning from a spawn and orphaning
- // the stack. We'll re-adopt it if we continue
- __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ORPHAN,
- ff->stack_self);
- }
decjoin(ff);
} END_WITH_FRAME_LOCK(w, ff);
BEGIN_WITH_FRAME_LOCK(w, parent_ff) {
- __cilkrts_stack* stack_to_free = w->l->stack_to_free;
- w->l->stack_to_free = NULL;
- w->l->frame_ff = NULL;
-
- if (stack_to_free) {
- __cilkrts_release_stack(w, stack_to_free);
- }
- ff->stack_self = NULL;
-
- if (parent_ff->simulated_stolen) {
+ if (parent_ff->simulated_stolen)
unconditional_steal(w, parent_ff);
- }
- else {
- provably_good_steal(w, parent_ff);
- }
+ else
+ steal_result = provably_good_steal(w, parent_ff);
} END_WITH_FRAME_LOCK(w, parent_ff);
} END_WITH_WORKER_LOCK_OPTIONAL(w);
+ // Loop here in replay mode
+#ifdef CILK_RECORD_REPLAY
+ // We don't have to explicitly check for REPLAY_LOG below because
+ // steal_result can only get set to WAIT_FOR_CONTINUE during replay.
+ // We also don't have to worry about the simulated_stolen flag
+ // because steal_result can only be set to WAIT_FOR_CONTINUE by
+ // provably_good_steal().
+ while(WAIT_FOR_CONTINUE == steal_result)
+ {
+ __cilkrts_sleep();
+ BEGIN_WITH_WORKER_LOCK_OPTIONAL(w)
+ {
+ BEGIN_WITH_FRAME_LOCK(w, parent_ff)
+ {
+ steal_result = provably_good_steal(w, parent_ff);
+ } END_WITH_FRAME_LOCK(w, parent_ff);
+ } END_WITH_WORKER_LOCK_OPTIONAL(w);
+ }
+#endif // CILK_RECORD_REPLAY
+
// Cleanup the child frame.
__cilkrts_destroy_full_frame(w, ff);
return;
@@ -1897,6 +2532,22 @@ __cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w)
return sf;
}
+#ifdef CILK_RECORD_REPLAY
+__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w)
+{
+ __cilkrts_stack_frame *sf;
+ BEGIN_WITH_WORKER_LOCK(w) {
+ if (w->head < w->tail) {
+ sf = *(w->tail-1);
+ } else {
+ sf = 0;
+ }
+ } END_WITH_WORKER_LOCK(w);
+ return sf;
+}
+#endif
+
+
/* Return from a call, not a spawn. */
void __cilkrts_return(__cilkrts_worker *w)
{
@@ -1977,7 +2628,6 @@ static void __cilkrts_unbind_thread()
STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
}
- __cilkrts_sysdep_unbind_thread(w);
__cilkrts_set_tls_worker(0);
if (w->self == -1) {
@@ -2023,10 +2673,39 @@ void __cilkrts_c_return_from_initial(__cilkrts_worker *w)
CILK_ASSERT(ff->join_counter == 1);
w->l->frame_ff = 0;
- CILK_ASSERT(ff->stack_self);
+ CILK_ASSERT(ff->fiber_self);
// Save any TBB interop data for the next time this thread enters Cilk
- tbb_interop_save_info_from_stack(ff->stack_self);
- sysdep_destroy_user_stack(ff->stack_self);
+ cilk_fiber_tbb_interop_save_info_from_stack(ff->fiber_self);
+
+ // Deallocate cilk_fiber that mapped to the user stack. The stack
+ // itself does not get deallocated (of course) but our data
+ // structure becomes divorced from it.
+
+#if FIBER_DEBUG >= 1
+ fprintf(stderr, "ThreadId=%p: w=%d: We are about to deallocate ff->fiber_self = %p here. w->l->scheduling_fiber = %p. w->l->type = %d\n",
+ cilkos_get_current_thread_id(),
+ w->self,
+ ff->fiber_self,
+ w->l->scheduling_fiber,
+ w->l->type);
+#endif
+ // The fiber in ff is a user-code fiber. The fiber in
+ // w->l->scheduling_fiber is a scheduling fiber. These fibers should
+ // never be equal. When a user worker returns (and will unbind), we
+ // should destroy only the fiber in ff. The scheduling fiber will be
+ // re-used.
+
+ CILK_ASSERT(ff->fiber_self != w->l->scheduling_fiber);
+
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) {
+ // This fiber might not be deallocated here if there
+ // is a pending exception on Windows that refers
+ // to this fiber.
+ //
+ // First "suspend" the fiber, and then try to delete it.
+ cilk_fiber_deallocate_from_thread(ff->fiber_self);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE);
+ ff->fiber_self = NULL;
/* Save reducer map into global_state object */
rm = w->reducer_map;
@@ -2052,9 +2731,25 @@ void __cilkrts_c_return_from_initial(__cilkrts_worker *w)
__cilkrts_destroy_reducer_map(w, rm);
}
+
+#if FIBER_DEBUG >= 1
+ __cilkrts_worker* tmp = w;
+ int tmp_id = w->self;
+ fprintf(stderr, "w=%d: We are about unbind thread (w= %p)\n",
+ w->self,
+ w);
+#endif
+
w = NULL;
+
__cilkrts_unbind_thread();
+#if FIBER_DEBUG >= 1
+
+ fprintf(stderr, "w=%p, %d: Finished unbind\n",
+ tmp, tmp_id);
+#endif
+
/* Other workers will stop trying to steal if this was the last worker. */
return;
@@ -2128,62 +2823,75 @@ __cilkrts_worker *make_worker(global_state_t *g,
w->l = (local_state *)__cilkrts_malloc(sizeof(*w->l));
- __cilkrts_init_stats(&w->l->stats);
-
__cilkrts_frame_malloc_per_worker_init(w);
+ w->reducer_map = NULL;
+ w->current_stack_frame = NULL;
+ w->reserved = NULL;
+
w->l->worker_magic_0 = WORKER_MAGIC_0;
+ w->l->team = NULL;
+ w->l->type = WORKER_FREE;
+
__cilkrts_mutex_init(&w->l->lock);
__cilkrts_mutex_init(&w->l->steal_lock);
w->l->do_not_steal = 0;
w->l->frame_ff = 0;
+ w->l->next_frame_ff = 0;
+ w->l->last_full_frame = NULL;
+
w->l->ltq = (__cilkrts_stack_frame **)
__cilkrts_malloc(g->ltqsize * sizeof(*w->l->ltq));
w->ltq_limit = w->l->ltq + g->ltqsize;
-
- w->l->original_pedigree_leaf = NULL;
+ w->head = w->tail = w->l->ltq;
+ cilk_fiber_pool_init(&w->l->fiber_pool,
+ &g->fiber_pool,
+ g->stack_size,
+ g->fiber_pool_size,
+ 0, // alloc_max is 0. We don't allocate from the heap directly without checking the parent pool.
+ 0);
+#if FIBER_DEBUG >= 2
+ fprintf(stderr, "ThreadId=%p: Making w=%d (%p), pool = %p\n",
+ cilkos_get_current_thread_id(),
+ w->self, w,
+ &w->l->fiber_pool);
+#endif
+ w->l->scheduling_fiber = NULL;
+ w->l->original_pedigree_leaf = NULL;
w->l->rand_seed = 0; /* the scheduler will overwrite this field */
- w->l->next_frame_ff = 0;
- __cilkrts_init_stack_cache(w, &w->l->stack_cache, g->stack_cache_size);
-
- w->head = w->tail = w->l->ltq;
-
- w->reducer_map = NULL;
-
- w->current_stack_frame = NULL;
-
- w->l->pending_exception = NULL;
- w->l->worker_magic_1 = WORKER_MAGIC_1;
w->l->post_suspend = 0;
w->l->suspended_stack = 0;
- w->l->stack_to_free = NULL;
+ w->l->fiber_to_free = NULL;
+ w->l->pending_exception = NULL;
+#if CILK_PROFILE
+ w->l->stats = __cilkrts_malloc(sizeof(statistics));
+ __cilkrts_init_stats(w->l->stats);
+#else
+ w->l->stats = NULL;
+#endif
w->l->steal_failure_count = 0;
- w->l->team = NULL;
- w->l->last_full_frame = NULL;
-
- w->l->scheduler_stack = NULL;
+ w->l->work_stolen = 0;
+ // Initialize record/replay assuming we're doing neither
+ w->l->record_replay_fptr = NULL;
+ w->l->replay_list_root = NULL;
+ w->l->replay_list_entry = NULL;
w->l->signal_node = NULL;
+ // Nothing's been stolen yet
+ w->l->worker_magic_1 = WORKER_MAGIC_1;
- w->reserved = NULL;
/*w->parallelism_disabled = 0;*/
// Allow stealing all frames. Sets w->saved_protected_tail
__cilkrts_restore_stealing(w, w->ltq_limit);
-
- w->l->type = WORKER_FREE;
- w->l->user_thread_imported = 0;
-
- // Nothing's been stolen yet
- w->l->work_stolen = 0;
-
+
__cilkrts_init_worker_sysdep(w);
- reset_THE_exception(w);
+ reset_THE_exception(w);
return w;
}
@@ -2192,13 +2900,39 @@ void destroy_worker(__cilkrts_worker *w)
{
CILK_ASSERT (NULL == w->l->pending_exception);
- /* Free any cached stack. */
- __cilkrts_destroy_stack_cache(w, w->g, &w->l->stack_cache);
+ // Deallocate the scheduling fiber
+ if (NULL != w->l->scheduling_fiber)
+ {
+ // The scheduling fiber is the main fiber for system workers and must
+ // be deallocated by the thread that created it. Thus, we can
+ // deallocate only free workers' (formerly user workers) scheduling
+ // fibers here.
+ CILK_ASSERT(WORKER_FREE == w->l->type);
+
+#if FIBER_DEBUG >=1
+ fprintf(stderr, "ThreadId=%p, w=%p, %d, deallocating scheduling fiber = %p, \n",
+ cilkos_get_current_thread_id(),
+ w,
+ w->self,
+ w->l->scheduling_fiber);
+#endif
+ int ref_count = cilk_fiber_remove_reference(w->l->scheduling_fiber, NULL);
+ // Scheduling fiber should never have extra references because of exceptions.
+ CILK_ASSERT(0 == ref_count);
+ w->l->scheduling_fiber = NULL;
+ }
- if (w->l->scheduler_stack) {
- sysdep_destroy_tiny_stack(w->l->scheduler_stack);
- w->l->scheduler_stack = NULL;
+#if CILK_PROFILE
+ if (w->l->stats) {
+ __cilkrts_free(w->l->stats);
}
+#else
+ CILK_ASSERT(NULL == w->l->stats);
+#endif
+
+ /* Free any cached fibers. */
+ cilk_fiber_pool_destroy(&w->l->fiber_pool);
+
__cilkrts_destroy_worker_sysdep(w);
if (w->l->signal_node) {
@@ -2210,6 +2944,7 @@ void destroy_worker(__cilkrts_worker *w)
__cilkrts_mutex_destroy(0, &w->l->lock);
__cilkrts_mutex_destroy(0, &w->l->steal_lock);
__cilkrts_frame_malloc_per_worker_cleanup(w);
+
__cilkrts_free(w->l);
// The caller is responsible for freeing the worker memory
@@ -2243,6 +2978,9 @@ void __cilkrts_deinit_internal(global_state_t *g)
w->l->frame_ff = 0;
}
+ // Release any resources used for record/replay
+ replay_term(g);
+
// Destroy any system dependent global state
__cilkrts_destroy_global_sysdep(g);
@@ -2253,8 +2991,10 @@ void __cilkrts_deinit_internal(global_state_t *g)
__cilkrts_free(g->workers[0]);
__cilkrts_free(g->workers);
- __cilkrts_destroy_stack_cache(0, g, &g->stack_cache);
+
+ cilk_fiber_pool_destroy(&g->fiber_pool);
__cilkrts_frame_malloc_global_cleanup(g);
+
cilkg_deinit_global_state();
}
@@ -2353,6 +3093,8 @@ static enum schedule_t worker_runnable(__cilkrts_worker *w)
return SCHEDULE_RUN;
}
+
+
// Initialize the worker structs, but don't start the workers themselves.
static void init_workers(global_state_t *g)
{
@@ -2364,8 +3106,15 @@ static void init_workers(global_state_t *g)
} *workers_memory;
/* not needed if only one worker */
- __cilkrts_init_stack_cache(0, &g->stack_cache,
- 2*total_workers * g->global_stack_cache_size);
+ cilk_fiber_pool_init(&g->fiber_pool,
+ NULL,
+ g->stack_size,
+ g->global_fiber_pool_size, // buffer_size
+ g->max_stacks, // maximum # to allocate
+ 1);
+
+ cilk_fiber_pool_set_fiber_limit(&g->fiber_pool,
+ (g->max_stacks ? g->max_stacks : INT_MAX));
g->workers = (__cilkrts_worker **)
__cilkrts_malloc(total_workers * sizeof(*g->workers));
@@ -2395,7 +3144,6 @@ static void init_workers(global_state_t *g)
void __cilkrts_init_internal(int start)
{
- int i;
global_state_t *g = NULL;
if (cilkg_is_published()) {
@@ -2416,7 +3164,7 @@ void __cilkrts_init_internal(int start)
g = cilkg_init_global_state();
// Set the scheduler pointer
- g->scheduler = &__cilkrts_scheduler;
+ g->scheduler = worker_scheduler_function;
// If we're running under a sequential P-Tool (Cilkscreen or
// Cilkview) then there's only one worker and we need to tell
@@ -2425,9 +3173,13 @@ void __cilkrts_init_internal(int start)
__cilkrts_establish_c_stack();
init_workers(g);
+ // Initialize per-worker record/replay logging
+ replay_init_workers(g);
+
// Initialize any system dependent global state
__cilkrts_init_global_sysdep(g);
+
cilkg_publish_global_state(g);
}
@@ -2575,14 +3327,17 @@ void __cilkrts_init_internal(int start)
*****************************************************************/
-// Struct storing pointers to the fields in our "left" sibling
-// that we should update when splicing out a full frame or stalling at
-// a sync.
+/**
+ * @brief Locations to store the result of a reduction.
+ *
+ * Struct storing pointers to the fields in our "left" sibling that we
+ * should update when splicing out a full frame or stalling at a sync.
+ */
typedef struct {
- // A pointer to the location of our left reducer map.
+ /** A pointer to the location of our left reducer map. */
struct cilkred_map **map_ptr;
- // A pointer to the location of our left exception.
+ /** A pointer to the location of our left exception. */
struct pending_exception_info **exception_ptr;
} splice_left_ptrs;
@@ -2650,8 +3405,8 @@ splice_left_ptrs compute_left_ptrs_for_sync(__cilkrts_worker *w,
* 1. Perform the "reduction" on stacks, i.e., execute the left
* holder logic to pass the leftmost stack up.
*
- * w->l->stack_to_free holds any stack that needs to be freed
- * after control longjmps into the runtime.
+ * w->l->fiber_to_free holds any stack that needs to be freed
+ * when control switches into the runtime fiber.
*
* 2. Unlink and remove child_ff from the tree of full frames.
*
@@ -2664,29 +3419,26 @@ void finish_spawn_return_on_user_stack(__cilkrts_worker *w,
full_frame *parent_ff,
full_frame *child_ff)
{
- CILK_ASSERT(w->l->stack_to_free == NULL);
-
+ CILK_ASSERT(w->l->fiber_to_free == NULL);
+
// Execute left-holder logic for stacks.
- if (child_ff->left_sibling || parent_ff->stack_child) {
+ if (child_ff->left_sibling || parent_ff->fiber_child) {
// Case where we are not the leftmost stack.
- CILK_ASSERT(parent_ff->stack_child != child_ff->stack_self);
+ CILK_ASSERT(parent_ff->fiber_child != child_ff->fiber_self);
- // Remember any stack we need to free in the worker.
+ // Remember any fiber we need to free in the worker.
// After we jump into the runtime, we will actually do the
// free.
- w->l->stack_to_free = child_ff->stack_self;
+ w->l->fiber_to_free = child_ff->fiber_self;
}
else {
- // We are leftmost, pass stack up to parent.
- // Thus, no stack to free.
- parent_ff->stack_child = child_ff->stack_self;
- w->l->stack_to_free = NULL;
+ // We are leftmost, pass stack/fiber up to parent.
+ // Thus, no stack/fiber to free.
+ parent_ff->fiber_child = child_ff->fiber_self;
+ w->l->fiber_to_free = NULL;
}
- // We cannot NULL this out yet. Importing a user worker on Windows
- // depends on this field in the full_frame being valid in
- // __cilkrts_sysdep_import_user_thread()
-// child_ff->stack_self = NULL;
+ child_ff->fiber_self = NULL;
unlink_child(parent_ff, child_ff);
}
@@ -2727,7 +3479,6 @@ fast_path_reductions_for_spawn_return(__cilkrts_worker *w,
full_frame *ff)
{
// ASSERT: we hold ff->parent->lock.
- full_frame *parent_ff = ff->parent;
splice_left_ptrs left_ptrs;
CILK_ASSERT(NULL == w->l->pending_exception);
@@ -3137,6 +3888,7 @@ execute_reductions_for_sync(__cilkrts_worker *w,
// we start any reductions, since the reductions might push more
// data onto the stack.
CILK_ASSERT(sf_at_sync->flags | CILK_FRAME_STOLEN);
+
__cilkrts_put_stack(ff, sf_at_sync);
__cilkrts_make_unrunnable_sysdep(w, ff, sf_at_sync, 1,
"execute_reductions_for_sync");
@@ -3172,6 +3924,10 @@ execute_reductions_for_sync(__cilkrts_worker *w,
ff->call_stack = sf_at_sync;
sf_at_sync->flags |= CILK_FRAME_SUSPENDED;
+ // At a nontrivial sync, we should always free the current fiber,
+ // because it cannot be leftmost.
+ w->l->fiber_to_free = ff->fiber_self;
+ ff->fiber_self = NULL;
return w;
}
diff --git a/libcilkrts/runtime/scheduler.h b/libcilkrts/runtime/scheduler.h
index b0cc2797621..d7c3b1340df 100644
--- a/libcilkrts/runtime/scheduler.h
+++ b/libcilkrts/runtime/scheduler.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -44,19 +49,34 @@
#include "reducer_impl.h"
#include "global_state.h"
+#ifdef CILK_RECORD_REPLAY
+#include "record-replay.h"
+#endif
+
__CILKRTS_BEGIN_EXTERN_C
-// Set to 0 to allow parallel reductions.
+
+/**
+ * @brief Flag to disable parallel reductions.
+ *
+ * Set to 0 to allow parallel reductions.
+ */
#define DISABLE_PARALLEL_REDUCERS 0
+
+/**
+ * @brief Debugging level for parallel reductions.
+ *
+ * Print debugging messages and assertions for parallel reducers. 0 is
+ * no debugging. A higher value generates more output.
+ */
#define REDPAR_DEBUG 0
/**
- * Lock the worker mutex to allow exclusive access to the values in the
- * __cilkrts_worker and local_state structures.
+ * @brief Lock the worker mutex to allow exclusive access to the
+ * values in the @c __cilkrts_worker and local_state structures.
*
- * Preconditions:
- * - local_state.don_not_steal must not be set. Essentially this asserts
- * that the worker is not locked recursively.
+ * @pre @c w->l->do_not_steal must not be set. Essentially this
+ * condition asserts that the worker is not locked recursively.
*
* @param w The worker to lock.
*/
@@ -64,11 +84,10 @@ COMMON_PORTABLE
void __cilkrts_worker_lock(__cilkrts_worker *w);
/**
- * Unlock the worker mutex.
+ * @brief Unlock the worker mutex.
*
- * Preconditions:
- * - local_state.don_not_steal must be set. Essentially this asserts
- * that the worker has been previously locked.
+ * @pre @c w->l->do_not_steal must be set. Essentially this condition
+ * asserts that the worker has been previously locked.
*
* @param w The worker to unlock.
*/
@@ -76,8 +95,8 @@ COMMON_PORTABLE
void __cilkrts_worker_unlock(__cilkrts_worker *w);
/**
- * Push the next full frame to be made active in this worker and increment
- * its join counter.
+ * @brief Push the next full frame to be made active in this worker
+ * and increment its join counter.
*
* __cilkrts_push_next_frame and pop_next_frame work on a one-element queue.
* This queue is used to communicate across the runtime from the code that
@@ -86,9 +105,9 @@ void __cilkrts_worker_unlock(__cilkrts_worker *w);
* counter but pop does not decrement it. Rather, a single push/pop
* combination makes a frame active and increments its join counter once.
*
- * Note that a system worker may chose to push work onto a user worker if
- * the work is the continuation from a sync which only the user worker may
- * complete.
+ * @note A system worker may choose to push work onto a user worker if
+ * the work is the continuation from a sync which only the user worker
+ * may complete.
*
* @param w The worker which the frame is to be pushed onto.
* @param ff The full_frame which is to be continued by the worker.
@@ -98,11 +117,14 @@ void __cilkrts_push_next_frame(__cilkrts_worker *w,
full_frame *ff);
/**
- * Sync on this worker. If this is the last worker to reach the sync,
- * execution may resume on this worker after the sync. If this is not
- * the last spawned child to reach the sync, then execution is suspended
- * and the worker will re-enter the scheduling loop, looking for work
- * it can steal.
+ * @brief Sync on this worker.
+ *
+ * If this worker is the last to reach the sync, execution may resume
+ * on this worker after the sync.
+ *
+ * If this worker is not the last spawned child to reach the sync,
+ * then execution is suspended and the worker will re-enter the
+ * scheduling loop, looking for work it can steal.
*
* This function will jump into the runtime to switch to the scheduling
* stack to implement most of its logic.
@@ -115,13 +137,14 @@ NORETURN __cilkrts_c_sync(__cilkrts_worker *w,
__cilkrts_stack_frame *sf);
/**
- * Worker W completely promotes its own deque, simulating the case
- * where the whole deque is stolen. We use this mechanism to force
- * the allocation of new storage for reducers for race-detection
- * purposes.
+ * @brief Worker @c w completely promotes its own deque, simulating the case
+ * where the whole deque is stolen.
*
- * This is called from the reducer lookup logic when g->force_reduce
- * is set.
+ * We use this mechanism to force the allocation of new storage for
+ * reducers for race-detection purposes.
+ *
+ * This method is called from the reducer lookup logic when
+ * @c g->force_reduce is set.
*
* @warning Use of "force_reduce" is known to have bugs when run with
* more than 1 worker.
@@ -133,10 +156,22 @@ COMMON_PORTABLE
void __cilkrts_promote_own_deque(__cilkrts_worker *w);
/**
- * Called when a function attempts to return from a spawn and the
- * parent has been stolen. While this function can return, it
- * will most likely jump into the runtime to switch onto the
- * scheduling stack to execute do_return_from_spawn().
+ * Called when a spawned function attempts to return and
+ * __cilkrts_undo_detach() fails. This can happen for two reasons:
+ *
+ * @li If another worker is considering stealing our parent, it bumps the
+ * exception pointer while doing so, which will cause __cilkrts_undo_detach()
+ * to fail. If the other worker doesn't complete the steal of our parent, we
+ * may still be able to return to it, either because the steal attempt failed,
+ * or we won the race for the tail pointer.
+ *
+ * @li If the function's parent has been stolen then we cannot return. Instead
+ * we'll longjmp into the runtime to switch onto the scheduling stack to
+ * execute do_return_from_spawn() and determine what to do. Either this
+ * worker is the last one to the sync, in which case we need to jump to the
+ * sync, or this worker is not the last one to the sync, in which case we'll
+ * abandon this work and jump to the scheduling loop to search for more work
+ * we can steal.
*
* @param w The worker which attempting to return from a spawn to
* a stolen parent.
@@ -147,22 +182,27 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w,
__cilkrts_stack_frame *returning_sf);
/**
+ * @brief Return an exception to a stolen parent.
+ *
* Used by the gcc implementation of exceptions to return an exception
* to a stolen parent
*
* @param w The worker which attempting to return from a spawn with an
* exception to a stolen parent.
+ * @param returning_sf The stack frame which is returning.
*/
COMMON_PORTABLE
NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w,
__cilkrts_stack_frame *returning_sf);
/**
- * Used by the Windows implementations of exceptions to migrate an exception
- * across fibers. Call this function when an exception has been thrown and
- * has to traverse across a steal. The exception has already been wrapped up,
- * so all that remains is to longjmp() into the continuation, sync, and
- * re-raise it.
+ * @brief Used by the Windows implementations of exceptions to migrate an exception
+ * across fibers.
+ *
+ * Call this function when an exception has been thrown and has to
+ * traverse across a steal. The exception has already been wrapped
+ * up, so all that remains is to longjmp() into the continuation,
+ * sync, and re-raise it.
*
* @param sf The __cilkrts_stack_frame for the frame that is attempting to
* return an exception to a stolen parent.
@@ -170,7 +210,8 @@ NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w,
void __cilkrts_migrate_exception (__cilkrts_stack_frame *sf);
/**
- * Return from a call, not a spawn, where this frame has ever been stolen.
+ * @brief Return from a call, not a spawn, where this frame has ever
+ * been stolen.
*
* @param w The worker that is returning from a frame which was ever stolen.
*/
@@ -178,16 +219,18 @@ COMMON_PORTABLE
void __cilkrts_return(__cilkrts_worker *w);
/**
- * Special return from the initial frame. Will be called from
- * __cilkrts_leave_frame if CILK_FRAME_LAST is set.
+ * @brief Special return from the initial frame.
+ *
+ * This method will be called from @c __cilkrts_leave_frame if
+ * @c CILK_FRAME_LAST is set.
*
* This function will do the things necessary to cleanup, and unbind the
* thread from the Intel Cilk Plus runtime. If this is the last user
* worker unbinding from the runtime, all system worker threads will be
* suspended.
*
- * Preconditions:
- * - This must be a user worker.
+ * @pre @c w must be the currently executing worker, and must be a user
+ * worker.
*
* @param w The worker that's returning from the initial frame.
*/
@@ -195,7 +238,8 @@ COMMON_PORTABLE
void __cilkrts_c_return_from_initial(__cilkrts_worker *w);
/**
- * Used by exception handling code to pop an entry from the worker's deque.
+ * @brief Used by exception handling code to pop an entry from the
+ * worker's deque.
*
* @param w Worker to pop the entry from
*
@@ -206,7 +250,8 @@ COMMON_PORTABLE
__cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w);
/**
- * Modifies the worker's protected_tail to prevent frames from being stolen.
+ * @brief Modifies the worker's protected_tail to prevent frames from
+ * being stolen.
*
* The Dekker protocol has been extended to only steal if head+1 is also
* less than protected_tail.
@@ -223,8 +268,8 @@ __cilkrts_stack_frame *volatile *__cilkrts_disallow_stealing(
__cilkrts_stack_frame *volatile *new_protected_tail);
/**
- * Restores the protected tail to a previous state, possibly allowing frames
- * to be stolen.
+ * @brief Restores the protected tail to a previous state, possibly
+ * allowing frames to be stolen.
*
* @param w The worker to be modified.
* @param saved_protected_tail A previous setting for protected_tail that is
@@ -236,8 +281,10 @@ void __cilkrts_restore_stealing(
__cilkrts_stack_frame *volatile *saved_protected_tail);
/**
- * Initialize a __cilkrts_worker. The memory for the worker must have been
- * allocated outside this call.
+ * @brief Initialize a @c __cilkrts_worker.
+ *
+ * @note The memory for the worker must have been allocated outside
+ * this call.
*
* @param g The global_state_t.
* @param self The index into the global_state's array of workers for this
@@ -253,8 +300,10 @@ __cilkrts_worker *make_worker(global_state_t *g,
__cilkrts_worker *w);
/**
- * Free up any resources allocated for a worker. The memory for the
- * __cilkrts_worker itself must be deallocated outside this call.
+ * @brief Free up any resources allocated for a worker.
+ *
+ * @note The memory for the @c __cilkrts_worker itself must be
+ * deallocated outside this call.
*
* @param w The worker to be destroyed.
*/
@@ -262,8 +311,10 @@ COMMON_PORTABLE
void destroy_worker (__cilkrts_worker *w);
/**
- * Initialize the runtime. If necessary, allocates and initializes the
- * global state. If necessary, unsuspends the system workers.
+ * @brief Initialize the runtime.
+ *
+ * If necessary, allocates and initializes the global state. If
+ * necessary, unsuspends the system workers.
*
* @param start Specifies whether the workers are to be unsuspended if
* they are suspended. Allows __cilkrts_init() to start up the runtime without
@@ -273,8 +324,9 @@ COMMON_PORTABLE
void __cilkrts_init_internal(int start);
/**
- * Part of the sequence to shutdown the runtime. Specifically frees the
- * global_state_t for the runtime.
+ * @brief Part of the sequence to shutdown the runtime.
+ *
+ * Specifically, this call frees the @c global_state_t for the runtime.
*
* @param g The global_state_t.
*/
@@ -289,34 +341,49 @@ cilkred_map *__cilkrts_xchg_reducer(
__cilkrts_worker *w, cilkred_map *newmap) cilk_nothrow;
/**
- * Called when a user thread is bound to the runtime. If this increments the
- * count of bound user threads from 0 to 1, the system worker threads are
- * unsuspended.
+ * @brief Called when a user thread is bound to the runtime.
*
- * @param g The runtime global state.
+ * If this action increments the count of bound user threads from 0 to
+ * 1, the system worker threads are unsuspended.
*
- * Preconditions:
- * - Global lock must be held.
+ * @pre Global lock must be held.
+ * @param g The runtime global state.
*/
COMMON_PORTABLE
void __cilkrts_enter_cilk(global_state_t *g);
/**
- * Called when a user thread is unbound from the runtime. If this decrements
- * the count of bound user threads to 0, the system worker threads are
- * suspended.
+ * @brief Called when a user thread is unbound from the runtime.
+ *
+ * If this action decrements the count of bound user threads to 0, the
+ * system worker threads are suspended.
*
- * @param g The runtime global state.
*
- * Preconditions:
- * - Global lock must be held.
+ * @pre Global lock must be held.
+ *
+ * @param g The runtime global state.
*/
COMMON_PORTABLE
void __cilkrts_leave_cilk(global_state_t *g);
/**
- * Prints out Cilk runtime statistics.
+ * @brief cilk_fiber_proc that runs the main scheduler loop on a
+ * user worker.
+ *
+ * @pre fiber's owner field should be set to the correct __cilkrts_worker
+ * @pre fiber's owner must be a user worker.
+ *
+ * @param fiber The scheduling fiber object.
+ */
+void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber);
+
+
+/**
+ * @brief Prints out Cilk runtime statistics.
*
* @param g The runtime global state.
*
@@ -326,6 +393,23 @@ void __cilkrts_leave_cilk(global_state_t *g);
COMMON_PORTABLE
void __cilkrts_dump_stats_to_stderr(global_state_t *g);
+#ifdef CILK_RECORD_REPLAY
+COMMON_PORTABLE
+char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode);
+
+/**
+ * @brief Used by exception handling code to simulate the popping of
+ * an entry from the worker's deque.
+ *
+ * @param w Worker whose deque we want to check
+ *
+ * @return @c __cilkrts_stack_frame of parent call
+ * @return NULL if the deque is empty
+ */
+COMMON_PORTABLE
+__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w);
+
+#endif
__CILKRTS_END_EXTERN_C
diff --git a/libcilkrts/runtime/signal_node.c b/libcilkrts/runtime/signal_node.c
index bcce9dbf254..4743bc03cb8 100644
--- a/libcilkrts/runtime/signal_node.c
+++ b/libcilkrts/runtime/signal_node.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************/
@@ -57,14 +62,14 @@
* cilk_semaphore_t is implemented as an auto-reset event on Windows, and
* as a semaphore_t on Linux and MacOS.
*/
-typedef struct signal_node_t
+struct signal_node_t
{
/** 0 if the worker should wait, 1 if it should be running. */
volatile unsigned int run;
/** OS-specific semaphore on which the worker can wait. */
cilk_semaphore_t sem;
-} signal_node_t;
+};
/******************************************************************************/
/* Semaphore-abstraction functions */
diff --git a/libcilkrts/runtime/signal_node.h b/libcilkrts/runtime/signal_node.h
index 6b05234b6e6..d11b3d85cf7 100644
--- a/libcilkrts/runtime/signal_node.h
+++ b/libcilkrts/runtime/signal_node.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -45,7 +50,7 @@
__CILKRTS_BEGIN_EXTERN_C
-/* Opaque type. */
+/** Opaque type. */
typedef struct signal_node_t signal_node_t;
/**
diff --git a/libcilkrts/runtime/spin_mutex.c b/libcilkrts/runtime/spin_mutex.c
new file mode 100644
index 00000000000..0a63ede7cba
--- /dev/null
+++ b/libcilkrts/runtime/spin_mutex.c
@@ -0,0 +1,104 @@
+/* spin_mutex.c -*-C-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+#include "spin_mutex.h"
+#include "bug.h"
+#include "os.h"
+#include "stats.h"
+
+// TBD (11/30/12): We should be doing a conditional test-xchg instead
+// of an unconditional xchg operation for the spin mutex.
+
+/* m->lock == 1 means that mutex M is locked */
+#define TRY_ACQUIRE(m) (__cilkrts_xchg(&(m)->lock, 1) == 0)
+
+/* ICC 11.1+ understands release semantics and generates an
+ ordinary store with a software memory barrier. */
+#if __ICC >= 1110
+#define RELEASE(m) __sync_lock_release(&(m)->lock)
+#else
+#define RELEASE(m) __cilkrts_xchg(&(m)->lock, 0)
+#endif
+
+
+spin_mutex* spin_mutex_create()
+{
+ spin_mutex* mutex = (spin_mutex*)__cilkrts_malloc(sizeof(spin_mutex));
+ spin_mutex_init(mutex);
+ return mutex;
+}
+
+void spin_mutex_init(struct spin_mutex *m)
+{
+ // Use a simple assignment so Inspector doesn't bug us about the
+ // interlocked exchange doing a read of an uninitialized variable.
+ // By definition there can't be a race when we're initializing the
+ // lock...
+ m->lock = 0;
+}
+
+void spin_mutex_lock(struct spin_mutex *m)
+{
+ int count;
+ const int maxspin = 1000; /* SWAG */
+ if (!TRY_ACQUIRE(m)) {
+ count = 0;
+ do {
+ do {
+ __cilkrts_short_pause();
+ if (++count >= maxspin) {
+ /* let the OS reschedule every once in a while */
+ __cilkrts_yield();
+ count = 0;
+ }
+ } while (m->lock != 0);
+ } while (!TRY_ACQUIRE(m));
+ }
+}
+
+int spin_mutex_trylock(struct spin_mutex *m)
+{
+ return TRY_ACQUIRE(m);
+}
+
+void spin_mutex_unlock(struct spin_mutex *m)
+{
+ RELEASE(m);
+}
+
+void spin_mutex_destroy(struct spin_mutex *m)
+{
+ __cilkrts_free(m);
+}
+
+/* End spin_mutex.c */
diff --git a/libcilkrts/runtime/spin_mutex.h b/libcilkrts/runtime/spin_mutex.h
new file mode 100644
index 00000000000..f5612b97c69
--- /dev/null
+++ b/libcilkrts/runtime/spin_mutex.h
@@ -0,0 +1,124 @@
+/* spin_mutex.h -*-C++-*-
+ *
+ *************************************************************************
+ *
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
+ **************************************************************************/
+
+/**
+ * @file spin_mutex.h
+ *
+ * @brief Support for Cilk runtime mutexes.
+ *
+ * Cilk runtime mutexes are implemented as simple spin loops.
+ *
+ * A spin_mutex is similar to a worker_mutex, except that it does not have
+ * an owner field.
+ *
+ * TBD: This class, worker_mutex, and os_mutex overlap quite a bit in
+ * functionality. Can we unify these mutexes somehow?
+ */
+#ifndef INCLUDED_SPIN_MUTEX_DOT_H
+#define INCLUDED_SPIN_MUTEX_DOT_H
+
+#include <cilk/common.h>
+#include "rts-common.h"
+#include "cilk_malloc.h"
+
+__CILKRTS_BEGIN_EXTERN_C
+
+/**
+ * Mutexes are treated as an abstract data type within the Cilk
+ * runtime system. They are implemented as simple spin loops.
+ */
+typedef struct spin_mutex {
+ /** Mutex spin loop variable. 0 if unowned, 1 if owned. */
+ volatile int lock;
+
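+ /** Padding intended to fill out a cache line. NOTE(review): the array is only 64/sizeof(int) - 1 bytes, so the struct is smaller than 64 bytes; confirm whether 64 - sizeof(int) was meant. */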
+ char pad[64/sizeof(int) - 1];
+} spin_mutex;
+
+
+/**
+ * @brief Create a new Cilk spin_mutex.
+ *
+ * @return A newly allocated and initialized spin mutex.
+ */
+COMMON_PORTABLE
+spin_mutex* spin_mutex_create();
+
+/**
+ * @brief Initialize a Cilk spin_mutex.
+ *
+ * @param m Spin_Mutex to be initialized.
+ */
+COMMON_PORTABLE
+void spin_mutex_init(spin_mutex *m);
+
+/**
+ * @brief Acquire a Cilk spin_mutex.
+ *
+ * Spins, pausing briefly and periodically yielding to the OS, until the
+ * spin_mutex is acquired. No worker is involved and no statistics are recorded.
+ *
+ * @param m Spin_Mutex to be acquired.
+ */
+COMMON_PORTABLE
+void spin_mutex_lock(struct spin_mutex *m);
+/**
+ * @brief Attempt to lock a Cilk spin_mutex and fail if it isn't available.
+ *
+ * @param m Spin_Mutex to be acquired.
+ *
+ * @return 1 if the spin_mutex was acquired.
+ * @return 0 if the spin_mutex was not acquired.
+ */
+COMMON_PORTABLE
+int spin_mutex_trylock(struct spin_mutex *m);
+
+/**
+ * @brief Release a Cilk spin_mutex.
+ *
+ * @param m Spin_Mutex to be released.
+ */
+COMMON_PORTABLE
+void spin_mutex_unlock(struct spin_mutex *m);
+
+/**
+ * @brief Deallocate a Cilk spin_mutex, releasing its memory with __cilkrts_free().
+ *
+ * @param m Spin_Mutex to be deallocated.
+ */
+COMMON_PORTABLE
+void spin_mutex_destroy(struct spin_mutex *m);
+
+__CILKRTS_END_EXTERN_C
+
+#endif // ! defined(INCLUDED_SPIN_MUTEX_DOT_H)
diff --git a/libcilkrts/runtime/stacks.c b/libcilkrts/runtime/stacks.c
deleted file mode 100644
index e8be685ac8f..00000000000
--- a/libcilkrts/runtime/stacks.c
+++ /dev/null
@@ -1,192 +0,0 @@
-/* stacks.c -*-C-*-
- *
- *************************************************************************
- *
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
- **************************************************************************/
-
-#include "stacks.h"
-#include "sysdep.h"
-#include "local_state.h"
-#include "frame_malloc.h"
-#include "cilk-tbb-interop.h"
-
-static void move_to_global(__cilkrts_worker *w, unsigned int until)
-{
- __cilkrts_stack_cache *local = &w->l->stack_cache;
- __cilkrts_stack_cache *global = &w->g->stack_cache;
-
- /* If the global cache appears to be full do not take out the lock. */
- if (global->n >= global->size)
- return;
-
- __cilkrts_mutex_lock(w, &global->lock);
- while (global->n < global->size && local->n > until) {
- global->stacks[global->n++] = local->stacks[--local->n];
- }
- __cilkrts_mutex_unlock(w, &global->lock);
-}
-
-static void push(__cilkrts_worker *w, __cilkrts_stack *sd)
-{
- __cilkrts_stack_cache *local = &w->l->stack_cache;
- const unsigned int local_size = local->size;
-
- /* If room in local, push sd to local stack-of-stacks */
- if (local->n < local_size) {
- local->stacks[local->n++] = sd;
- return;
- }
-
- if (local_size == 0) {
- __cilkrts_free_stack(w->g, sd);
- return;
- }
-
- /* No room in local stack-of-stacks.
- * Push half (round down) of the free stacks */
- move_to_global(w, local_size / 2);
-
- /* If some of the stacks didn't get moved (i.e., because the global
- * stack-of-stacks is full), then permanently destroy some stacks until we
- * are back down to half */
- while (local->n > local_size / 2)
- __cilkrts_free_stack(w->g, local->stacks[--local->n]);
-
- /* Push the stack onto our local stack-of-stacks */
- local->stacks[local->n++] = sd;
- return;
-}
-
-static __cilkrts_stack *pop(__cilkrts_worker *w)
-{
- __cilkrts_stack_cache *local = &w->l->stack_cache;
- __cilkrts_stack_cache *global = &w->g->stack_cache;
- __cilkrts_stack *sd = 0;
- if (local->n > 0)
- return local->stacks[--local->n];
- if (global->n > 0) {
- __cilkrts_mutex_lock(w, &global->lock);
- if (global->n > 0)
- sd = global->stacks[--global->n];
- __cilkrts_mutex_unlock(w, &global->lock);
- }
- return sd;
-}
-
-#ifdef _WIN32
-# include "stacks-win.h"
-# define okay_to_release(stack) (0 == (stack)->outstanding_references)
-#else
-# define okay_to_release(stack) (1)
-#endif // _WIN32
-
-void __cilkrts_release_stack(__cilkrts_worker *w,
- __cilkrts_stack *sd)
-{
- START_INTERVAL(w, INTERVAL_FREE_STACK);
- if (sd && okay_to_release(sd)) {
- __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_RELEASE,sd);
- push(w, sd);
- }
- STOP_INTERVAL(w, INTERVAL_FREE_STACK);
- return;
-}
-
-__cilkrts_stack *__cilkrts_get_stack(__cilkrts_worker *w)
-{
- __cilkrts_stack *sd;
-
- START_INTERVAL(w, INTERVAL_ALLOC_STACK);
- sd = pop (w);
- if (sd == NULL)
- sd = __cilkrts_make_stack(w);
- else
- __cilkrts_sysdep_reset_stack(sd);
- STOP_INTERVAL(w, INTERVAL_ALLOC_STACK);
- return sd;
-}
-
-static void flush(global_state_t *g,
- __cilkrts_stack_cache *c)
-{
- /*START_INTERVAL(w, INTERVAL_FREE_STACK);*/
- while (c->n > 0)
- __cilkrts_free_stack(g, c->stacks[--c->n]);
- /*STOP_INTERVAL(w, INTERVAL_FREE_STACK);*/
-}
-
-void __cilkrts_init_stack_cache(__cilkrts_worker *w,
- __cilkrts_stack_cache *c,
- unsigned int size)
-{
- __cilkrts_mutex_init(&c->lock);
- c->size = size;
- c->n = 0;
- c->stacks = __cilkrts_frame_malloc(w, size * sizeof(__cilkrts_stack *));
-#if 0 /* Causes problems on Linux due to generated call to intel_fast_memset */
- {
- unsigned int i;
- /* Not really needed -- only indices < n are valid */
- for (i = 0; i < size; i++)
- c->stacks[i] = 0;
- }
-#else
- if (size > 0)
- c->stacks[0] = 0;
-#endif
-}
-
-void __cilkrts_destroy_stack_cache(__cilkrts_worker *w,
- global_state_t *g,
- __cilkrts_stack_cache *c)
-{
- flush(g, c);
- __cilkrts_frame_free(w, c->stacks, c->size * sizeof(__cilkrts_stack *));
- c->stacks = 0;
- c->n = 0;
- c->size = 0;
- __cilkrts_mutex_destroy(w, &c->lock);
-}
-
-/* Free all but one local stack, returning to the global pool if possible. */
-
-void __cilkrts_trim_stack_cache(__cilkrts_worker *w)
-{
- __cilkrts_stack_cache *local = &w->l->stack_cache;
-
- if (local->n <= 1)
- return;
-
- START_INTERVAL(w, INTERVAL_FREE_STACK);
-
- move_to_global(w, 1);
-
- while (local->n > 1)
- __cilkrts_free_stack(w->g, local->stacks[--local->n]);
-
- STOP_INTERVAL(w, INTERVAL_FREE_STACK);
-}
-
-/* End stacks.c */
diff --git a/libcilkrts/runtime/stats.c b/libcilkrts/runtime/stats.c
index a8c597437f2..2659ed5b25f 100644
--- a/libcilkrts/runtime/stats.c
+++ b/libcilkrts/runtime/stats.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "stats.h"
@@ -40,7 +45,8 @@
static const char *names[] = {
/*[INTERVAL_IN_SCHEDULER]*/ "in scheduler",
/*[INTERVAL_WORKING]*/ " of which: working",
- /*[INTERVAL_STEALING]*/ " of which: stealing",
+ /*[INTERVAL_IN_RUNTIME]*/ " of which: in runtime",
+ /*[INTERVAL_STEALING]*/ " of which: stealing",
/*[INTERVAL_STEAL_SUCCESS]*/ "steal success: detach",
/*[INTERVAL_STEAL_FAIL_EMPTYQ]*/ "steal fail: empty queue",
/*[INTERVAL_STEAL_FAIL_LOCK]*/ "steal fail: victim locked",
@@ -64,15 +70,18 @@ static const char *names[] = {
/*[INTERVAL_MUTEX_LOCK_SPINNING]*/ " spinning",
/*[INTERVAL_MUTEX_LOCK_YIELDING]*/ " yielding",
/*[INTERVAL_MUTEX_TRYLOCK]*/ "mutex trylock",
- /*[INTERVAL_ALLOC_STACK]*/ "alloc stack",
- /*[INTERVAL_FREE_STACK]*/ "free stack",
+ /*[INTERVAL_FIBER_ALLOCATE]*/ "fiber_allocate",
+ /*[INTERVAL_FIBER_DEALLOCATE]*/ "fiber_deallocate",
+ /*[INTERVAL_FIBER_ALLOCATE_FROM_THREAD]*/ "fiber_allocate_from_thread",
+ /*[INTERVAL_FIBER_DEALLOCATE_FROM_THREAD]*/ "fiber_deallocate (thread)",
+ /*[INTERVAL_SUSPEND_RESUME_OTHER]*/ "fiber suspend self + resume",
+ /*[INTERVAL_DEALLOCATE_RESUME_OTHER]*/ "fiber deallocate self + resume",
};
#endif
void __cilkrts_init_stats(statistics *s)
{
int i;
-
for (i = 0; i < INTERVAL_N; ++i) {
s->start[i] = INVALID_START;
s->accum[i] = 0;
@@ -87,7 +96,7 @@ void __cilkrts_accum_stats(statistics *to, statistics *from)
{
int i;
- for (i = 0; i < INTERVAL_N; ++i) {
+ for (i = 0; i < INTERVAL_N; ++i) {
to->accum[i] += from->accum[i];
to->count[i] += from->count[i];
from->accum[i] = 0;
@@ -102,7 +111,7 @@ void __cilkrts_accum_stats(statistics *to, statistics *from)
void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i)
{
if (w) {
- statistics *s = &w->l->stats;
+ statistics *s = w->l->stats;
CILK_ASSERT(s->start[i] == INVALID_START);
s->count[i]++;
}
@@ -111,7 +120,7 @@ void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i)
void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i)
{
if (w) {
- statistics *s = &w->l->stats;
+ statistics *s = w->l->stats;
CILK_ASSERT(s->start[i] == INVALID_START);
s->start[i] = __cilkrts_getticks();
s->count[i]++;
@@ -121,7 +130,7 @@ void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i)
void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i)
{
if (w) {
- statistics *s = &w->l->stats;
+ statistics *s = w->l->stats;
CILK_ASSERT(s->start[i] != INVALID_START);
s->accum[i] += __cilkrts_getticks() - s->start[i];
s->start[i] = INVALID_START;
diff --git a/libcilkrts/runtime/stats.h b/libcilkrts/runtime/stats.h
index 98130d2463f..1fa0346b602 100644
--- a/libcilkrts/runtime/stats.h
+++ b/libcilkrts/runtime/stats.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -39,6 +44,9 @@
#define INCLUDED_STATS_DOT_H
/* #define CILK_PROFILE 1 */
+// @note The CILK_PROFILE flag and the interval statistics are known to be
+// broken, at least in programs that use Windows exceptions.
+// Enable this flag at your own peril. :)
#include <cilk/common.h>
#include "rts-common.h"
@@ -50,11 +58,12 @@
__CILKRTS_BEGIN_EXTERN_C
-/** Events that we measure. */
+/** @brief Events that we measure. */
enum interval
{
- INTERVAL_IN_SCHEDULER, ///< Time spent in the scheduler
+ INTERVAL_IN_SCHEDULER, ///< Time threads spend "bound" to Cilk
INTERVAL_WORKING, ///< Time spent working
+ INTERVAL_IN_RUNTIME, ///< Time spent executing runtime scheduling loop
INTERVAL_STEALING, ///< Time spent stealing work
INTERVAL_STEAL_SUCCESS, ///< Time to do a successful steal
INTERVAL_STEAL_FAIL_EMPTYQ, ///< Count of steal failures due to lack of stealable work
@@ -79,16 +88,21 @@ enum interval
INTERVAL_MUTEX_LOCK_SPINNING, ///< Time spent spinning in __cilkrts_mutex_lock for a worker
INTERVAL_MUTEX_LOCK_YIELDING, ///< Time spent yielding in __cilkrts_mutex_lock for a worker
INTERVAL_MUTEX_TRYLOCK, ///< Count of calls to __cilkrts_mutex_trylock
- INTERVAL_ALLOC_STACK, ///< Time spent allocating stacks
- INTERVAL_FREE_STACK, ///< Time spent freeing stacks
-
+ INTERVAL_FIBER_ALLOCATE, ///< Time spent calling cilk_fiber_allocate
+ INTERVAL_FIBER_DEALLOCATE, ///< Time spent calling cilk_fiber_deallocate (not from thread)
+ INTERVAL_FIBER_ALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_allocate_from_thread
+ INTERVAL_FIBER_DEALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_deallocate (from thread)
+ INTERVAL_SUSPEND_RESUME_OTHER, ///< Count of fiber suspend_self_and_resume_other
+ INTERVAL_DEALLOCATE_RESUME_OTHER, ///< Count of fiber deallocate_self_and_resume_other
INTERVAL_N ///< Number of intervals, must be last
};
/**
- * Struct that collects of all runtime statistics. There is an instance of this
- * structure in each worker's local_state, as well as one in the global_state_t
- * which will be used to accumulate the per-worker stats.
+ * @brief Struct that collects all runtime statistics.
+ *
+ * There is an instance of this structure in each worker's
+ * local_state, as well as one in the @c global_state_t which will be
+ * used to accumulate the per-worker stats.
*/
typedef struct statistics
{
@@ -115,26 +129,25 @@ typedef struct statistics
/**
* Initializes a statistics structure
*
- * @param to The statistics structure to initialize
+ * @param s The statistics structure to be initialized.
*/
COMMON_PORTABLE void __cilkrts_init_stats(statistics *s);
/**
- * Sums statistics from worker to the global struct
+ * @brief Sums statistics from worker to the global struct
*
- * @param to The statistics structure that will accumulate the information.
- * This is g->stats.
- * @param to The statistics structure that will be accumulated. This is the
- * statistics kept per-worker.
+ * @param to The statistics structure that will accumulate the information.
+ * This structure is usually @c g->stats.
+ * @param from The statistics structure that will be accumulated.
+ * This structure is usually statistics kept per worker.
*/
COMMON_PORTABLE
void __cilkrts_accum_stats(statistics *to, statistics *from);
/**
- * Mark the start of an interval by saving the current tick count.
+ * @brief Mark the start of an interval by saving the current tick count.
*
- * Precondition:
- * - Start time == INVALID_START
+ * @pre Start time == INVALID_START
*
* @param w The worker we're accumulating stats for.
* @param i The interval we're accumulating stats for.
@@ -143,11 +156,10 @@ COMMON_PORTABLE
void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i);
/**
- * Mark the end of an interval by adding the ticks since the start to the
- * accumulated time.
+ * @brief Mark the end of an interval by adding the ticks since the
+ * start to the accumulated time.
*
- * Precondition:
- * - Start time != INVALID_START
+ * @pre Start time != INVALID_START
*
* @param w The worker we're accumulating stats for.
* @param i The interval we're accumulating stats for.
@@ -156,7 +168,7 @@ COMMON_PORTABLE
void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i);
/**
- * Start and stop interval I, charging zero time against it
+ * @brief Start and stop interval I, charging zero time against it
*
* Precondition:
* - Start time == INVALID_START
@@ -167,15 +179,6 @@ void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i);
COMMON_PORTABLE
void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i);
-
-/**
- * Initialize an instance of the statistics structure
- *
- * @param s The statistics structure to be initialized.
- */
-COMMON_PORTABLE
-void __cilkrts_init_stats(statistics *s);
-
#ifdef CILK_PROFILE
COMMON_PORTABLE
void dump_stats_to_file(FILE *stat_file, statistics *s);
diff --git a/libcilkrts/runtime/symbol_test.c b/libcilkrts/runtime/symbol_test.c
index c5c8eb49a79..644bff62aa6 100644
--- a/libcilkrts/runtime/symbol_test.c
+++ b/libcilkrts/runtime/symbol_test.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/* simple program to verify that there are no undefined symbols in the runtime.
diff --git a/libcilkrts/runtime/sysdep-unix.c b/libcilkrts/runtime/sysdep-unix.c
index 9b827502be5..b3a895a712a 100644
--- a/libcilkrts/runtime/sysdep-unix.c
+++ b/libcilkrts/runtime/sysdep-unix.c
@@ -3,28 +3,33 @@
*
*************************************************************************
*
- * Copyright (C) 2010-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2010-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
*
**************************************************************************
*/
@@ -48,6 +53,15 @@
#include "metacall_impl.h"
+// On x86 processors (but not MIC processors), the compiler generated code to
+// save the FP state (rounding mode and the like) before calling setjmp. We
+// will need to restore that state when we resume.
+#ifndef __MIC__
+# if defined(__i386__) || defined(__x86_64)
+# define RESTORE_X86_FP_STATE
+# endif // defined(__i386__) || defined(__x86_64)
+#endif // __MIC__
+
// contains notification macros for VTune.
#include "cilk-ittnotify.h"
@@ -61,28 +75,36 @@
#include <string.h>
#include <pthread.h>
#include <unistd.h>
+#include <alloca.h>
#ifdef __APPLE__
//# include <scheduler.h> // Angle brackets include Apple's scheduler.h, not ours.
#endif
+
#ifdef __linux__
# include <sys/resource.h>
# include <sys/sysinfo.h>
#endif
+
#ifdef __FreeBSD__
# include <sys/resource.h>
// BSD does not define MAP_ANONYMOUS, but *does* define MAP_ANON. Aren't standards great!
# define MAP_ANONYMOUS MAP_ANON
#endif
-
-static void internal_enforce_global_visibility();
+#ifdef __VXWORKS__
+# include <vxWorks.h>
+# include <vxCpuLib.h>
+#endif
struct global_sysdep_state
{
- pthread_t *threads;
- size_t pthread_t_size; /* for cilk_db */
-};
+ pthread_t *threads; ///< Array of pthreads for system workers
+ size_t pthread_t_size; ///< for cilk_db
+};
+
+static void internal_enforce_global_visibility();
+
COMMON_SYSDEP
void __cilkrts_init_worker_sysdep(struct __cilkrts_worker *w)
@@ -136,15 +158,15 @@ static void internal_run_scheduler_with_exceptions(__cilkrts_worker *w)
__cilkrts_run_scheduler_with_exceptions(w);
}
+
+
/*
- * __cilkrts_worker_stub
+ * scheduler_thread_proc_for_system_worker
*
* Thread start function called when we start a new worker.
*
- * This function is exported so Piersol's stack trace displays
- * reasonable information
*/
-NON_COMMON void* __cilkrts_worker_stub(void *arg)
+NON_COMMON void* scheduler_thread_proc_for_system_worker(void *arg)
{
/*int status;*/
__cilkrts_worker *w = (__cilkrts_worker *)arg;
@@ -162,13 +184,72 @@ NON_COMMON void* __cilkrts_worker_stub(void *arg)
CILK_ASSERT(w->l->type == WORKER_SYSTEM);
/*status = pthread_mutex_unlock(&__cilkrts_global_mutex);
CILK_ASSERT(status == 0);*/
-
+
__cilkrts_set_tls_worker(w);
+
+ // Create a cilk fiber for this worker on this thread.
+ START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD) {
+ w->l->scheduling_fiber = cilk_fiber_allocate_from_thread();
+ cilk_fiber_set_owner(w->l->scheduling_fiber, w);
+ } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD);
+
internal_run_scheduler_with_exceptions(w);
+ START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD) {
+ // Deallocate the scheduling fiber. This operation reverses the
+ // effect of cilk_fiber_allocate_from_thread() and must be done in this
+ // thread before it exits.
+ int ref_count = cilk_fiber_deallocate_from_thread(w->l->scheduling_fiber);
+ // Scheduling fibers should never have extra references to them.
+ // We only get extra references into fibers because of Windows
+ // exceptions.
+ CILK_ASSERT(0 == ref_count);
+ w->l->scheduling_fiber = NULL;
+ } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD);
+
return 0;
}
+
+/*
+ * __cilkrts_user_worker_scheduling_stub
+ *
+ * Routine for the scheduling fiber created for an imported user
+ * worker thread. This method is analogous to
+ * scheduler_thread_proc_for_system_worker.
+ *
+ */
+void __cilkrts_user_worker_scheduling_stub(cilk_fiber* fiber, void* null_arg)
+{
+ __cilkrts_worker *w = __cilkrts_get_tls_worker();
+
+ // Sanity check.
+ CILK_ASSERT(WORKER_USER == w->l->type);
+
+ // Enter the scheduling loop on the user worker.
+ // This function will never return.
+ __cilkrts_run_scheduler_with_exceptions(w);
+
+ // A WORKER_USER, at some point, will resume on the original stack and leave
+ // Cilk. Under no circumstances do we ever exit off of the bottom of this
+ // stack.
+ CILK_ASSERT(0);
+}
+
+/**
+ * We are exporting a function with this name to Inspector?
+ * What a confusing name...
+ *
+ * This function is exported so Piersol's stack trace displays
+ * reasonable information.
+ */
+void* __cilkrts_worker_stub(void* arg)
+{
+ return scheduler_thread_proc_for_system_worker(arg);
+}
+
+
+
// /* Return the lesser of the argument and the operating system
// limit on the number of workers (threads) that may or ought
// to be created. */
@@ -199,12 +280,13 @@ static void write_version_file (global_state_t *, int);
*/
static void create_threads(global_state_t *g, int base, int top)
{
- int i;
-
- for (i = base; i < top; i++) {
- int status;
-
- status = pthread_create(&g->sysdep->threads[i], NULL, __cilkrts_worker_stub, g->workers[i]);
+ // TBD(11/30/12): We want to insert code providing the option of
+ // pinning system workers to cores.
+ for (int i = base; i < top; i++) {
+ int status = pthread_create(&g->sysdep->threads[i],
+ NULL,
+ scheduler_thread_proc_for_system_worker,
+ g->workers[i]);
if (status != 0)
__cilkrts_bug("Cilk runtime error: thread creation (%d) failed: %d\n", i, status);
}
@@ -224,7 +306,7 @@ static void * create_threads_and_work (void * arg)
threads_created = 1;
// Ideally this turns into a tail call that wipes out this stack frame.
- return __cilkrts_worker_stub (arg);
+ return scheduler_thread_proc_for_system_worker(arg);
}
#endif
void __cilkrts_start_workers(global_state_t *g, int n)
@@ -304,261 +386,47 @@ void __cilkrts_stop_workers(global_state_t *g)
return;
}
+#ifdef RESTORE_X86_FP_STATE
+
/*
* Restore the floating point state that is stored in a stack frame at each
* spawn. This should be called each time a frame is resumed.
+ *
+ * Only valid for IA32 and Intel64 processors.
*/
-static inline void restore_fp_state (__cilkrts_stack_frame *sf) {
-#if defined __i386__ || defined __x86_64
+static inline void restore_x86_fp_state (__cilkrts_stack_frame *sf) {
__asm__ ( "ldmxcsr %0\n\t"
"fnclex\n\t"
"fldcw %1"
:
: "m" (sf->mxcsr), "m" (sf->fpcsr));
-#else
-# warning "unimplemented: code to restore the floating point state"
-#endif
}
+#endif // RESTORE_X86_FP_STATE
-/* Resume user code after a spawn or sync, possibly on a different stack.
-
- Note: Traditional BSD longjmp would fail with a "longjmp botch"
- error rather than change the stack pointer in the wrong direction.
- Linux appears to let the program take the chance.
-
- This function is called to resume after a sync or steal. In both cases
- ff->sync_sp starts out containing the original stack pointer of the loot.
- In the case of a steal, the stack pointer stored in sf points to the
- thief's new stack. In the case of a sync, the stack pointer stored in sf
- points into original stack (i.e., it is either the same as ff->sync_sp or a
- small offset from it caused by pushes and pops between the spawn and the
- sync). */
-NORETURN __cilkrts_resume(__cilkrts_worker *w, full_frame *ff,
- __cilkrts_stack_frame *sf)
-{
- // Assert: w is the only worker that knows about ff right now, no
- // lock is needed on ff.
-
- const int flags = sf->flags;
- void *sp;
-
- w->current_stack_frame = sf;
- sf->worker = w;
- CILK_ASSERT(flags & CILK_FRAME_SUSPENDED);
- CILK_ASSERT(!sf->call_parent);
- CILK_ASSERT(w->head == w->tail);
-
- if (ff->simulated_stolen)
- /* We can't prevent __cilkrts_make_unrunnable_sysdep from discarding
- * the stack pointer because there is no way to tell it that we are
- * doing a simulated steal. Thus, we must recover the stack pointer
- * here. */
- SP(sf) = ff->sync_sp;
-
- sp = SP(sf);
-
- /* Debugging: make sure stack is accessible. */
- ((volatile char *)sp)[-1];
-
- __cilkrts_take_stack(ff, sp);
-
- /* The leftmost frame has no allocated stack */
- if (ff->simulated_stolen)
- CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED && ff->sync_sp == NULL);
- else if (flags & CILK_FRAME_UNSYNCHED)
- /* XXX By coincidence sync_sp could be null. */
- CILK_ASSERT(ff->stack_self != NULL && ff->sync_sp != NULL);
- else
- /* XXX This frame could be resumed unsynched on the leftmost stack */
- CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w) &&
- ff->sync_sp == 0);
- /*if (w->l->type == WORKER_USER)
- CILK_ASSERT(ff->stack_self == NULL);*/
-
- // Notify the Intel tools that we're stealing code
- ITT_SYNC_ACQUIRED(sf->worker);
-#ifdef ENABLE_NOTIFY_ZC_INTRINSIC
- __notify_zc_intrinsic("cilk_continue", sf);
-#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC
-
- if (ff->stack_self) {
- // Notify TBB that we are resuming.
- __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ADOPT, ff->stack_self);
- }
-
- sf->flags &= ~CILK_FRAME_SUSPENDED;
-
-#ifndef __MIC__
- if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) {
- // Restore the floating point state that was set in this frame at the
- // last spawn.
- //
- // This feature is only available in ABI 1 or later frames.
- restore_fp_state(sf);
- }
-#endif
-
- CILK_LONGJMP(sf->ctx);
- /*NOTREACHED*/
- /* Intel's C compiler respects the preceding lint pragma */
-}
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <string.h>
-#include <sys/mman.h>
-#include <errno.h>
-
-struct __cilkrts_stack
-{
- /* If /size/ and /top/ are zero this is the system stack for thread /owner/.
- If /top/ and /size/ are both nonzero this is an allocated stack and
- /owner/ is undefined. */
- char *top;
- size_t size;
- pthread_t owner;
-
- /* Cilk/TBB interop callback routine/data. */
- __cilk_tbb_pfn_stack_op stack_op_routine;
- void *stack_op_data;
-};
-
-void __cilkrts_set_stack_op(__cilkrts_stack *sd,
- __cilk_tbb_stack_op_thunk o)
-{
- sd->stack_op_routine = o.routine;
- sd->stack_op_data = o.data;
-}
-
-void __cilkrts_invoke_stack_op(__cilkrts_worker *w,
- enum __cilk_tbb_stack_op op,
- __cilkrts_stack *sd)
-{
- // If we don't have a stack we can't do much, can we?
- if (NULL == sd)
- return;
-
- if (0 == sd->stack_op_routine)
- {
- return;
- }
-
- (*sd->stack_op_routine)(op,sd->stack_op_data);
- if (op == CILK_TBB_STACK_RELEASE)
- {
- sd->stack_op_routine = 0;
- sd->stack_op_data = 0;
- }
-}
-
-/*
- * tbb_interop_save_stack_op_info
- *
- * Save TBB interop information for an unbound thread. It will get picked
- * up when the thread is bound to the runtime.
- */
-void tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o)
-{
- __cilk_tbb_stack_op_thunk *saved_thunk =
- __cilkrts_get_tls_tbb_interop();
-
- // If there is not already space allocated, allocate some.
- if (NULL == saved_thunk) {
- saved_thunk = (__cilk_tbb_stack_op_thunk*)
- __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
- __cilkrts_set_tls_tbb_interop(saved_thunk);
- }
-
- *saved_thunk = o;
-}
/*
- * tbb_interop_save_info_from_stack
+ * @brief Returns the stack address for resuming execution of sf.
*
- * Save TBB interop information from the __cilkrts_stack. It will get picked
- * up when the thread is bound to the runtime next time.
- */
-void tbb_interop_save_info_from_stack(__cilkrts_stack *sd)
-{
- __cilk_tbb_stack_op_thunk *saved_thunk;
-
- // If there is no TBB interop data, just return
- if (NULL == sd || NULL == sd->stack_op_routine) return;
-
- saved_thunk = __cilkrts_get_tls_tbb_interop();
-
- // If there is not already space allocated, allocate some.
- if (NULL == saved_thunk) {
- saved_thunk = (__cilk_tbb_stack_op_thunk*)
- __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk));
- __cilkrts_set_tls_tbb_interop(saved_thunk);
- }
-
- saved_thunk->routine = sd->stack_op_routine;
- saved_thunk->data = sd->stack_op_data;
-}
-
-/*
- * tbb_interop_use_saved_stack_op_info
+ * This method takes in the top of the stack to use, and then returns
+ * a properly aligned address for resuming execution of sf.
*
- * If there's TBB interop information that was saved before the thread was
- * bound, apply it now
- */
-void tbb_interop_use_saved_stack_op_info(__cilkrts_worker *w,
- __cilkrts_stack *sd)
-{
- struct __cilk_tbb_stack_op_thunk *saved_thunk =
- __cilkrts_get_tls_tbb_interop();
-
- // If we haven't allocated a TBB interop index, we don't have any saved info
- if (NULL == saved_thunk) return;
-
- // Associate the saved info with the __cilkrts_stack
- __cilkrts_set_stack_op(sd, *saved_thunk);
-
- // Free the saved data. We'll save it again if needed when the code
- // returns from the initial function
- tbb_interop_free_stack_op_info();
-}
-
-/*
- * tbb_interop_free_stack_op_info
+ * @param sf - The stack frame we want to resume executing.
+ * @param stack_base - The top of the stack we want to execute sf on.
*
- * Free saved TBB interop memory. Should only be called when the thread is
- * not bound.
*/
-void tbb_interop_free_stack_op_info(void)
-{
- struct __cilk_tbb_stack_op_thunk *saved_thunk =
- __cilkrts_get_tls_tbb_interop();
-
- // If we haven't allocated a TBB interop index, we don't have any saved info
- if (NULL == saved_thunk) return;
-
- // Free the memory and wipe out the TLS value
- __cilkrts_free(saved_thunk);
- __cilkrts_set_tls_tbb_interop(NULL);
-}
-
-void __cilkrts_bind_stack(full_frame *ff, char *new_sp,
- __cilkrts_stack *parent_stack,
- __cilkrts_worker *owner)
-{
- __cilkrts_stack_frame *sf = ff->call_stack;
- __cilkrts_stack *sd = ff->stack_self;
- CILK_ASSERT(sizeof SP(sf) <= sizeof (size_t));
-
- SP(sf) = new_sp;
-
- // Need to do something with parent_stack and owner?
- return;
-}
-
-char *__cilkrts_stack_to_pointer(__cilkrts_stack *s, __cilkrts_stack_frame *sf)
-{
- if (!s)
- return NULL;
-
+static char* get_sp_for_executing_sf(char* stack_base,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+// The original calculation that had been done to correct the stack
+// pointer when resuming execution.
+//
+// But this code was never getting called in the eng branch anyway...
+//
+// TBD(11/30/12): This logic needs to be revisited to make sure that
+// we are doing the proper calculation in reserving space for outgoing
+// arguments on all platforms and architectures.
+#if 0
/* Preserve outgoing argument space and stack alignment on steal.
Outgoing argument space is bounded by the difference between
stack and frame pointers. Some user code is known to rely on
@@ -569,191 +437,139 @@ char *__cilkrts_stack_to_pointer(__cilkrts_stack *s, __cilkrts_stack_frame *sf)
char *fp = FP(sf), *sp = SP(sf);
int fp_align = (int)(size_t)fp & SMASK;
ptrdiff_t space = fp - sp;
- char *top_aligned = (char *)((((size_t)s->top - SMASK) & ~(size_t)SMASK) | fp_align);
+
+ fprintf(stderr, "Here: fp = %p, sp = %p\n", fp, sp);
+ char *top_aligned = (char *)((((size_t)stack_base - SMASK) & ~(size_t)SMASK) | fp_align);
/* Don't allocate an unreasonable amount of stack space. */
+
+ fprintf(stderr, "Here: stack_base = %p, top_aligned=%p, space=%ld\n",
+ stack_base, top_aligned, space);
if (space < 32)
space = 32 + (space & SMASK);
else if (space > 40 * 1024)
space = 40 * 1024 + (space & SMASK);
+
return top_aligned - space;
}
- return s->top - 256;
-}
-
-#define PAGE 4096
+#endif
-/*
- * Return a pointer to the top of a "tiny" stack that is 64 KB (plus a buffer
- * page on each end).
- *
- * No reasonable program should need more than 64 KB, so if we hit a buffer,
- * we're doing it wrong.
- */
-void *sysdep_make_tiny_stack (__cilkrts_worker *w)
-{
- char *p;
- __cilkrts_stack *s;
-
-#ifndef MAP_ANONYMOUS
-#define MAP_ANONYMOUS MAP_ANON
-#endif
-
- p = mmap(0, PAGE * 18, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
- -1, 0);
- if (MAP_FAILED == p) {
- // For whatever reason (probably ran out of memory), mmap() failed.
- // There is no stack to return, so the program loses parallelism.
- if (0 == __cilkrts_xchg(&w->g->failure_to_allocate_stack, 1)) {
- cilkos_warning("Failed to allocate memory for a new stack.\n"
- "Continuing with some loss of parallelism.\n");
+#define PERFORM_FRAME_SIZE_CALCULATION 0
+
+ char* new_stack_base = stack_base - 256;
+
+#if PERFORM_FRAME_SIZE_CALCULATION
+ // If there is a frame size saved, then use that as the
+ // correction instead of 256.
+ if (ff->frame_size > 0) {
+ if (ff->frame_size < 40*1024) {
+ new_stack_base = stack_base - ff->frame_size;
+ }
+ else {
+ // If for some reason, our frame size calculation is giving us
+ // a number which is bigger than about 10 pages, then
+ // there is likely something wrong here? Don't allocate
+ // an unreasonable amount of space.
+ new_stack_base = stack_base - 40*1024;
}
- return NULL;
}
- mprotect(p + (17 * PAGE), PAGE, PROT_NONE);
- mprotect(p, PAGE, PROT_NONE);
-
- return (void*)(p + (17 * PAGE));
-}
-
-/*
- * Free a "tiny" stack (created with sysdep_make_tiny_stack()).
- */
-void sysdep_destroy_tiny_stack (void *p)
-{
- char *s = (char*)p;
- s = s - (17 * PAGE);
- munmap((void*)s, 18 * PAGE);
-}
-
-__cilkrts_stack *__cilkrts_make_stack(__cilkrts_worker *w)
-{
- __cilkrts_stack *s;
- char *p;
- size_t stack_size;
-
-#if defined CILK_PROFILE && defined HAVE_SYNC_INTRINSICS
-#define PROFILING_STACKS 1
-#else
-#define PROFILING_STACKS 0
#endif
-
- if (PROFILING_STACKS || w->g->max_stacks > 0) {
- if (w->g->max_stacks > 0 && w->g->stacks > w->g->max_stacks) {
- /* No you can't have a stack. Not yours. */
- return NULL;
- } else {
- /* We think we are allowed to allocate a stack. Perform an atomic
- increment on the counter and verify that there really are enough
- stacks remaining for us. */
- long hwm = __sync_add_and_fetch(&w->g->stacks, 1);
- if (w->g->max_stacks > 0 && hwm > w->g->max_stacks) {
- /* Whoops! Another worker got to it before we did.
- C'est la vie. */
- return NULL;
- }
-
-#ifdef CILK_PROFILE
- /* Keeping track of the largest stack count observed by this worker
- is part of profiling. The copies will be merged at the end of
- execution. */
- if (PROFILING_STACKS && hwm > w->l->stats.stack_hwm) {
- w->l->stats.stack_hwm = hwm;
- }
+
+ // Whatever correction we choose, align the final stack top.
+ // This alignment seems to be necessary in particular on 32-bit
+ // Linux, and possibly Mac. (Is 32-byte alignment sufficient?)
+ /* 256-byte alignment. Why not? */
+ const uintptr_t align_mask = ~(256 -1);
+ new_stack_base = (char*)((size_t)new_stack_base & align_mask);
+ return new_stack_base;
+}
+
+char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf)
+{
+#if FIBER_DEBUG >= 4
+ fprintf(stderr, "ThreadId=%p (fiber_proc_to_resume), Fiber %p. sf = %p. ff=%p, ff->sync_sp=%p\n",
+ cilkos_get_current_thread_id(),
+ fiber,
+ sf,
+ ff, ff->sync_sp);
#endif
- }
- }
-
- stack_size = w->g->stack_size;
- CILK_ASSERT(stack_size > 0);
-
- p = mmap(0, stack_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS,
- -1, 0);
- if (MAP_FAILED == p) {
- // For whatever reason (probably ran out of memory), mmap() failed.
- // There is no stack to return, so the program loses parallelism.
- if (0 == __cilkrts_xchg(&w->g->failure_to_allocate_stack, 1)) {
- cilkos_warning("Failed to allocate memory for a new stack.\n"
- "Continuing with some loss of parallelism.\n");
- }
- return NULL;
- }
- mprotect(p + stack_size - PAGE, PAGE, PROT_NONE);
- mprotect(p, PAGE, PROT_NONE);
- s = __cilkrts_malloc(sizeof (struct __cilkrts_stack));
- CILK_ASSERT(s);
- s->top = p + stack_size - PAGE;
- s->size = stack_size - (PAGE + PAGE);
- memset(&s->owner, 0, sizeof s->owner);
+ CILK_ASSERT(fiber);
+ void* sp = (void*)get_sp_for_executing_sf(cilk_fiber_get_stack_base(fiber), ff, sf);
+ SP(sf) = sp;
- s->stack_op_routine = NULL;
- s->stack_op_data = NULL;
+ /* Debugging: make sure stack is accessible. */
+ ((volatile char *)sp)[-1];
- return s;
+ // Adjust the saved_sp to account for the SP we're about to run. This will
+ // allow us to track fluctuations in the stack
+#if FIBER_DEBUG >= 4
+ fprintf(stderr, "ThreadId=%p, about to take stack ff=%p, sp=%p, sync_sp=%p\n",
+ cilkos_get_current_thread_id(),
+ ff,
+ sp,
+ ff->sync_sp);
+#endif
+ __cilkrts_take_stack(ff, sp);
+ return sp;
}
-void __cilkrts_free_stack(global_state_t *g,
- __cilkrts_stack *sd)
-{
- char *s;
- size_t size;
-
- CILK_ASSERT(g->max_stacks <= 0);
-#if defined CILK_PROFILE && defined HAVE_SYNC_INTRINSICS
- __sync_sub_and_fetch(&g->stacks, 1);
+NORETURN sysdep_longjmp_to_sf(char* new_sp,
+ __cilkrts_stack_frame *sf,
+ full_frame *ff_for_exceptions /* UNUSED on Unix */)
+{
+#if FIBER_DEBUG >= 3
+ fprintf(stderr,
+ "ThreadId=%p. resume user code, sf=%p, new_sp = %p, original SP(sf) = %p, FP(sf) = %p\n",
+ cilkos_get_current_thread_id(), sf, new_sp, SP(sf), FP(sf));
#endif
- s = sd->top;
- size = sd->size;
-
- CILK_ASSERT(s && size);
+ // Set the stack pointer.
+ SP(sf) = new_sp;
-#if __GNUC__
- {
- char *fp = __builtin_frame_address(0);
- CILK_ASSERT(fp < s - 10000 || fp > s);
+#ifdef RESTORE_X86_FP_STATE
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) {
+ // Restore the floating point state that was set in this frame at the
+ // last spawn.
+ //
+ // This feature is only available in ABI 1 or later frames, and only
+ // needed on IA32 or Intel64 processors.
+ restore_x86_fp_state(sf);
}
#endif
- /* DEBUG: */
- ((volatile char *)s)[-1];
- s += PAGE;
- size += PAGE + PAGE;
-
- if (munmap(s - size, size) < 0)
- __cilkrts_bug("Cilk: stack release failed error %d", errno);
+ CILK_LONGJMP(sf->ctx);
+}
- sd->top = 0;
- sd->size = 0;
- __cilkrts_free(sd);
- return;
-}
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <errno.h>
-void __cilkrts_sysdep_reset_stack(__cilkrts_stack *sd)
-{
- CILK_ASSERT(sd->stack_op_routine == NULL);
- CILK_ASSERT(sd->stack_op_data == NULL);
- return;
-}
void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w,
full_frame *ff,
__cilkrts_stack_frame *sf,
- int state_valid,
+ int is_loot,
const char *why)
{
(void)w; /* unused */
sf->except_data = 0;
- if (state_valid && ff->frame_size == 0)
+ if (is_loot)
+ {
+ if (ff->frame_size == 0)
ff->frame_size = __cilkrts_get_frame_size(sf);
+ // Null loot's sp for debugging purposes (so we'll know it's not valid)
SP(sf) = 0;
+ }
}
-
/*
* __cilkrts_sysdep_is_worker_thread_id
*
@@ -765,7 +581,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g,
int i,
void *thread_id)
{
-#ifdef __linux__
+#if defined( __linux__) || defined(__VXWORKS__)
pthread_t tid = *(pthread_t *)thread_id;
if (i < 0 || i > g->total_workers)
return 0;
@@ -776,42 +592,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g,
#endif
}
-int __cilkrts_sysdep_bind_thread(__cilkrts_worker *w)
-{
- if (w->self < 0) {
- // w->self < 0 means that this is an ad-hoc user worker not known to
- // the global state. Nobody will ever try to steal from it, so it
- // does not need a scheduler_stack.
- return 0; // success
- }
-
- // Allocate a scheduler_stack for this user worker if one does not
- // already exist.
- if (NULL == w->l->scheduler_stack) {
-
- // The scheduler stack does not need to be as large as a normal
- // programm stack. Returns null on failure (probably indicating that
- // we're out of memory).
- w->l->scheduler_stack = sysdep_make_tiny_stack(w);
- // Return success (zero) if we successfully allocated a scheduler
- // stack and failure (non-zero) if stack allocation returned NULL.
- return (NULL == w->l->scheduler_stack ? -1 : 0);
- }
-
- return 0; // success
-}
-
-void __cilkrts_sysdep_unbind_thread(__cilkrts_worker *w)
-{
- // Needs to be implemented
-}
-
-int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd,
- struct __cilkrts_region_properties *props)
-{
- return 0;
-}
/*************************************************************
@@ -823,6 +604,10 @@ int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd,
#include <stdio.h>
#include <sys/utsname.h>
+#ifdef __VXWORKS__
+#include <version.h>
+# endif
+
/* (Non-static) dummy function is used by get_runtime_path() to find the path
* to the .so containing the Cilk runtime.
*/
@@ -886,7 +671,14 @@ static void write_version_file (global_state_t *g, int n)
VERSION_MINOR,
VERSION_REV,
VERSION_BUILD);
+#ifdef __VXWORKS__
+ char * vxWorksVer = VXWORKS_VERSION;
+ fprintf(fp, "Cross compiled for %s\n",vxWorksVer);
+ // User and host are not available if VxWorks is cross-compiled on a Windows build host
+#else
fprintf(fp, "Built by "BUILD_USER" on host "BUILD_HOST"\n");
+#endif
+
fprintf(fp, "Compilation date: "__DATE__" "__TIME__"\n");
#ifdef __INTEL_COMPILER
@@ -930,7 +722,11 @@ static void write_version_file (global_state_t *g, int n)
fprintf(fp, "\nThread information\n");
fprintf(fp, "==================\n");
+#ifdef __VXWORKS__
+ fprintf(fp, "System cores: %d\n", (int)__builtin_popcount(vxCpuEnabledGet()));
+#else
fprintf(fp, "System cores: %d\n", (int)sysconf(_SC_NPROCESSORS_ONLN));
+#endif
fprintf(fp, "Cilk workers requested: %d\n", n);
#if (PARALLEL_THREAD_CREATE)
fprintf(fp, "Thread creator: Private (parallel)\n");
@@ -973,6 +769,7 @@ void __cilkrts_establish_c_stack(void)
*/
}
+
/*
* internal_enforce_global_visibility
*
@@ -993,95 +790,18 @@ void internal_enforce_global_visibility()
if( handle) dlclose(handle);
}
-/*
- * Special scheduling entrypoint for a WORKER_USER. Ensure a new stack has been
- * created and the stack pointer has been placed on it before entering
- * worker_user_scheduler().
- *
- * Call this function the first time a WORKER_USER has returned to a stolen
- * parent and cannot continue. Every time after that, the worker can simply
- * longjmp() like any other worker.
- */
-static NOINLINE
-void worker_user_scheduler()
-{
- __cilkrts_worker *w = __cilkrts_get_tls_worker();
-
- // This must be a user worker
- CILK_ASSERT(WORKER_USER == w->l->type);
-
- // Run the continuation function passed to longjmp_into_runtime
- run_scheduling_stack_fcn(w);
- w->reducer_map = 0;
-
- cilkbug_assert_no_uncaught_exception();
-
- STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER);
- STOP_INTERVAL(w, INTERVAL_WORKING);
-
- // Enter the scheduling loop on the user worker. This function will
- // never return
- __cilkrts_run_scheduler_with_exceptions(w);
-
- // A WORKER_USER, at some point, will resume on the original stack and
- // leave Cilk. Under no circumstances do we ever exit off of the bottom
- // of this stack.
- CILK_ASSERT(0);
-}
-
-/*
- * __cilkrts_sysdep_import_user_thread
- *
- * Imports a user thread the first time it returns to a stolen parent
- */
-
-void __cilkrts_sysdep_import_user_thread(__cilkrts_worker *w)
+void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf)
{
- void *ctx[5]; // Jump buffer for __builtin_setjmp/longjmp.
-
- CILK_ASSERT(w->l->scheduler_stack);
-
- // It may be that this stack has been used before (i.e., the worker was
- // bound to a thread), and in principle, we could just jump back into
- // the runtime, but we'd have to keep around extra data to do that, and
- // there is no harm in starting over, here.
-
- // Move the stack pointer onto the scheduler stack. The subsequent
- // call will move execution onto that stack. We never return from
- // that call, and every time we longjmp_into_runtime() after this,
- // the w->l->env jump buffer will be populated.
- if (0 == __builtin_setjmp(ctx)) {
- ctx[2] = w->l->scheduler_stack; // replace the stack pointer.
- __builtin_longjmp(ctx, 1);
- } else {
- // We can't just pass the worker through as a parameter to
- // worker_user_scheduler because the generated code might try to
- // retrieve w using stack-relative addressing instead of bp-relative
- // addressing and would get a bogus value.
- worker_user_scheduler(); // noinline, does not return.
+// If we're not going to restore, don't bother saving it
+#ifdef RESTORE_X86_FP_STATE
+ if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1)
+ {
+ __asm__ ("stmxcsr %0" : "=m" (sf->mxcsr));
+ __asm__ ("fnstsw %0" : "=m" (sf->fpcsr));
}
-
- CILK_ASSERT(0); // Should never reach this point.
-}
-
-/*
- * Make a fake user stack descriptor to correspond to the user's stack.
- */
-__cilkrts_stack *sysdep_make_user_stack (__cilkrts_worker *w)
-{
- return calloc(1, sizeof(struct __cilkrts_stack));
-}
-
-/*
- * Destroy the fake user stack descriptor that corresponds to the user's stack.
- */
-void sysdep_destroy_user_stack (__cilkrts_stack *sd)
-{
- free(sd);
+#endif
}
-
-
/*
Local Variables: **
c-file-style:"bsd" **
diff --git a/libcilkrts/runtime/sysdep.h b/libcilkrts/runtime/sysdep.h
index 9804654cf5b..5b3c94fd58c 100644
--- a/libcilkrts/runtime/sysdep.h
+++ b/libcilkrts/runtime/sysdep.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -43,34 +48,27 @@
#include "os.h"
#include "os_mutex.h"
-#include "cilk-tbb-interop.h"
-
-__CILKRTS_BEGIN_EXTERN_C
-
-// Part of inspector ABI
-typedef struct __cilkrts_region_properties __cilkrts_region_properties;
-
/**
- * Bind the __cilkrts_stack_frame to the stack
+ * @brief Default page size for Cilk stacks.
*
- * @param ff full_frame for the frame we're binding
- * @param new_sp Not used.
- * @param parent_stack __cilkrts_stack of this frames parent
- * @param owner __cilkrts_worker for the user worker thread that captains
- * the team that this stack is contributing to.
+ * All Cilk stacks should have size that is a multiple of this value.
*/
-COMMON_SYSDEP
-void __cilkrts_bind_stack(full_frame *ff,
- char *new_sp,
- __cilkrts_stack *parent_stack,
- __cilkrts_worker *owner);
+#define PAGE 4096
/**
- * Return an address on the specified stack. Mostly obsolete.
- */
-COMMON_SYSDEP
-char *__cilkrts_stack_to_pointer(__cilkrts_stack *sd,
- __cilkrts_stack_frame *sf);
+ * @brief Size of a scheduling stack.
+ *
+ * A scheduling stack is used by system workers to execute runtime
+ * code. Since this stack is only executing runtime functions, we
+ * don't need it to be a full size stack.
+ *
+ * The number "18" should be small since the runtime doesn't require a
+ * large stack, but large enough to call "printf" for debugging.
+ */
+#define CILK_SCHEDULING_STACK_SIZE (18*PAGE)
+
+__CILKRTS_BEGIN_EXTERN_C
+
/**
* Code to initialize the system-dependent portion of the global_state_t
@@ -95,73 +93,6 @@ void __cilkrts_destroy_global_sysdep(global_state_t *g);
COMMON_SYSDEP
void __cilkrts_establish_c_stack(void);
-/**
- * Allocate and initialize a __cilkrts_stack.
- *
- * @param w The worker to attribute this stack to - mostly used for stats.
- *
- * @return Pointer to the initilaized __cilkrts_stack.
- * @return NULL if we failed to allocate the stack.
- */
-COMMON_SYSDEP
-__cilkrts_stack *__cilkrts_make_stack(__cilkrts_worker *w);
-
-/**
- * Release any resources associated with a __cilkrts_stack
- *
- * @param g The global state - used for stats
- * @param sd The __cilkrts_stack to be released
- */
-COMMON_SYSDEP
-void __cilkrts_free_stack(global_state_t *g, __cilkrts_stack *sd);
-
-/**
- * Allocate a __cilkrts_stack with a small size for use as a scheduling stack.
- *
- * @param w The worker to attribute this stack to - mostly used for stats.
- *
- * @return Pointer to the initilaized __cilkrts_stack.
- * @return NULL if we failed to allocate the stack.
- */
-COMMON_SYSDEP
-void *sysdep_make_tiny_stack (__cilkrts_worker *w);
-
-/**
- * Release any resources associated with a __cilkrts_stack created as a
- * scheduling stack.
- *
- * @param sd The __cilkrts_stack to be released
- */
-COMMON_SYSDEP
-void sysdep_destroy_tiny_stack (void *sd);
-
-/**
- * Allocate and initialize a __cilkrts_stack to use to run user code.
- *
- * @param w The worker to attribute this stack to - mostly used for stats.
- *
- * @return Pointer to the initilaized __cilkrts_stack.
- * @return NULL if we failed to allocate the stack.
- */
-COMMON_SYSDEP
-__cilkrts_stack *sysdep_make_user_stack (__cilkrts_worker *w);
-
-/**
- * Release any resources associated with a __cilkrts_stack created as a
- * user stack.
- *
- * @param sd The __cilkrts_stack to be released
- */
-COMMON_SYSDEP
-void sysdep_destroy_user_stack (__cilkrts_stack *sd);
-
-/**
- * Reset stack-specific information so the stack can be cached and reused
- *
- * @param sd The __cilkrts_stack to be reset.
- */
-COMMON_SYSDEP
-void __cilkrts_sysdep_reset_stack(__cilkrts_stack *sd);
/**
* Save system dependent information in the full_frame and
@@ -184,17 +115,6 @@ void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w,
int state_valid,
const char *why);
-/**
- * Resume execution of the full frame.
- *
- * @param w The worker to resume execution on.
- * @param ff The full_frame to resume executing.
- * @param sf The __cilkrts_stack_frame to resume executing.
- */
-COMMON_SYSDEP
-NORETURN __cilkrts_resume(__cilkrts_worker *w,
- full_frame *ff,
- __cilkrts_stack_frame *sf);
/**
* OS-specific code to spawn worker threads.
@@ -206,7 +126,7 @@ COMMON_SYSDEP
void __cilkrts_start_workers(global_state_t *g, int n);
/**
- * OS-specific code to stop worker threads.
+ * @brief OS-specific code to stop worker threads.
*
* @param g The global state.
*/
@@ -214,27 +134,8 @@ COMMON_SYSDEP
void __cilkrts_stop_workers(global_state_t *g);
/**
- * System dependent function called when a thread is bound to a worker.
- *
- * @param w Worker to bind to the currently executing thread.
+ * @brief Imports a user thread the first time it returns to a stolen parent.
*
- * @return 0 on success.
- * @return non-zero on failure.
- */
-COMMON_SYSDEP
-int __cilkrts_sysdep_bind_thread(__cilkrts_worker *w);
-
-/**
- * System dependent function called when a thread is unbound from a
- * worker.
- *
- * @param w Worker to unbind from the currently executing thread.
- */
-COMMON_SYSDEP
-void __cilkrts_sysdep_unbind_thread(__cilkrts_worker *w);
-
-/**
- * Imports a user thread the first time it returns to a stolen parent.
* The thread has been bound to a worker, but additional steps need to
* be taken to start running a scheduling loop.
*
@@ -244,35 +145,10 @@ COMMON_SYSDEP
void __cilkrts_sysdep_import_user_thread(__cilkrts_worker *w);
/**
- * Fills in the __cilkrts_region_properties for a __cilkrts_stack.
- *
- * @param sd The stack that's being run on.
- * @param properties Buffer to hold information about the stack region.
- *
- * @return 1 on success.
- * @return 0 on failure.
- */
-COMMON_SYSDEP
-int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd,
- __cilkrts_region_properties *properties);
-
-/**
- * Returns true if the thread ID specified matches the thread ID we saved
- * for a worker.
- *
- * @param g Pointer to the global state. Used to validate the index.
- * @param i Index for the worker.
- * @param thread_id Thread ID to be checked.
- */
-COMMON_SYSDEP
-int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g,
- int i,
- cilkos_thread_id_t thread_id);
-
-/**
- * Function to be run for each of the system worker threads.
- * This declaration also appears in cilk/cilk_undocumented.h -- don't change
- * one declaration without also changing the other.
+ * @brief Function to be run for each of the system worker threads.
+ *
+ * This declaration also appears in cilk/cilk_undocumented.h -- don't
+ * change one declaration without also changing the other.
*
* @param arg The context value passed to the thread creation routine for
* the OS we're running on.
@@ -284,7 +160,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g,
CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall
__cilkrts_worker_stub(void *arg);
#else
-/* Do not use CILK_API because __cilkrts_worker_stub have defauld visibility */
+/* Do not use CILK_API because __cilkrts_worker_stub have default visibility */
__attribute__((visibility("default")))
void* __CILKRTS_NOTHROW __cilkrts_worker_stub(void *arg);
#endif
@@ -322,72 +198,73 @@ COMMON_SYSDEP
void __cilkrts_setup_for_execution_sysdep(__cilkrts_worker *w,
full_frame *ff);
-/****************************************************************************
- * TBB interop functions
- * **************************************************************************/
-
/**
- * Set the TBB callback information for a stack
+ * @brief OS-specific implementation of resetting fiber and frame state
+ * to resume execution.
*
- * @param sd The stack to set the TBB callback information for
- * @param o The TBB callback thunk. Specifies the callback address and
- * context value.
- */
-COMMON_SYSDEP
-void __cilkrts_set_stack_op(__cilkrts_stack *sd,
- __cilk_tbb_stack_op_thunk o);
-
-/**
- * Call TBB to tell it about an "interesting" occurrance
+ * This method:
+ * 1. Calculates the value of stack pointer where we should resume
+ * execution of "sf". This calculation uses info stored in the
+ * fiber, and takes into account alignment and frame size.
+ * 2. Updates sf and ff to match the calculated stack pointer.
*
- * @param w The worker the stack is running on
- * @param op Value specifying the "interesting" occurrance
- * @param sd The stack TBB is being notified about
- */
-COMMON_SYSDEP
-
-void __cilkrts_invoke_stack_op(__cilkrts_worker *w,
- enum __cilk_tbb_stack_op op,
- __cilkrts_stack *sd);
-
-/**
- * Save TBB the TBB callback address and context value in thread-local
- * storage. We'll use it later when the thread binds to a worker.
+ * On Unix, the stack pointer calculation looks up the base of the
+ * stack from the fiber.
*
- * @param o The TBB callback thunk which is to be saved.
- */
-COMMON_SYSDEP
-void tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o);
-
-/**
- * Called when we bind a thread to the runtime. If there is any TBB
- * interop information in thread-local storage, bind it to the
- * stack now.
+ * On Windows, this calculation calls "alloca" to find a stack
+ * pointer on the currently executing stack. Thus, the Windows code
+ * assumes @c fiber is the currently executing fiber.
*
- * @param w The worker that has been bound to the thread.
- * @param sd The stack that should take over the TBB interop information.
+ * @param fiber fiber to resume execution on.
+ * @param ff full_frame for the frame we're resuming.
+ * @param sf __cilkrts_stack_frame that we should resume
+ * @return The calculated stack pointer.
*/
COMMON_SYSDEP
-void tbb_interop_use_saved_stack_op_info(__cilkrts_worker *w,
- __cilkrts_stack *sd);
+char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber,
+ full_frame *ff,
+ __cilkrts_stack_frame *sf);
/**
- * Free any TBB interop information saved in thread-local storage
+ * @brief System-dependent longjmp to user code for resuming execution
+ * of a @c __cilkrts_stack_frame.
+ *
+ * This method:
+ * - Changes the stack pointer in @c sf to @c new_sp.
+ * - If @c ff_for_exceptions is not NULL, changes fields in @c sf and
+ * @c ff_for_exceptions for exception processing.
+ * - Restores any floating point state
+ * - Finishes with a longjmp to user code, never to return.
+ *
+ * @param new_sp stack pointer where we should resume execution
+ * @param sf @c __cilkrts_stack_frame for the frame we're resuming.
+ * @param ff_for_exceptions full_frame to save exception info into, if necessary
*/
COMMON_SYSDEP
-void tbb_interop_free_stack_op_info(void);
+NORETURN
+sysdep_longjmp_to_sf(char* new_sp,
+ __cilkrts_stack_frame *sf,
+ full_frame *ff_for_exceptions);
/**
- * Migrate any TBB interop information from a __cilkrts_stack to
- * thread-local storage. Returns immediately if no TBB interop information
- * has been associated with the stack.
+ * @brief System-dependent code to save floating point control information
+ * to a @c __cilkrts_stack_frame. This function will be called by compilers
+ * that cannot inline the code.
*
- * @param sd The __cilkrts_stack who's TBB interop information should be
- * saved in thread-local storage.
+ * Note that this function does *not* save the current floating point
+ * registers. It saves the floating point control words that control
+ * precision and rounding and stuff like that.
+ *
+ * This function will be a noop for architectures that don't have warts
+ * like the floating point control words, or where the information is
+ * already being saved by the setjmp.
+ *
+ * @param sf @c __cilkrts_stack_frame for the frame we're
+ * saving the floating point control information in.
*/
COMMON_SYSDEP
-void tbb_interop_save_info_from_stack(__cilkrts_stack *sd);
-
+void
+sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf);
__CILKRTS_END_EXTERN_C
diff --git a/libcilkrts/runtime/unix_symbols.t b/libcilkrts/runtime/unix_symbols.t
index 98d55984f47..1c4fdfd6ac1 100644
--- a/libcilkrts/runtime/unix_symbols.t
+++ b/libcilkrts/runtime/unix_symbols.t
@@ -1,25 +1,30 @@
-# Copyright (C) 2011
-# Intel Corporation
-#
-# This file is part of the Intel Cilk Plus Library. This library is free
-# software; you can redistribute it and/or modify it under the
-# terms of the GNU General Public License as published by the
-# Free Software Foundation; either version 3, or (at your option)
-# any later version.
-#
-# This library is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# Under Section 7 of GPL version 3, you are granted additional
-# permissions described in the GCC Runtime Library Exception, version
-# 3.1, as published by the Free Software Foundation.
-#
-# You should have received a copy of the GNU General Public License and
-# a copy of the GCC Runtime Library Exception along with this program;
-# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
-# <http://www.gnu.org/licenses/>.
+# @copyright
+# Copyright (C) 2011
+# Intel Corporation
+#
+# @copyright
+# This file is part of the Intel Cilk Plus Library. This library is free
+# software; you can redistribute it and/or modify it under the
+# terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3, or (at your option)
+# any later version.
+#
+# @copyright
+# This library is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# @copyright
+# Under Section 7 of GPL version 3, you are granted additional
+# permissions described in the GCC Runtime Library Exception, version
+# 3.1, as published by the Free Software Foundation.
+#
+# @copyright
+# You should have received a copy of the GNU General Public License and
+# a copy of the GCC Runtime Library Exception along with this program;
+# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+# <http://www.gnu.org/licenses/>.
__cilkrts_bind_thread
__cilkrts_bind_thread_1
@@ -41,8 +46,6 @@ __cilkrts_get_nworkers
__cilkrts_get_pedigree_info
__cilkrts_get_pedigree_internal
__cilkrts_get_sf
-__cilkrts_get_stack_region_id
-__cilkrts_get_stack_region_properties
__cilkrts_get_stack_size
__cilkrts_get_tls_worker
__cilkrts_get_tls_worker_fast
@@ -57,13 +60,15 @@ __cilkrts_hyperobject_alloc
__cilkrts_hyperobject_dealloc
__cilkrts_hyperobject_noop_destroy
__cilkrts_init
-# __cilkrts_init_worker_sysdep
__cilkrts_irml_version
__cilkrts_leave_frame
__cilkrts_metacall
__cilkrts_rethrow
__cilkrts_return_exception
+__cilkrts_save_fp_ctrl_state
__cilkrts_set_param
+__cilkrts_stack_alloc
+__cilkrts_stack_free
__cilkrts_sync
__cilkrts_synched
__cilkrts_watch_stack
diff --git a/libcilkrts/runtime/worker_mutex.c b/libcilkrts/runtime/worker_mutex.c
index 51e4d4b47bd..d83b4b4bbff 100644
--- a/libcilkrts/runtime/worker_mutex.c
+++ b/libcilkrts/runtime/worker_mutex.c
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
#include "worker_mutex.h"
@@ -60,24 +65,23 @@ void __cilkrts_mutex_lock(__cilkrts_worker *w, struct mutex *m)
NOTE_INTERVAL(w, INTERVAL_MUTEX_LOCK);
if (!TRY_ACQUIRE(m)) {
- START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
- count = 0;
- do {
- do {
- __cilkrts_short_pause();
-
- if (++count >= maxspin) {
- STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
- START_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
- /* let the OS reschedule every once in a while */
- __cilkrts_yield();
- STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
- START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
- count = 0;
- }
- } while (m->lock != 0);
- } while (!TRY_ACQUIRE(m));
- STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ count = 0;
+ do {
+ do {
+ __cilkrts_short_pause();
+ if (++count >= maxspin) {
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
+ /* let the OS reschedule every once in a while */
+ __cilkrts_yield();
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING);
+ START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
+ count = 0;
+ }
+ } while (m->lock != 0);
+ } while (!TRY_ACQUIRE(m));
+ STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING);
}
CILK_ASSERT(m->owner == 0);
diff --git a/libcilkrts/runtime/worker_mutex.h b/libcilkrts/runtime/worker_mutex.h
index 2dacf48980d..e016faf8916 100644
--- a/libcilkrts/runtime/worker_mutex.h
+++ b/libcilkrts/runtime/worker_mutex.h
@@ -2,28 +2,33 @@
*
*************************************************************************
*
- * Copyright (C) 2009-2011
- * Intel Corporation
- *
- * This file is part of the Intel Cilk Plus Library. This library is free
- * software; you can redistribute it and/or modify it under the
- * terms of the GNU General Public License as published by the
- * Free Software Foundation; either version 3, or (at your option)
- * any later version.
- *
- * This library is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- * GNU General Public License for more details.
- *
- * Under Section 7 of GPL version 3, you are granted additional
- * permissions described in the GCC Runtime Library Exception, version
- * 3.1, as published by the Free Software Foundation.
- *
- * You should have received a copy of the GNU General Public License and
- * a copy of the GCC Runtime Library Exception along with this program;
- * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- * <http://www.gnu.org/licenses/>.
+ * @copyright
+ * Copyright (C) 2009-2011
+ * Intel Corporation
+ *
+ * @copyright
+ * This file is part of the Intel Cilk Plus Library. This library is free
+ * software; you can redistribute it and/or modify it under the
+ * terms of the GNU General Public License as published by the
+ * Free Software Foundation; either version 3, or (at your option)
+ * any later version.
+ *
+ * @copyright
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * @copyright
+ * Under Section 7 of GPL version 3, you are granted additional
+ * permissions described in the GCC Runtime Library Exception, version
+ * 3.1, as published by the Free Software Foundation.
+ *
+ * @copyright
+ * You should have received a copy of the GNU General Public License and
+ * a copy of the GCC Runtime Library Exception along with this program;
+ * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ * <http://www.gnu.org/licenses/>.
**************************************************************************/
/**
@@ -42,9 +47,6 @@
__CILKRTS_BEGIN_EXTERN_C
-// Forwarded declarations
-typedef struct __cilkrts_worker __cilkrts_worker;
-
/**
* Mutexes are treated as an abstract data type within the Cilk
* runtime system. They are implemented as simple spin loops and
@@ -59,7 +61,7 @@ typedef struct mutex {
} mutex;
/**
- * Initialize a Cilk mutex.
+ * @brief Initialize a Cilk mutex.
*
* @param m Mutex to be initialized.
*/
@@ -67,7 +69,9 @@ COMMON_PORTABLE
void __cilkrts_mutex_init(struct mutex *m);
/**
- * Acquire a Cilk mutex. If statistics are being gathered, the time spent
+ * @brief Acquire a Cilk mutex.
+ *
+ * If statistics are being gathered, the time spent
* acquiring the mutex will be attributed to the specified worker.
*
* @param w Worker that will become the owner of this mutex.
@@ -77,9 +81,10 @@ COMMON_PORTABLE
void __cilkrts_mutex_lock(__cilkrts_worker *w,
struct mutex *m);
/**
- * Attempt to lock a Cilk mutex and fail if it isn't available. If statistics
- * are being gathered, the time spent acquiring the mutex will be attributed
- * to the specified worker.
+ * @brief Attempt to lock a Cilk mutex and fail if it isn't available.
+ *
+ * If statistics are being gathered, the time spent acquiring the
+ * mutex will be attributed to the specified worker.
*
* @param w Worker that will become the owner of this mutex.
* @param m Mutex to be acquired.
@@ -92,11 +97,12 @@ int __cilkrts_mutex_trylock(__cilkrts_worker *w,
struct mutex *m);
/**
- * Release a Cilk mutex. If statistics are being gathered, the time spent
+ * @brief Release a Cilk mutex.
+ *
+ * If statistics are being gathered, the time spent
* acquiring the mutex will be attributed to the specified worker.
*
- * Preconditions:
- * - The mutex must be owned by the worker.
+ * @pre The mutex must be owned by the worker.
*
* @param w Worker that owns this mutex.
* @param m Mutex to be released.
@@ -106,7 +112,8 @@ void __cilkrts_mutex_unlock(__cilkrts_worker *w,
struct mutex *m);
/**
- * Deallocated a Cilk mutex. Currently does nothing.
+ * @brief Deallocate a Cilk mutex. Currently does nothing.
+ *
* @param w Unused.
* @param m Mutex to be deallocated.
*/