diff options
author | Balaji V. Iyer <balaji.v.iyer@intel.com> | 2013-08-05 18:52:16 +0000 |
---|---|---|
committer | Balaji V. Iyer <balaji.v.iyer@intel.com> | 2013-08-05 18:52:16 +0000 |
commit | 5f967f13d141fc35ca1747e21a62fcd2804d0bbd (patch) | |
tree | 736fe5c630f50a57815fb3f14a2c85dd20c2e576 | |
parent | 8ec6a963082e92fd6c843553764906ee0f162bb5 (diff) |
Updated libcilkrts to revision 3520.
git-svn-id: https://gcc.gnu.org/svn/gcc/branches/cilkplus@201502 138bc75d-0d04-0410-961f-82ee72b054a4
96 files changed, 20107 insertions, 8767 deletions
diff --git a/libcilkrts/ChangeLog.cilkplus b/libcilkrts/ChangeLog.cilkplus index 5b6898f1e81..db17ce0e333 100644 --- a/libcilkrts/ChangeLog.cilkplus +++ b/libcilkrts/ChangeLog.cilkplus @@ -1,3 +1,99 @@ +2013-08-05 Balaji V. Iyer <balaji.v.iyer@intel.com> + + * include/cilk/reducer_opmul.h: Updated to revision 3520. + * include/cilk/cilk_api_linux.h: Likewise. + * include/cilk/reducer_min.h: Likewise. + * include/cilk/cilk_undocumented.h: Likewise. + * include/cilk/reducer_max.h: Likewise. + * include/cilk/reducer_opadd.h: Likewise. + * include/cilk/hyperobject_base.h: Likewise. + * include/cilk/reducer_opand.h: Likewise. + * include/cilk/cilk_api.h: Likewise. + * include/cilk/cilk_api.h: Likewise. + * include/cilk/cilk_stub.h: Likewise. + * include/cilk/reducer_string.h: Likewise. + * include/cilk/reducer_opxor.h: Likewise. + * include/cilk/cilk.h: Likewise. + * include/cilk/reducer_ostream.h: Likewise. + * include/cilk/reducer_list.h: Likewise. + * include/cilk/reducer_opor.h: Likewise. + * include/cilk/reducer_file.h: Likewise. + * include/cilk/common.h: Likewise. + * include/cilktools/lock_guard.h: Likewise. + * include/cilktools/cilkview.h: Likewise. + * include/cilktools/cilkscreen.h: Likewise. + * include/cilktools/fake_mutex.h: Likewise. + * include/internal/abi.h: Likewise. + * include/internal/rev.mk: Likewise. + * include/internal/metacall.h: Likewise. + * include/internal/cilk_version.h: Likewise. + * runtime/stats.c: Likewise. + * runtime/local_state.h: Likewise. + * runtime/os-unix.c: Likewise. + * runtime/stats.h: Likewise. + * runtime/symbol_test.c: Likewise. + * runtime/global_state.h: Likewise. + * runtime/doxygen-layout.xml: Likewise. + * runtime/component.h: Likewise. + * runtime/os_mutex.h: Likewise. + * runtime/cilk_malloc.c: Likewise. + * runtime/cilk-abi.c: Likewise. + * runtime/cilk_api.c: Likewise. + * runtime/sysdep.h: Likewise. + * runtime/unix_symbols.t: Likewise. + * runtime/bug.cpp: Likewise. + * runtime/cilk-ittnotify.h: Likewise. 
+ * runtime/cilk_malloc.h: Likewise. + * runtime/scheduler.c: Likewise. + * runtime/cilk-abi-cilk-for.cpp: Likewise. + * runtime/reducer_impl.h: Likewise. + * runtime/except-gcc.cpp: Likewise. + * runtime/scheduler.h: Likewise. + * runtime/os.h: Likewise. + * runtime/worker_mutex.c: Likewise. + * runtime/except.h: Likewise. + * runtime/global_state.cpp: Likewise. + * runtime/worker_mutex.h: Likewise. + * runtime/metacall_impl.c: Likewise. + * runtime/rts-common.h: Likewise. + * runtime/pedigrees.c: Likewise. + * runtime/metacall_impl.h: Likewise. + * runtime/cilk-tbb-interop.h: Likewise. + * runtime/reducer_impl.cpp: Likewise. + * runtime/full_frame.c: Likewise. + * runtime/pedigrees.h: Likewise. + * runtime/c_reducers.c: Likewise. + * runtime/full_frame.h: Likewise. + * runtime/frame_malloc.c: Likewise. + * runtime/bug.h: Likewise. + * runtime/signal_node.c: Likewise. + * runtime/jmpbuf.c: Likewise. + * runtime/os_mutex-unix.c: Likewise. + * runtime/frame_malloc.h: Likewise. + * runtime/except-gcc.h: Likewise. + * runtime/signal_node.h: Likewise. + * runtime/jmpbuf.h: Likewise. + * runtime/jmpbuf.h: Likewise. + * runtime/local_state.c: Likewise. + * runtime/doxygen.cfg: Likewise. + * Makefile.am: Likewise. + * include/cilk/metaprogramming.h: New file. + * include/cilk/metaprogramming.h: Likewise. + * include/cilk/reducer_opmul.h: Likewise. + * include/internal/cilk_fake.h: Likewise. + * runtime/cilk-abi-vla-internal.c: Likewise. + * runtime/cilk_fiber-unix.cpp: Likewise. + * runtime/cilk-abi-vla-internal.h: Likewise. + * runtime/spin_mutex.c: Likewise. + * runtime/spin_mutex.c: Likewise. + * runtime/record-replay.cpp: Likewise. + * runtime/cilk_fiber-unix.h: Likewise. + * runtime/cilk-abi-vla.c: Likewise. + * runtime/cilk_fiber.h: Likewise. + * runtime/acknowledgements.dox: Likewise. + * runtime/record-replay.h: Likewise. + * runtime/cilk_fiber.cpp: Likewise. + 2013-03-13 Balaji V. 
Iyer <balaji.v.iyer@intel.com> * runtime/sysdep-unix.c (__cilkrts_stop_workers): Inserted inline diff --git a/libcilkrts/Makefile.am b/libcilkrts/Makefile.am index e5d48b1ab59..cb6c563b291 100644 --- a/libcilkrts/Makefile.am +++ b/libcilkrts/Makefile.am @@ -36,15 +36,36 @@ AM_LDFLAGS = -lpthread -ldl # Target list. toolexeclib_LTLIBRARIES = libcilkrts.la -libcilkrts_la_SOURCES = runtime/cilk-abi.c runtime/cilk_api.c \ - runtime/cilk_malloc.c runtime/c_reducers.c \ - runtime/frame_malloc.c runtime/full_frame.c runtime/jmpbuf.c \ - runtime/local_state.c runtime/metacall_impl.c runtime/os_mutex-unix.c \ - runtime/os-unix.c runtime/scheduler.c runtime/signal_node.c \ - runtime/stacks.c runtime/stats.c \ - runtime/sysdep-unix.c runtime/worker_mutex.c runtime/bug.cpp \ - runtime/cilk-abi-cilk-for.cpp runtime/except-gcc.cpp \ - runtime/global_state.cpp runtime/reducer_impl.cpp runtime/pedigrees.c +libcilkrts_la_SOURCES = \ + runtime/bug.cpp \ + runtime/cilk-abi.c \ + runtime/cilk-abi-cilk-for.cpp \ + runtime/cilk-abi-vla.c \ + runtime/cilk-abi-vla-internal.c \ + runtime/cilk_api.c \ + runtime/cilk_fiber.cpp \ + runtime/cilk_fiber-unix.cpp \ + runtime/cilk_malloc.c \ + runtime/c_reducers.c \ + runtime/except-gcc.cpp \ + runtime/frame_malloc.c \ + runtime/full_frame.c \ + runtime/global_state.cpp \ + runtime/jmpbuf.c \ + runtime/local_state.c \ + runtime/metacall_impl.c \ + runtime/os_mutex-unix.c \ + runtime/os-unix.c \ + runtime/pedigrees.c \ + runtime/record-replay.cpp \ + runtime/reducer_impl.cpp \ + runtime/scheduler.c \ + runtime/signal_node.c \ + runtime/spin_mutex.c \ + runtime/stats.c \ + runtime/symbol_test.c \ + runtime/sysdep-unix.c \ + runtime/worker_mutex.c # Load the $(REVISION) value. 
include include/internal/rev.mk @@ -64,13 +85,16 @@ cilkinclude_HEADERS = \ include/cilk/common.h \ include/cilk/holder.h \ include/cilk/hyperobject_base.h \ + include/cilk/metaprogramming.h \ include/cilk/reducer_file.h \ include/cilk/reducer.h \ include/cilk/reducer_list.h \ include/cilk/reducer_max.h \ include/cilk/reducer_min.h \ + include/cilk/reducer_min_max.h \ include/cilk/reducer_opadd.h \ include/cilk/reducer_opand.h \ + include/cilk/reducer_opmul.h \ include/cilk/reducer_opor.h \ include/cilk/reducer_opxor.h \ include/cilk/reducer_ostream.h \ diff --git a/libcilkrts/Makefile.in b/libcilkrts/Makefile.in index 2c25a48a1a1..61156b422ac 100644 --- a/libcilkrts/Makefile.in +++ b/libcilkrts/Makefile.in @@ -40,30 +40,39 @@ ######################################################################### # -# Copyright (C) 2011-2012 -# Intel Corporation -# -# This file is part of the Intel Cilk Plus Library. This library is free -# software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# Under Section 7 of GPL version 3, you are granted additional -# permissions described in the GCC Runtime Library Exception, version -# 3.1, as published by the Free Software Foundation. -# -# You should have received a copy of the GNU General Public License and -# a copy of the GCC Runtime Library Exception along with this program; -# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -# <http://www.gnu.org/licenses/>. +# @copyright +# Copyright (C) 2011-2013 +# Intel Corporation +# +# @copyright +# This file is part of the Intel Cilk Plus Library. 
This library is free +# software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# @copyright +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# @copyright +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# @copyright +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. ########################################################################### +# DO NOT EDIT THIS FILE! 
+# +# It was automatically generated by cilkrts/include/internal/Makefile + VPATH = @srcdir@ pkgdatadir = $(datadir)/@PACKAGE@ @@ -130,12 +139,14 @@ am__installdirs = "$(DESTDIR)$(toolexeclibdir)" \ "$(DESTDIR)$(cilkincludedir)" LTLIBRARIES = $(toolexeclib_LTLIBRARIES) libcilkrts_la_LIBADD = -am_libcilkrts_la_OBJECTS = cilk-abi.lo cilk_api.lo cilk_malloc.lo \ - c_reducers.lo frame_malloc.lo full_frame.lo jmpbuf.lo \ - local_state.lo metacall_impl.lo os_mutex-unix.lo os-unix.lo \ - scheduler.lo signal_node.lo stacks.lo stats.lo sysdep-unix.lo \ - worker_mutex.lo bug.lo cilk-abi-cilk-for.lo except-gcc.lo \ - global_state.lo reducer_impl.lo pedigrees.lo +am_libcilkrts_la_OBJECTS = bug.lo cilk-abi.lo cilk-abi-cilk-for.lo \ + cilk-abi-vla.lo cilk-abi-vla-internal.lo cilk_api.lo \ + cilk_fiber.lo cilk_fiber-unix.lo cilk_malloc.lo c_reducers.lo \ + except-gcc.lo frame_malloc.lo full_frame.lo global_state.lo \ + jmpbuf.lo local_state.lo metacall_impl.lo os_mutex-unix.lo \ + os-unix.lo pedigrees.lo record-replay.lo reducer_impl.lo \ + scheduler.lo signal_node.lo spin_mutex.lo stats.lo \ + symbol_test.lo sysdep-unix.lo worker_mutex.lo libcilkrts_la_OBJECTS = $(am_libcilkrts_la_OBJECTS) libcilkrts_la_LINK = $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) \ $(LIBTOOLFLAGS) --mode=link $(CXXLD) $(AM_CXXFLAGS) \ @@ -304,17 +315,38 @@ AM_LDFLAGS = -lpthread -ldl # Target list. 
toolexeclib_LTLIBRARIES = libcilkrts.la -libcilkrts_la_SOURCES = runtime/cilk-abi.c runtime/cilk_api.c \ - runtime/cilk_malloc.c runtime/c_reducers.c \ - runtime/frame_malloc.c runtime/full_frame.c runtime/jmpbuf.c \ - runtime/local_state.c runtime/metacall_impl.c runtime/os_mutex-unix.c \ - runtime/os-unix.c runtime/scheduler.c runtime/signal_node.c \ - runtime/stacks.c runtime/stats.c \ - runtime/sysdep-unix.c runtime/worker_mutex.c runtime/bug.cpp \ - runtime/cilk-abi-cilk-for.cpp runtime/except-gcc.cpp \ - runtime/global_state.cpp runtime/reducer_impl.cpp runtime/pedigrees.c - -CILK_REVISION = 2856 +libcilkrts_la_SOURCES = \ + runtime/bug.cpp \ + runtime/cilk-abi.c \ + runtime/cilk-abi-cilk-for.cpp \ + runtime/cilk-abi-vla.c \ + runtime/cilk-abi-vla-internal.c \ + runtime/cilk_api.c \ + runtime/cilk_fiber.cpp \ + runtime/cilk_fiber-unix.cpp \ + runtime/cilk_malloc.c \ + runtime/c_reducers.c \ + runtime/except-gcc.cpp \ + runtime/frame_malloc.c \ + runtime/full_frame.c \ + runtime/global_state.cpp \ + runtime/jmpbuf.c \ + runtime/local_state.c \ + runtime/metacall_impl.c \ + runtime/os_mutex-unix.c \ + runtime/os-unix.c \ + runtime/pedigrees.c \ + runtime/record-replay.cpp \ + runtime/reducer_impl.cpp \ + runtime/scheduler.c \ + runtime/signal_node.c \ + runtime/spin_mutex.c \ + runtime/stats.c \ + runtime/symbol_test.c \ + runtime/sysdep-unix.c \ + runtime/worker_mutex.c + +CILK_REVISION = 3520 # Load the $(REVISION) value. 
@@ -333,13 +365,16 @@ cilkinclude_HEADERS = \ include/cilk/common.h \ include/cilk/holder.h \ include/cilk/hyperobject_base.h \ + include/cilk/metaprogramming.h \ include/cilk/reducer_file.h \ include/cilk/reducer.h \ include/cilk/reducer_list.h \ include/cilk/reducer_max.h \ include/cilk/reducer_min.h \ + include/cilk/reducer_min_max.h \ include/cilk/reducer_opadd.h \ include/cilk/reducer_opand.h \ + include/cilk/reducer_opmul.h \ include/cilk/reducer_opor.h \ include/cilk/reducer_opxor.h \ include/cilk/reducer_ostream.h \ @@ -466,8 +501,12 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/bug.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/c_reducers.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-cilk-for.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-vla-internal.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi-vla.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk-abi.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_api.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_fiber-unix.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_fiber.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/cilk_malloc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/except-gcc.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/frame_malloc.Plo@am__quote@ @@ -479,11 +518,13 @@ distclean-compile: @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os-unix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/os_mutex-unix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/pedigrees.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/record-replay.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/reducer_impl.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/scheduler.Plo@am__quote@ 
@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/signal_node.Plo@am__quote@ -@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stacks.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/spin_mutex.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/stats.Plo@am__quote@ +@AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/symbol_test.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/sysdep-unix.Plo@am__quote@ @AMDEP_TRUE@@am__include@ @am__quote@./$(DEPDIR)/worker_mutex.Plo@am__quote@ @@ -515,6 +556,20 @@ cilk-abi.lo: runtime/cilk-abi.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi.lo `test -f 'runtime/cilk-abi.c' || echo '$(srcdir)/'`runtime/cilk-abi.c +cilk-abi-vla.lo: runtime/cilk-abi-vla.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk-abi-vla.lo -MD -MP -MF $(DEPDIR)/cilk-abi-vla.Tpo -c -o cilk-abi-vla.lo `test -f 'runtime/cilk-abi-vla.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk-abi-vla.Tpo $(DEPDIR)/cilk-abi-vla.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/cilk-abi-vla.c' object='cilk-abi-vla.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi-vla.lo `test -f 'runtime/cilk-abi-vla.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla.c + +cilk-abi-vla-internal.lo: runtime/cilk-abi-vla-internal.c 
+@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk-abi-vla-internal.lo -MD -MP -MF $(DEPDIR)/cilk-abi-vla-internal.Tpo -c -o cilk-abi-vla-internal.lo `test -f 'runtime/cilk-abi-vla-internal.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla-internal.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk-abi-vla-internal.Tpo $(DEPDIR)/cilk-abi-vla-internal.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/cilk-abi-vla-internal.c' object='cilk-abi-vla-internal.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o cilk-abi-vla-internal.lo `test -f 'runtime/cilk-abi-vla-internal.c' || echo '$(srcdir)/'`runtime/cilk-abi-vla-internal.c + cilk_api.lo: runtime/cilk_api.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT cilk_api.lo -MD -MP -MF $(DEPDIR)/cilk_api.Tpo -c -o cilk_api.lo `test -f 'runtime/cilk_api.c' || echo '$(srcdir)/'`runtime/cilk_api.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/cilk_api.Tpo $(DEPDIR)/cilk_api.Plo @@ -585,6 +640,13 @@ os-unix.lo: runtime/os-unix.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o os-unix.lo `test -f 'runtime/os-unix.c' || echo '$(srcdir)/'`runtime/os-unix.c +pedigrees.lo: runtime/pedigrees.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) 
$(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pedigrees.lo -MD -MP -MF $(DEPDIR)/pedigrees.Tpo -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/pedigrees.Tpo $(DEPDIR)/pedigrees.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/pedigrees.c' object='pedigrees.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c + scheduler.lo: runtime/scheduler.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT scheduler.lo -MD -MP -MF $(DEPDIR)/scheduler.Tpo -c -o scheduler.lo `test -f 'runtime/scheduler.c' || echo '$(srcdir)/'`runtime/scheduler.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/scheduler.Tpo $(DEPDIR)/scheduler.Plo @@ -599,12 +661,12 @@ signal_node.lo: runtime/signal_node.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o signal_node.lo `test -f 'runtime/signal_node.c' || echo '$(srcdir)/'`runtime/signal_node.c -stacks.lo: runtime/stacks.c -@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stacks.lo -MD -MP -MF $(DEPDIR)/stacks.Tpo -c -o 
stacks.lo `test -f 'runtime/stacks.c' || echo '$(srcdir)/'`runtime/stacks.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/stacks.Tpo $(DEPDIR)/stacks.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/stacks.c' object='stacks.lo' libtool=yes @AMDEPBACKSLASH@ +spin_mutex.lo: runtime/spin_mutex.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT spin_mutex.lo -MD -MP -MF $(DEPDIR)/spin_mutex.Tpo -c -o spin_mutex.lo `test -f 'runtime/spin_mutex.c' || echo '$(srcdir)/'`runtime/spin_mutex.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/spin_mutex.Tpo $(DEPDIR)/spin_mutex.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/spin_mutex.c' object='spin_mutex.lo' libtool=yes @AMDEPBACKSLASH@ @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stacks.lo `test -f 'runtime/stacks.c' || echo '$(srcdir)/'`runtime/stacks.c +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o spin_mutex.lo `test -f 'runtime/spin_mutex.c' || echo '$(srcdir)/'`runtime/spin_mutex.c stats.lo: runtime/stats.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT stats.lo -MD -MP -MF $(DEPDIR)/stats.Tpo -c -o stats.lo `test -f 'runtime/stats.c' || echo '$(srcdir)/'`runtime/stats.c @@ -613,6 +675,13 @@ stats.lo: runtime/stats.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC 
$(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o stats.lo `test -f 'runtime/stats.c' || echo '$(srcdir)/'`runtime/stats.c +symbol_test.lo: runtime/symbol_test.c +@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT symbol_test.lo -MD -MP -MF $(DEPDIR)/symbol_test.Tpo -c -o symbol_test.lo `test -f 'runtime/symbol_test.c' || echo '$(srcdir)/'`runtime/symbol_test.c +@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/symbol_test.Tpo $(DEPDIR)/symbol_test.Plo +@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/symbol_test.c' object='symbol_test.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o symbol_test.lo `test -f 'runtime/symbol_test.c' || echo '$(srcdir)/'`runtime/symbol_test.c + sysdep-unix.lo: runtime/sysdep-unix.c @am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT sysdep-unix.lo -MD -MP -MF $(DEPDIR)/sysdep-unix.Tpo -c -o sysdep-unix.lo `test -f 'runtime/sysdep-unix.c' || echo '$(srcdir)/'`runtime/sysdep-unix.c @am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/sysdep-unix.Tpo $(DEPDIR)/sysdep-unix.Plo @@ -627,13 +696,6 @@ worker_mutex.lo: runtime/worker_mutex.c @AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o 
worker_mutex.lo `test -f 'runtime/worker_mutex.c' || echo '$(srcdir)/'`runtime/worker_mutex.c -pedigrees.lo: runtime/pedigrees.c -@am__fastdepCC_TRUE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -MT pedigrees.lo -MD -MP -MF $(DEPDIR)/pedigrees.Tpo -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c -@am__fastdepCC_TRUE@ $(am__mv) $(DEPDIR)/pedigrees.Tpo $(DEPDIR)/pedigrees.Plo -@AMDEP_TRUE@@am__fastdepCC_FALSE@ source='runtime/pedigrees.c' object='pedigrees.lo' libtool=yes @AMDEPBACKSLASH@ -@AMDEP_TRUE@@am__fastdepCC_FALSE@ DEPDIR=$(DEPDIR) $(CCDEPMODE) $(depcomp) @AMDEPBACKSLASH@ -@am__fastdepCC_FALSE@ $(LIBTOOL) --tag=CC $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CC) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CFLAGS) $(CFLAGS) -c -o pedigrees.lo `test -f 'runtime/pedigrees.c' || echo '$(srcdir)/'`runtime/pedigrees.c - .cpp.o: @am__fastdepCXX_TRUE@ $(CXXCOMPILE) -MT $@ -MD -MP -MF $(DEPDIR)/$*.Tpo -c -o $@ $< @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/$*.Tpo $(DEPDIR)/$*.Po @@ -669,6 +731,20 @@ cilk-abi-cilk-for.lo: runtime/cilk-abi-cilk-for.cpp @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk-abi-cilk-for.lo `test -f 'runtime/cilk-abi-cilk-for.cpp' || echo '$(srcdir)/'`runtime/cilk-abi-cilk-for.cpp +cilk_fiber.lo: runtime/cilk_fiber.cpp +@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT cilk_fiber.lo -MD -MP -MF $(DEPDIR)/cilk_fiber.Tpo -c -o cilk_fiber.lo `test -f 
'runtime/cilk_fiber.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber.cpp +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/cilk_fiber.Tpo $(DEPDIR)/cilk_fiber.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/cilk_fiber.cpp' object='cilk_fiber.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk_fiber.lo `test -f 'runtime/cilk_fiber.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber.cpp + +cilk_fiber-unix.lo: runtime/cilk_fiber-unix.cpp +@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT cilk_fiber-unix.lo -MD -MP -MF $(DEPDIR)/cilk_fiber-unix.Tpo -c -o cilk_fiber-unix.lo `test -f 'runtime/cilk_fiber-unix.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber-unix.cpp +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/cilk_fiber-unix.Tpo $(DEPDIR)/cilk_fiber-unix.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/cilk_fiber-unix.cpp' object='cilk_fiber-unix.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o cilk_fiber-unix.lo `test -f 'runtime/cilk_fiber-unix.cpp' || echo '$(srcdir)/'`runtime/cilk_fiber-unix.cpp + except-gcc.lo: runtime/except-gcc.cpp @am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT except-gcc.lo -MD -MP -MF 
$(DEPDIR)/except-gcc.Tpo -c -o except-gcc.lo `test -f 'runtime/except-gcc.cpp' || echo '$(srcdir)/'`runtime/except-gcc.cpp @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/except-gcc.Tpo $(DEPDIR)/except-gcc.Plo @@ -683,6 +759,13 @@ global_state.lo: runtime/global_state.cpp @AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ @am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o global_state.lo `test -f 'runtime/global_state.cpp' || echo '$(srcdir)/'`runtime/global_state.cpp +record-replay.lo: runtime/record-replay.cpp +@am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT record-replay.lo -MD -MP -MF $(DEPDIR)/record-replay.Tpo -c -o record-replay.lo `test -f 'runtime/record-replay.cpp' || echo '$(srcdir)/'`runtime/record-replay.cpp +@am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/record-replay.Tpo $(DEPDIR)/record-replay.Plo +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ source='runtime/record-replay.cpp' object='record-replay.lo' libtool=yes @AMDEPBACKSLASH@ +@AMDEP_TRUE@@am__fastdepCXX_FALSE@ DEPDIR=$(DEPDIR) $(CXXDEPMODE) $(depcomp) @AMDEPBACKSLASH@ +@am__fastdepCXX_FALSE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -c -o record-replay.lo `test -f 'runtime/record-replay.cpp' || echo '$(srcdir)/'`runtime/record-replay.cpp + reducer_impl.lo: runtime/reducer_impl.cpp @am__fastdepCXX_TRUE@ $(LIBTOOL) --tag=CXX $(AM_LIBTOOLFLAGS) $(LIBTOOLFLAGS) --mode=compile $(CXX) $(DEFS) $(DEFAULT_INCLUDES) $(INCLUDES) $(AM_CPPFLAGS) $(CPPFLAGS) $(AM_CXXFLAGS) $(CXXFLAGS) -MT reducer_impl.lo -MD -MP -MF $(DEPDIR)/reducer_impl.Tpo -c -o 
reducer_impl.lo `test -f 'runtime/reducer_impl.cpp' || echo '$(srcdir)/'`runtime/reducer_impl.cpp @am__fastdepCXX_TRUE@ $(am__mv) $(DEPDIR)/reducer_impl.Tpo $(DEPDIR)/reducer_impl.Plo diff --git a/libcilkrts/include/cilk/cilk.h b/libcilkrts/include/cilk/cilk.h index fa7ac8eaea0..e4a6e155954 100644 --- a/libcilkrts/include/cilk/cilk.h +++ b/libcilkrts/include/cilk/cilk.h @@ -1,34 +1,66 @@ /* cilk.h -*-C++-*- * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file cilk.h + * + * @brief Provides convenient aliases for the Cilk language keywords. + * + * @details + * Since Cilk is a nonstandard extension to both C and C++, the Cilk + * language keywords all begin with “`_Cilk_`”, which guarantees that they + * will not conflict with user-defined identifiers in properly written + * programs, so that “standard” C and C++ programs can safely be + * compiled with a Cilk-enabled C or C++ compiler. + * + * However, this means that the keywords _look_ like something grafted on to + * the base language. Therefore, you can include this header: + * + * #include "cilk/cilk.h" * + * and then write the Cilk keywords with a “`cilk_`” prefix instead of + * “`_Cilk_`”. + * + * @ingroup language */ - -/* Define convenient aliases for Cilk keywords */ - + + +/** @defgroup language Language Keywords + * Definitions having to do with the Cilk language. + * @{ + */ + #ifndef cilk_spawn -# define cilk_spawn _Cilk_spawn -# define cilk_sync _Cilk_sync -# define cilk_for _Cilk_for +# define cilk_spawn _Cilk_spawn ///< Spawn a task that can execute in parallel. +# define cilk_sync _Cilk_sync ///< Wait for spawned tasks to complete. 
+# define cilk_for _Cilk_for ///< Execute iterations of a for loop in parallel. #endif + +/// @} diff --git a/libcilkrts/include/cilk/cilk_api.h b/libcilkrts/include/cilk/cilk_api.h index f7078ec2373..f56216ae6dd 100644 --- a/libcilkrts/include/cilk/cilk_api.h +++ b/libcilkrts/include/cilk/cilk_api.h @@ -1,62 +1,57 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - * - */ - -/** - * @if public_doc - * @mainpage - * @section intro_sec Introduction +/* cilk_api.h * - * In addition to the Cilk Plus keywords, Intel Cilk Plus provides an API to - * allow users to query and control the Intel Cilk Plus runtime. - * @endif + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ - -/** - * @file cilk_api.h + +/** @file cilk_api.h * - * @brief Defines the documented API exposed by the Intel Cilk Plus for use + * @brief Defines the documented API exposed by the Cilk Plus for use * by applications. + * + * @ingroup api */ - -/** - * @page API - * Cilk API -- Functions callable by the user to modify the operation of the - * Cilk scheduler. - */ - + #ifndef INCLUDED_CILK_API_H #define INCLUDED_CILK_API_H -#ifndef CILK_STUB /* Real (non-stub) definitions below */ +/** @defgroup api Runtime API + * API to allow user programs to interact with the Cilk runtime. + * @{ + */ + +#ifndef CILK_STUB /* Real (non-stub) definitions */ #if ! defined(__cilk) && ! 
defined(USE_CILK_API) # ifdef _WIN32 -# pragma message("Warning: Cilk ABI is being used with non-Cilk compiler (or Cilk is disabled)") +# error Cilk API is being used with non-Cilk compiler (or Cilk is disabled) # else -# warning Cilk ABI is being used with non-Cilk compiler (or Cilk is disabled) +# warning Cilk API is being used with non-Cilk compiler (or Cilk is disabled) # endif #endif @@ -81,8 +76,9 @@ __CILKRTS_BEGIN_EXTERN_C -/** @brief Return values from __cilkrts_set_param() and __cilkrts_set_param_w() */ -enum { +/** Return values from __cilkrts_set_param() and __cilkrts_set_param_w() + */ +enum __cilkrts_set_param_status { __CILKRTS_SET_PARAM_SUCCESS = 0, /**< Success - parameter set */ __CILKRTS_SET_PARAM_UNIMP = 1, /**< Unimplemented parameter */ __CILKRTS_SET_PARAM_XRANGE = 2, /**< Parameter value out of range */ @@ -90,42 +86,73 @@ enum { __CILKRTS_SET_PARAM_LATE = 4 /**< Too late to change parameter value */ }; -/** - * @brief Set user controllable parameters +/** Set user controllable runtime parameters + * + * Call this function to set runtime parameters that control the behavior + * of the Cilk scheduler. + * + * @param param A string specifying the parameter to be set. One of: + * - `"nworkers"` + * - `"force reduce"` + * @param value A string specifying the parameter value. + * @returns A value from the @ref __cilkrts_set_param_status + * enumeration indicating the result of the operation. + * + * @par The "nworkers" parameter * - * @param param - string specifying parameter to be set - * @param value - string specifying new value - * @returns One of: __CILKRTS_SET_PARAM_SUCCESS ( = 0), - * __CILKRTS_SET_PARAM_UNIMP, __CILKRTS_SET_PARAM_XRANGE, - * __CILKRTS_SET_PARAM_INVALID, or __CILKRTS_SET_PARAM_LATE. + * This parameter specifies the number of worker threads to be created by the + * Cilk runtime. @a Value must be a string of digits to be parsed by + * `strtol()`. 
* - * @attention The wide character function __cilkrts_set_param_w() is available - * only on Windows. + * The number of worker threads is: + * 1. the value set with `__cilkrts_set_param("nworkers")`, if it is + * positive; otherwise, + * 2. the value of the CILK_NWORKERS environment variable, if it is + * defined; otherwise + * 3. the number of cores available, as reported by the operating system. * - * Allowable parameter names: + * @note + * Technically, Cilk distinguishes between the _user thread_ (the thread that + * the user code was executing on when the Cilk runtime started), and + * _worker threads_ (new threads created by the Cilk runtime to support + * Cilk parallelism). `nworkers` actually includes both the user thread and + * the worker threads; that is, it is one greater than the number of true + * “worker threads”. * - * - "nworkers" - number of cores that should run Cilk code. The value is a - * string of digits to be parsed by strtol. Negative numbers are not valid - * for "nworkers". + * @note + * Setting `nworkers = 1` produces serial behavior. Cilk spawns and syncs will + * be executed, but with only one worker, continuations will never be stolen, + * so all code will execute in serial. * - * The precedence for "nworkers" is: - * 1) __cilkrts_set_param("nworkers") - * 2) The CILK_NWORKERS environment variable - * 3) The number of cores returned by the OS. + * @warning + * The number of worker threads can only be set *before* the runtime has + * started. Attempting to set it when the runtime is running will have no + * effect, and will return an error code. You can call __cilkrts_end_cilk() + * to shut down the runtime to change the number of workers. * - * Setting "nworkers" to "0" sets the number of workers to the value of - * CILK_NWORKERS environment number or the number of cores returned by the - * OS. + * @warning + * The default Cilk scheduler behavior is usually pretty good. 
The ability + * to override `nworkers` can be useful for experimentation, but it won’t + * usually be necessary for getting good performance. * - * "nworkers" can only be set *before* the runtime has started. Attempting - * to set "nworkers" when the runtime is running will return an error code. - * You can use __cilkrts_end_cilk() to shut down the runtime to change the - * number of workers. + * @par The "force reduce" parameter * - * - "force reduce" - test reducer callbacks by allocating new views - * for every spawn within which a reducer is accessed. This can - * significantly reduce performance. The value is "1" or "true" - * to enable, "0" or "false" to disable. + * This parameter controls whether the runtime should allocate a new view + * for a reducer for every parallel strand that it is accessed on. (See + * @ref pagereducers.) @a Value must be `"1"` or `"true"` to enable the + * “force reduce” behavior, or `"0"` or `"false"` to disable it. + * + * “Force reduce” behavior will also be enabled if + * `__cilkrts_set_param("force reduce")` is not called, but the + * `CILK_FORCE_REDUCE` environment variable is defined. + * + * @warning + * When this option is enabled, `nworkers` should be set to `1`. Using “force + * reduce” with more than one worker may result in runtime errors. + * + * @warning + * Enabling this option can significantly reduce performance. It should + * _only_ be used as a debugging tool. */ CILK_API(int) __cilkrts_set_param(const char *param, const char *value); @@ -133,73 +160,81 @@ CILK_API(int) __cilkrts_set_param(const char *param, const char *value); /** * Set user controllable parameters using wide strings * + * @note This variant of __cilkrts_set_param() is only available + * on Windows. + * * @copydetails __cilkrts_set_param */ CILK_API(int) __cilkrts_set_param_w(const wchar_t *param, const wchar_t *value); #endif -/** - * Shut down and deallocate all Cilk state. 
The runtime will abort the - * application if Cilk is still in use by this thread. Otherwise the runtime - * will wait for all other threads using Cilk to exit. +/** Shut down and deallocate all Cilk state. The runtime will abort the + * application if Cilk is still in use by this thread. Otherwise the runtime + * will wait for all other threads using Cilk to exit. */ CILK_API(void) __cilkrts_end_cilk(void); -/** - * Allocate Cilk data structures, starting the runtime. +/** Initialize the Cilk data structures and start the runtime. */ CILK_API(void) __cilkrts_init(void); -/** - * Return the number of worker threads that this instance of Cilk - * will attempt to use. +/** Return the runtime `nworkers` parameter. (See the discussion of `nworkers` + * in the documentation for __cilkrts_set_param().) */ CILK_API(int) __cilkrts_get_nworkers(void); -/** - *Return the number of worker threads allocated. +/** Return the number of thread data structures. + * + * This function returns the number of data structures that have been + * allocated by the runtime to hold information about user and worker threads. + * + * If you don’t already know what this is good for, then you probably don’t + * need it. */ CILK_API(int) __cilkrts_get_total_workers(void); -/** - * Return a small integer indicating which Cilk worker the function is - * currently running on. Each thread started by the Cilk runtime library - * (referred to as a system worker) has a unique worker number in the range - * 1..P-1, where P is the value returned by __cilkrts_get_nworkers(). - * - * Note that all threads started by the user or by other libraries (referred - * to as user workers) share the worker number 0. Therefore, the worker number - * is not unique across multiple user threads. +/** What thread is the function running on? + * + * Return a small integer identifying the current thread. Each worker thread + * started by the Cilk runtime library has a unique worker number in the range + * `1 .. 
nworkers - 1`. + * + * All _user_ threads (threads started by the user, or by other libraries) are + * identified as worker number 0. Therefore, the worker number is not unique + * across multiple user threads. */ CILK_API(int) __cilkrts_get_worker_number(void); -/** - * Return non-zero if force reduce mode is on +/** Test whether “force reduce” behavior is enabled. + * + * @return Non-zero if force-reduce mode is on, zero if it is off. */ CILK_API(int) __cilkrts_get_force_reduce(void); -/** - * Interact with tools +/** Interact with tools */ CILK_API(void) __cilkrts_metacall(unsigned int tool, unsigned int code, void *data); #ifdef _WIN32 +/// Windows exception description record. typedef struct _EXCEPTION_RECORD _EXCEPTION_RECORD; -/** Callback function signature for Windows exception notification */ +/** Function signature for Windows exception notification callbacks. + */ typedef void (*__cilkrts_pfn_seh_callback)(const _EXCEPTION_RECORD *exception); -/** - * Debugging aid for exceptions on Windows. +/** Specify a function to call when a non-C++ exception is caught. + * + * Cilk Plus parallelism plays nicely with C++ exception handling, but the + * Cilk Plus runtime has no way to unwind the stack across a strand boundary + * for Microsoft SEH (“Structured Exception Handling”) exceptions. Therefore, + * when the runtime catches such an exception, it must abort the application. * - * The specified function will be called when a non-C++ exception is caught - * by the Cilk Plus runtime. This is illegal since there's no way for the - * Cilk Plus runtime to know how to unwind the stack across a strand boundary - * for Structure Exceptions. + * If an SEH callback has been set, the runtime will call it before aborting. * - * This function allows an application to do something before the Cilk Plus - * runtime aborts the application. + * @param pfn A pointer to a callback function to be called before the + * runtime aborts the program because of an SEH exception. 
*/ CILK_API(int) __cilkrts_set_seh_callback(__cilkrts_pfn_seh_callback pfn); #endif /* _WIN32 */ @@ -207,25 +242,36 @@ CILK_API(int) __cilkrts_set_seh_callback(__cilkrts_pfn_seh_callback pfn); #if __CILKRTS_ABI_VERSION >= 1 /* Pedigree API is available only for compilers that use ABI version >= 1. */ -/** - * Pedigree API + +/** @name Pedigrees */ +//@{ -/* Internal implementation of __cilkrts_get_pedigree */ +// @cond internal + +/** Support for __cilkrts_get_pedigree. + */ CILK_API(__cilkrts_pedigree) __cilkrts_get_pedigree_internal(__cilkrts_worker *w); -/** - * @brief Returns the current pedigree, in a linked list representation. +/** Support for __cilkrts_bump_worker_rank. + */ +CILK_API(int) +__cilkrts_bump_worker_rank_internal(__cilkrts_worker* w); + +/// @endcond + + +/** Get the current pedigree, in a linked list representation. * - * This routine returns a copy of the last node in the pedigree list. - * For example, if the current pedigree (in order) is <1, 2, 3, 4>, - * then this method returns a node with rank == 4, and whose parent - * field points to the node with rank of 3. In summary, following the - * nodes in the chain visits the terms of the pedigree in reverse. + * This routine returns a copy of the last node in the pedigree list. + * For example, if the current pedigree (in order) is <1, 2, 3, 4>, + * then this method returns a node with rank == 4, and whose parent + * field points to the node with rank of 3. In summary, following the + * nodes in the chain visits the terms of the pedigree in reverse. * - * The returned node is guaranteed to be valid only until the caller - * of this routine has returned. + * The returned node is guaranteed to be valid only until the caller + * of this routine has returned. 
*/ __CILKRTS_INLINE __cilkrts_pedigree __cilkrts_get_pedigree(void) @@ -233,13 +279,16 @@ __cilkrts_pedigree __cilkrts_get_pedigree(void) return __cilkrts_get_pedigree_internal(__cilkrts_get_tls_worker()); } -/** - * @brief DEPRECATED -- Context used by __cilkrts_get_pedigree_info. +/** Context used by __cilkrts_get_pedigree_info. + * + * @deprecated + * This data structure is only used by the deprecated + * __cilkrts_get_pedigree_info function. * - * Callers should initialize the - * data array to NULL, and set the size to sizeof(__cilkrts_pedigree_context_t - * before the first call to __cilkrts_get_pedigree_info and should not examine - * or modify it after. + * Callers should initialize the `data` array to NULL and set the `size` + * field to `sizeof(__cilkrts_pedigree_context_t)` before the first call + * to __cilkrts_get_pedigree_info(), and should not examine or modify it + * thereafter. */ typedef struct { @@ -247,17 +296,19 @@ typedef struct void *data[3]; /**< Opaque context data */ } __cilkrts_pedigree_context_t; -/** - * @brief DEPRECATED -- Use __cilkrts_get_pedigree instead. +/** Get pedigree information. + * + * @deprecated + * Use __cilkrts_get_pedigree() instead. * - * This routine allows code to walk up the stack of Cilk frames to gather - * the pedigree. + * This routine allows code to walk up the stack of Cilk frames to gather + * the pedigree. * - * Initialize the pedigree walk by filling the pedigree context with NULLs - * and setting the size field to sizeof(__cilkrts_pedigree_context). - * Other than initialization to NULL to start the walk, user coder should - * consider the pedigree context data opaque and should not examine or - * modify it. + * Initialize the pedigree walk by filling the pedigree context with NULLs + * and setting the size field to sizeof(__cilkrts_pedigree_context). 
+ * Other than initialization to NULL to start the walk, user code should + * consider the pedigree context data opaque and should not examine or + * modify it. * * @returns 0 - Success - birthrank is valid * @returns >0 - End of pedigree walk @@ -270,10 +321,10 @@ CILK_API(int) __cilkrts_get_pedigree_info(/* In/Out */ __cilkrts_pedigree_context_t *context, /* Out */ uint64_t *sf_birthrank); -/** - * @brief DEPRECATED -- Use __cilkrts_get_pedigree().rank instead. +/** Get the rank of the currently executing worker. * - * Fetch the rank from the currently executing worker + * @deprecated + * Use `__cilkrts_get_pedigree().rank` instead. * * @returns 0 - Success - *rank is valid * @returns <0 - Failure - *rank is not changed @@ -285,12 +336,7 @@ int __cilkrts_get_worker_rank(uint64_t *rank) return 0; } -/* Internal implementation of __cilkrts_bump_worker_rank */ -CILK_API(int) -__cilkrts_bump_worker_rank_internal(__cilkrts_worker* w); - -/** - * @brief Increment the pedigree rank of the currently executing worker +/** Increment the pedigree rank of the currently executing worker. * * @returns 0 - Success - rank was incremented * @returns-1 - Failure @@ -301,57 +347,33 @@ int __cilkrts_bump_worker_rank(void) return __cilkrts_bump_worker_rank_internal(__cilkrts_get_tls_worker()); } -/* Internal implementation of __cilkrts_bump_worker_rank */ -CILK_API(int) -__cilkrts_bump_loop_rank_internal(__cilkrts_worker* w); - -/** - * @brief Increment the pedigree rank for a cilk_for loop. - * - * A cilk_for loop is implemented using a divide and conquer recursive - * algorithm. This allows the work of the cilk_for loop to spread optimally - * across the available workers. Unfortunately, this makes the pedigree - * for dependent on the grainsize. Unless overridden by the cilk grainsize - * pragma, the grainsize is based on number of workers and the number of - * iterations in the loop. - * - * To fix this, the pedigree is "flattened" in a cilk_for. 
A pedigree node is - * created for the loop index, and a second node is created for the loop body. - * The compiler generates a lambda function from the loop body that is passed - * the low and high bounds of the loop indicies it should iterate over. This - * range is the "grain size". When the loop body lambda function is called, - * the pedigree rank of the loop index node is initialized to the lower loop - * index. - * - * Eventually, the compiler generated loop body lambda function should - * increment the cilk_for rank at the end of each iteration around the - * cilk_for loop body. However, this is not currently implemented. - * - * This function is provided to allow users to increment the cilk_for rank - * themselves. Users should call this function only at the end of a cilk_for - * loop body. Use of this function is not required. If not used, the - * pedigree sequence will change any time the loop's grainsize changes, i.e., - * if the program is run with a different number of workers. - * - * When the code generated by the compiler for the cilk_for loop body - * "does the right thing" this function will become a noop. - * - * @returns 0 - Success - rank was incremented - * @returns -1 - Failure +/** Increment the pedigree rank for a cilk_for loop. + * Obsolete. + * + * @deprecated + * This function was provided to allow the user to manipulate the pedigree + * rank of a `cilk_for` loop. The compiler now generates code to do that + * manipulation automatically, so this function is now unnecessary. It may + * be called, but will have no effect. 
*/ CILK_EXPORT_AND_INLINE int __cilkrts_bump_loop_rank(void) { - return __cilkrts_bump_loop_rank_internal(__cilkrts_get_tls_worker()); + return 0; } +//@} + #endif /* __CILKRTS_ABI_VERSION >= 1 */ __CILKRTS_END_EXTERN_C #else /* CILK_STUB */ -/* Stubs for the api functions */ +// Programs compiled with CILK_STUB are not linked with the Cilk runtime +// library, so they should not have external references to runtime functions. +// Therefore, the functions are replaced with stubs. + #ifdef _WIN32 #define __cilkrts_set_param_w(name,value) ((value), 0) #define __cilkrts_set_seh_callback(pfn) (0) @@ -372,7 +394,7 @@ __CILKRTS_END_EXTERN_C #define __cilkrts_bump_worker_rank() (-1) #define __cilkrts_bump_loop_rank() (-1) -/** +/* * A stub method for __cilkrts_get_pedigree. * Returns an empty __cilkrts_pedigree. */ @@ -392,4 +414,6 @@ __cilkrts_pedigree __cilkrts_get_pedigree_stub(void) #endif /* CILK_STUB */ +//@} + #endif /* INCLUDED_CILK_API_H */ diff --git a/libcilkrts/include/cilk/cilk_api_linux.h b/libcilkrts/include/cilk/cilk_api_linux.h index c4d2dad78c6..eae7be4c8f4 100644 --- a/libcilkrts/include/cilk/cilk_api_linux.h +++ b/libcilkrts/include/cilk/cilk_api_linux.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ diff --git a/libcilkrts/include/cilk/cilk_stub.h b/libcilkrts/include/cilk/cilk_stub.h index 80f0c3e0207..67df008c4d7 100644 --- a/libcilkrts/include/cilk/cilk_stub.h +++ b/libcilkrts/include/cilk/cilk_stub.h @@ -1,27 +1,32 @@ /* cilk_stub.h -*-C++-*- * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ diff --git a/libcilkrts/include/cilk/cilk_undocumented.h b/libcilkrts/include/cilk/cilk_undocumented.h index 7eca7ee08f0..9163492c821 100644 --- a/libcilkrts/include/cilk/cilk_undocumented.h +++ b/libcilkrts/include/cilk/cilk_undocumented.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * ****************************************************************************** * @@ -95,15 +100,17 @@ CILK_EXPORT __CILKRTS_NOTHROW int __cilkrts_watch_stack(struct __cilk_tbb_unwatch_thunk *u, struct __cilk_tbb_stack_op_thunk o); +#ifndef IN_CILK_RUNTIME #ifdef _WIN32 /* Do not use CILK_API because __cilkrts_worker_stub must be __stdcall */ CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall __cilkrts_worker_stub(void *arg); #else -/* Do not use CILK_API because __cilkrts_worker_stub have defauld visibility */ -__attribute__((visibility("default"))) -void* __CILKRTS_NOTHROW __cilkrts_worker_stub(void *arg); -#endif +/* Do not use CILK_API because __cilkrts_worker_stub have default visibility */ +CILK_EXPORT void* __CILKRTS_NOTHROW +__cilkrts_worker_stub(void *arg); +#endif /* _WIN32 */ +#endif /* IN_CILK_RUNTIME */ __CILKRTS_END_EXTERN_C diff --git a/libcilkrts/include/cilk/common.h b/libcilkrts/include/cilk/common.h index bc28100bfcb..aeff8f358e6 100644 --- a/libcilkrts/include/cilk/common.h +++ b/libcilkrts/include/cilk/common.h @@ -1,39 +1,74 @@ -/* - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/** common.h * + * @copyright + * Copyright (C) 2010-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file common.h + * + * @brief Defines common macros and structures used by the Intel Cilk Plus + * runtime. + * + * @ingroup common */ +/** @defgroup common Common Definitions + * Macro, structure, and class definitions used elsewhere in the runtime. + * @{ + */ + #ifndef INCLUDED_CILK_COMMON #define INCLUDED_CILK_COMMON #ifdef __cplusplus +/** Namespace for all Cilk definitions that can be included in user code. + */ +namespace cilk { + + /** Namespace for definitions that are primarily intended for use + * in other Cilk definitions. + */ + namespace internal {} +} +#endif + +/** Cilk library version = 1.0 + */ +#define CILK_LIBRARY_VERSION 100 + +#ifdef __cplusplus # include <cassert> #else # include <assert.h> #endif -/* Prefix standard library function and type names with __STDNS in order to +/** + * Prefix standard library function and type names with __STDNS in order to * get correct lookup in both C and C++. */ #ifdef __cplusplus @@ -42,8 +77,12 @@ # define __STDNS #endif -/* CILK_EXPORT - Define export of runtime functions from shared library. +/** + * @def CILK_EXPORT + * Define export of runtime functions from shared library. * Should be exported only from cilkrts*.dll/cilkrts*.so + * @def CILK_EXPORT_DATA + * Define export of runtime data from shared library. 
*/ #ifdef _WIN32 # ifdef IN_CILK_RUNTIME @@ -53,7 +92,7 @@ # define CILK_EXPORT __declspec(dllimport) # define CILK_EXPORT_DATA __declspec(dllimport) # endif /* IN_CILK_RUNTIME */ -#elif defined(__CYGWIN__) +#elif defined(__CYGWIN__) || defined(__APPLE__) || defined(_DARWIN_C_SOURCE) # define CILK_EXPORT /* nothing */ # define CILK_EXPORT_DATA /* nothing */ #else /* Unix/gcc */ @@ -66,14 +105,39 @@ # endif /* IN_CILK_RUNTIME */ #endif /* Unix/gcc */ +/** + * @def __CILKRTS_BEGIN_EXTERN_C + * Macro to denote the start of a section in which all names have "C" linkage. + * That is, none of the names are to be mangled. + * @see __CILKRTS_END_EXTERN_C + * @see __CILKRTS_EXTERN_C + * + * @def __CILKRTS_END_EXTERN_C + * Macro to denote the end of a section in which all names have "C" linkage. + * That is, none of the names are to be mangled. + * @see __CILKRTS_BEGIN_EXTERN_C + * @see __CILKRTS_EXTERN_C + * + * @def __CILKRTS_EXTERN_C + * Macro to prefix a single definition which has "C" linkage. + * That is, the defined name is not to be mangled. + * @see __CILKRTS_BEGIN_EXTERN_C + * @see __CILKRTS_END_EXTERN_C + */ #ifdef __cplusplus -# define __CILKRTS_BEGIN_EXTERN_C extern "C" { -# define __CILKRTS_END_EXTERN_C } +# define __CILKRTS_BEGIN_EXTERN_C extern "C" { +# define __CILKRTS_END_EXTERN_C } +# define __CILKRTS_EXTERN_C extern "C" #else # define __CILKRTS_BEGIN_EXTERN_C # define __CILKRTS_END_EXTERN_C +# define __CILKRTS_EXTERN_C #endif +/** + * OS-independent macro to specify a function which is known to not throw + * an exception. + */ #ifdef __cplusplus # ifdef _WIN32 # define __CILKRTS_NOTHROW __declspec(nothrow) @@ -84,15 +148,31 @@ # define __CILKRTS_NOTHROW /* nothing */ #endif /* __cplusplus */ +/** Cache alignment. (Good enough for most architectures.) + */ +#define __CILKRTS_CACHE_LINE__ 64 + +/** + * Macro to specify alignment of a data member in a structure. 
+ */ #ifdef _WIN32 # define CILK_ALIGNAS(n) __declspec(align(n)) #else /* Unix/gcc */ -# define CILK_ALIGNAS(n) __attribute__((aligned(n))) +# define CILK_ALIGNAS(n) __attribute__((__aligned__(n))) #endif /* Unix/gcc */ -/* CILK_API: Called explicitly by the programmer. - * CILK_ABI: Called by compiler-generated code. - * CILK_ABI_THROWS: An ABI function that may throw an exception +/** + * Macro to specify cache-line alignment of a data member in a structure. + */ +#define __CILKRTS_CACHE_ALIGN CILK_ALIGNAS(__CILKRTS_CACHE_LINE__) + +/** + * @def CILK_API(RET_TYPE) + * A function called explicitly by the programmer. + * @def CILK_ABI(RET_TYPE) + * A function called by compiler-generated code. + * @def CILK_ABI_THROWS(RET_TYPE) + * An ABI function that may throw an exception * * Even when these are the same definitions, they should be separate macros so * that they can be easily found in the code. @@ -108,7 +188,8 @@ # define CILK_ABI_THROWS(RET_TYPE) CILK_EXPORT RET_TYPE #endif -/* __CILKRTS_ASSERT should be defined for debugging only, otherwise it +/** + * __CILKRTS_ASSERT should be defined for debugging only, otherwise it * interferes with vectorization. Since NDEBUG is not reliable (it must be * set by the user), we must use a platform-specific detection of debug mode. */ @@ -126,7 +207,9 @@ # define __CILKRTS_ASSERT(e) ((void) 0) #endif -// Inlining is always available, but not always the same way. 
+/** + * OS-independent macro to specify a function that should be inlined + */ #ifdef __cpluspus // C++ # define __CILKRTS_INLINE inline @@ -137,22 +220,26 @@ // C89 on Windows # define __CILKRTS_INLINE __inline #else - // C89 on Linux -# define __CILKRTS_INLINE __inline__ + // C89 on GCC-compatible systems +# define __CILKRTS_INLINE extern __inline__ #endif -// Functions marked as CILK_EXPORT_AND_INLINE have both -// inline versions defined in the Cilk API, as well as -// non-inlined versions that are exported (for -// compatibility with previous versions that did not -// inline the functions). +/** + * Functions marked as CILK_EXPORT_AND_INLINE have both + * inline versions defined in the Cilk API, as well as + * non-inlined versions that are exported (for + * compatibility with previous versions that did not + * inline the functions). + */ #ifdef COMPILING_CILK_API_FUNCTIONS # define CILK_EXPORT_AND_INLINE CILK_EXPORT #else # define CILK_EXPORT_AND_INLINE __CILKRTS_INLINE #endif -// Try to determine if compiler supports rvalue references. +/** + * Try to determine if compiler supports rvalue references. + */ #if defined(__cplusplus) && !defined(__CILKRTS_RVALUE_REFERENCES) # if __cplusplus >= 201103L // C++11 # define __CILKRTS_RVALUE_REFERENCES 1 @@ -223,17 +310,35 @@ // the internal version of API methods require a worker // structure as parameter. __CILKRTS_BEGIN_EXTERN_C + /// Worker struct, exported for inlined API methods + /// @ingroup api struct __cilkrts_worker; - typedef struct __cilkrts_worker __cilkrts_worker; - typedef struct __cilkrts_worker *__cilkrts_worker_ptr; + + /// Worker struct, exported for inlined API methods + /// @ingroup api + typedef struct __cilkrts_worker __cilkrts_worker; + + /// Worker struct pointer, exported for inlined API methods + /// @ingroup api + typedef struct __cilkrts_worker *__cilkrts_worker_ptr; + + + /// Fetch the worker out of TLS. 
CILK_ABI(__cilkrts_worker_ptr) __cilkrts_get_tls_worker(void); + + /// void *, defined to work around complaints from the compiler + /// about using __declspec(nothrow) after the "void *" return type + typedef void * __cilkrts_void_ptr; + __CILKRTS_END_EXTERN_C #if __CILKRTS_ABI_VERSION >= 1 // Pedigree API is available only for compilers that use ABI version >= 1. -/** Pedigree information kept in the worker and stack frame */ +/** Pedigree information kept in the worker and stack frame. + * @ingroup api + */ typedef struct __cilkrts_pedigree { /** Rank at start of spawn helper. Saved rank for spawning functions */ @@ -245,4 +350,6 @@ typedef struct __cilkrts_pedigree #endif // __CILKRTS_ABI_VERSION >= 1 +/// @} + #endif /* INCLUDED_CILK_COMMON */ diff --git a/libcilkrts/include/cilk/holder.h b/libcilkrts/include/cilk/holder.h index 87778ed84c1..ac80f42d1d2 100644 --- a/libcilkrts/include/cilk/holder.h +++ b/libcilkrts/include/cilk/holder.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ diff --git a/libcilkrts/include/cilk/hyperobject_base.h b/libcilkrts/include/cilk/hyperobject_base.h index 73279065f99..5d0393e1b04 100644 --- a/libcilkrts/include/cilk/hyperobject_base.h +++ b/libcilkrts/include/cilk/hyperobject_base.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ @@ -45,19 +50,6 @@ # endif #endif -/* Macro to cache-align a declaration. Argument(s) comprise either a - * variable or a struct declaration. */ -#define __CILKRTS_CACHE_LINE__ 64 /* Good enough for most architectures */ -#if defined(__INTEL_COMPILER) || defined(_WIN32) -# define __CILKRTS_CACHE_ALIGNED(...) 
\ - __declspec(align(__CILKRTS_CACHE_LINE__)) __VA_ARGS__ -#elif defined(__GNUC__) -# define __CILKRTS_CACHE_ALIGNED(...) \ - __VA_ARGS__ __attribute__((__aligned__(__CILKRTS_CACHE_LINE__))) -#else -# define __CILKRTS_CACHE_ALIGNED(...) __VA_ARGS__ -#endif - /* The __CILKRTS_STRAND_PURE attribute tells the compiler that the value * returned by 'func' for a given argument to 'func' will remain valid until * the next strand boundary (spawn or sync) or until the next call to a @@ -106,6 +98,9 @@ typedef struct __cilkrts_hyperobject_base __STDNS size_t __view_size; /* Size of each view */ } __cilkrts_hyperobject_base; + +#ifndef CILK_STUB + /* Library functions. */ CILK_EXPORT void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key); @@ -123,6 +118,50 @@ CILK_EXPORT CILK_EXPORT void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2); + +#else // CILK_STUB + +// Programs compiled with CILK_STUB are not linked with the Cilk runtime +// library, so they should not have external references to cilkrts functions. +// Furthermore, they don't need the hyperobject functionality, so the +// functions can be stubbed. + +#define __cilkrts_hyperobject_create __cilkrts_hyperobject_create__stub +__CILKRTS_INLINE + void __cilkrts_hyper_create(__cilkrts_hyperobject_base *key) + {} + +#define __cilkrts_hyperobject_destroy __cilkrts_hyperobject_destroy__stub +__CILKRTS_INLINE + void __cilkrts_hyper_destroy(__cilkrts_hyperobject_base *key) + {} + +#define __cilkrts_hyperobject_lookup __cilkrts_hyperobject_lookup__stub +__CILKRTS_INLINE + void* __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *key) + { return (char*)(key) + key->__view_offset; } + +// Pointers to these functions are stored into monoids, so real functions +// are needed. 
+ +#define __cilkrts_hyperobject_alloc __cilkrts_hyperobject_alloc__stub +__CILKRTS_INLINE + void* __cilkrts_hyperobject_alloc(void* ignore, __STDNS size_t bytes) + { assert(0); return __STDNS malloc(bytes); } + +#define __cilkrts_hyperobject_dealloc __cilkrts_hyperobject_dealloc__stub +__CILKRTS_INLINE + void __cilkrts_hyperobject_dealloc(void* ignore, void* view) + { assert(0); __STDNS free(view); } + +#define __cilkrts_hyperobject_noop_destroy \ + __cilkrts_hyperobject_noop_destroy__stub +__CILKRTS_INLINE + void __cilkrts_hyperobject_noop_destroy(void* ignore, void* ignore2) + {} + +#endif + __CILKRTS_END_EXTERN_C #endif /* INCLUDED_CILK_HYPEROBJECT_BASE */ diff --git a/libcilkrts/include/cilk/metaprogramming.h b/libcilkrts/include/cilk/metaprogramming.h new file mode 100644 index 00000000000..6ef8c063688 --- /dev/null +++ b/libcilkrts/include/cilk/metaprogramming.h @@ -0,0 +1,523 @@ +/* metaprogramming.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2012-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file metaprogramming.h + * + * @brief Defines metaprogramming utility classes used in the Cilk library. + * + * @ingroup common + */ + +#ifndef METAPROGRAMMING_H_INCLUDED +#define METAPROGRAMMING_H_INCLUDED + +#ifdef __cplusplus + +#include <functional> +#include <new> +#include <cstdlib> +#ifdef _WIN32 +#include <malloc.h> +#endif +#include <algorithm> + +namespace cilk { + +namespace internal { + +/** Test if a class is empty. + * + * If @a Class is an empty (and therefore necessarily stateless) class, then + * the “empty base-class optimization” guarantees that + * `sizeof(check_for_empty_class<Class>) == sizeof(char)`. Conversely, if + * `sizeof(check_for_empty_class<Class>) > sizeof(char)`, then @a Class is not + * empty, and we must discriminate distinct instances of @a Class. + * + * Typical usage: + * + * // General definition of A<B> for non-empty B: + * template <typename B, bool BIsEmpty = class_is_empty<B>::value> + * class A { ... }; + * + * // Specialized definition of A<B> for empty B: + * template <typename B> + * class A<B, true> { ... }; + * + * @tparam Class The class to be tested for emptiness. + * + * @result The `value` member will be `true` if @a Class is empty, + * `false` otherwise. + * + * @ingroup common + */ +template <class Class> +class class_is_empty { + class check_for_empty_class : public Class + { + char m_data; + public: + // Declared but not defined + check_for_empty_class(); + check_for_empty_class(const check_for_empty_class&); + check_for_empty_class& operator=(const check_for_empty_class&); + ~check_for_empty_class(); + }; +public: + + /** Constant is true if and only if @a Class is empty.
+ */ + static const bool value = (sizeof(check_for_empty_class) == sizeof(char)); +}; + + +/** Compute the alignment of a type. (More precisely, the alignment of a data + * member of the type in a structure.) + * + * For example: + * + * align_of<double>::value == 8 + * + * Adapted from the [AlignOf](http://llvm.org/doxygen/AlignOf_8h_source.html) + * class used in [LLVM](http://llvm.org). + * + * @tparam T The type whose alignment is to be computed. + * + * @result `value` will be the alignment for type @a T. + * + * @see alignof() + * + * @ingroup common + */ +template <typename T> +class align_of { + + struct impl { + char x; + T t; + impl(); // Never instantiate. + impl(const impl&); + }; + +public: + enum { + /** The alignment of the type @a T. + */ + value = static_cast<std::size_t>(sizeof(impl) - sizeof(T)) + }; +}; + + +/** Get the functor class corresponding to a binary function type. + * + * The `binary_functor` template class can be instantiated with a binary + * functor class or with a real binary function, and will yield an equivalent + * binary functor class in either case. + * + * @tparam F A binary functor class, a binary function type, or a pointer to + * binary function type. + * + * @result `binary_functor<F>::%type` will be the same as @a F if @a F is + * a class. It will be a `std::pointer_to_binary_function` wrapper + * if @a F is a binary function or binary function pointer type. + * (It will _not_ necessarily be an `Adaptable Binary Function` + * class, since @a F might be a non-adaptable binary functor + * class.) + * + * @ingroup common + */ +template <typename F> +struct binary_functor { + /// The binary functor class equivalent to @a F. + typedef F type; +}; + +/// @copydoc binary_functor +/// Specialization for binary function. +template <typename R, typename A, typename B> +struct binary_functor<R(A,B)> { + /// The binary functor class equivalent to @a F.
+ typedef std::pointer_to_binary_function<A, B, R> type; +}; + +/// @copydoc binary_functor +/// Specialization for pointer to binary function. +template <typename R, typename A, typename B> +struct binary_functor<R(*)(A,B)> { + /// The binary functor class equivalent to @a F. + typedef std::pointer_to_binary_function<A, B, R> type; +}; + + +/** Indirect binary function class with specified types. + * + * `typed_indirect_binary_function<F>` is an `Adaptable Binary Function` class + * based on an existing binary functor class or binary function type @a F. If + * @a F is a stateless class, then this class will be empty, and its + * `operator()` will invoke @a F’s `operator()`. Otherwise, an object of this + * class will hold a pointer to an object of type @a F, and will refer its + * `operator()` calls to the pointed-to @a F object. + * + * That is, suppose that we have the declarations: + * + * F *p; + * typed_indirect_binary_function<F, int, int, bool> ibf(p); + * + * Then: + * + * - `ibf(x, y) == (*p)(x, y)`. + * - `ibf(x, y)` will not do a pointer dereference if `F` is an empty class. + * + * @note Just to repeat: if `F` is an empty class, then + * `typed_indirect_binary_function\<F\>` is also an empty class. + * This is critical for its use in the @ref min_max::view_base + * "min/max reducer view classes", where it allows the view to + * call a comparison functor in the monoid without actually + * having to allocate a pointer in the view class when the + * comparison class is empty. + * + * @note If you have an `Adaptable Binary Function` class or a binary + * function type, then you can use the + * @ref indirect_binary_function class, which derives the + * argument and result types from the parameter type instead of requiring + * you to specify them as template arguments. + * + * @tparam F A binary functor class, a binary function type, or a pointer to + * binary function type. + * @param A1 The first argument type. + * @param A2 The second argument type.
+ * @param R The result type. + * + * @see min_max::comparator_base + * @see indirect_binary_function + * + * @ingroup common + */ +template < typename F + , typename A1 + , typename A2 + , typename R + , typename Functor = typename binary_functor<F>::type + , bool FunctorIsEmpty = class_is_empty<Functor>::value + > +class typed_indirect_binary_function : std::binary_function<A1, A2, R> +{ + const F* f; +public: + /// Constructor captures a pointer to the wrapped function. + typed_indirect_binary_function(const F* f) : f(f) {} + + /// Return the comparator pointer, or `NULL` if the comparator is stateless. + const F* pointer() const { return f; } + + /// Apply the pointed-to functor to the arguments. + R operator()(const A1& a1, const A2& a2) const { return (*f)(a1, a2); } +}; + + +/// @copydoc typed_indirect_binary_function +/// Specialization for an empty functor class. (This is only possible if @a F +/// itself is an empty class. If @a F is a function or pointer-to-function +/// type, then the functor will contain a pointer.) +template <typename F, typename A1, typename A2, typename R, typename Functor> +class typed_indirect_binary_function<F, A1, A2, R, Functor, true> : + std::binary_function<A1, A2, R> +{ +public: + /// Return `NULL` for the comparator pointer of a stateless comparator. + const F* pointer() const { return 0; } + + /// Constructor discards the pointer to a stateless functor class. + typed_indirect_binary_function(const F* f) {} + + /// Create an instance of the stateless functor class and apply it to the arguments. + R operator()(const A1& a1, const A2& a2) const { return F()(a1, a2); } +}; + + +/** Indirect binary function class with inferred types. + * + * This is identical to @ref typed_indirect_binary_function, except that it + * derives the binary function argument and result types from the parameter + * type @a F instead of taking them as additional template parameters. 
If @a F + * is a class type, then it must be an `Adaptable Binary Function`. + * + * @see typed_indirect_binary_function + * + * @ingroup common + */ +template <typename F, typename Functor = typename binary_functor<F>::type> +class indirect_binary_function : + typed_indirect_binary_function< F + , typename Functor::first_argument_type + , typename Functor::second_argument_type + , typename Functor::result_type + > +{ + typedef typed_indirect_binary_function< F + , typename Functor::first_argument_type + , typename Functor::second_argument_type + , typename Functor::result_type + > + base; +public: + indirect_binary_function(const F* f) : base(f) {} ///< Constructor +}; + + +/** Choose a type based on a boolean constant. + * + * This metafunction is identical to C++11’s `std::conditional` metafunction. + * It needs to be here until we can reasonably assume that users will be + * compiling with C++11. + * + * @tparam Cond A boolean constant. + * @tparam IfTrue A type. + * @tparam IfFalse A type. + * @result The `type` member will be a typedef of @a IfTrue if @a Cond + * is true, and a typedef of @a IfFalse if @a Cond is false. + * + * @ingroup common + */ +template <bool Cond, typename IfTrue, typename IfFalse> +struct condition +{ + typedef IfTrue type; ///< The type selected by the condition. +}; + +/// @copydoc condition +/// Specialization for @a Cond == `false`. +template <typename IfTrue, typename IfFalse> +struct condition<false, IfTrue, IfFalse> +{ + typedef IfFalse type; ///< The type selected by the condition. +}; + + +/** @def __CILKRTS_STATIC_ASSERT + * + * @brief Compile-time assertion. + * + * Causes a compilation error if a compile-time constant expression is false. + * + * @par Usage example. + * This assertion is used in reducer_min_max.h to avoid defining + * legacy reducer classes that would not be binary-compatible with the + * same classes compiled with earlier versions of the reducer library.
+ * + * __CILKRTS_STATIC_ASSERT( + * internal::class_is_empty< internal::binary_functor<Compare> >::value, + * "cilk::reducer_max<Value, Compare> only works with an empty Compare class"); + * + * @note In a C++11 compiler, this is just the language predefined + * `static_assert` macro. + * + * @note In a non-C++11 compiler, the @a Msg string is not directly included + * in the compiler error message, but it may appear if the compiler + * prints the source line that the error occurred on. + * + * @param Cond The expression to test. + * @param Msg A string explaining the failure. + * + * @ingroup common + */ +#if defined(__INTEL_CXX11_MODE__) || defined(__GXX_EXPERIMENTAL_CXX0X__) +# define __CILKRTS_STATIC_ASSERT(Cond, Msg) static_assert(Cond, Msg) +#else +# define __CILKRTS_STATIC_ASSERT(Cond, Msg) \ + typedef int __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ + [::cilk::internal::static_assert_failure<(Cond)>::Success] + +/// @cond internal + template <bool> struct static_assert_failure { }; + template <> struct static_assert_failure<true> { enum { Success = 1 }; }; + +# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE \ + __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(__cilkrts_static_assert_, __LINE__) +# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE1(a, b) \ + __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) +# define __CILKRTS_STATIC_ASSERT_DUMMY_TYPE2(a, b) a ## b +/// @endcond + +#endif + +/// @cond internal + +/** @name Aligned heap management. + */ +//@{ + +/** Implementation-specific aligned memory allocation function. + * + * @param size The minimum number of bytes to allocate. + * @param alignment The required alignment (must be a power of 2). + * @return The address of a block of memory of at least @a size + * bytes. The address will be a multiple of @a alignment. + * `NULL` if the allocation fails. 
+ * + * @see deallocate_aligned() + */ +inline void* allocate_aligned(std::size_t size, std::size_t alignment) +{ +#ifdef _WIN32 + return _aligned_malloc(size, alignment); +#else + void* ptr; + return (posix_memalign(&ptr, std::max(alignment, sizeof(void*)), size) == 0) ? ptr : 0; +#endif +} + +/** Implementation-specific aligned memory deallocation function. + * + * @param ptr A pointer which was returned by a call to allocate_aligned(). + */ +inline void deallocate_aligned(void* ptr) +{ +#ifdef _WIN32 + _aligned_free(ptr); +#else + std::free(ptr); +#endif +} + +/** Class to allocate and guard an aligned pointer. + * + * A new_aligned_pointer object allocates aligned heap memory when + * it is created, and automatically deallocates it when it is destroyed + * unless its `ok()` function is called. + * + * @tparam T The type of the object to allocate on the heap. The allocated memory + * will have the size and alignment of an object of type T. + */ +template <typename T> +class new_aligned_pointer { + void* m_ptr; +public: + /// Constructor allocates the pointer. + new_aligned_pointer() : + m_ptr(allocate_aligned(sizeof(T), internal::align_of<T>::value)) {} + /// Destructor deallocates the pointer. + ~new_aligned_pointer() { if (m_ptr) deallocate_aligned(m_ptr); } + /// Get the pointer. + operator void*() { return m_ptr; } + /// Return the pointer and release the guard. + T* ok() { + T* ptr = static_cast<T*>(m_ptr); + m_ptr = 0; + return ptr; + } +}; + +//@} + +/// @endcond + +} // namespace internal + +//@{ + +/** Allocate an aligned data structure on the heap. + * + * `cilk::aligned_new<T>([args])` is equivalent to `new T([args])`, except + * that it guarantees that the returned pointer will be at least as aligned + * as the alignment requirements of type `T`.
+ * + * @ingroup common + */ +template <typename T> +T* aligned_new() +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(); + return ptr.ok(); +} + +template <typename T, typename T1> +T* aligned_new(const T1& x1) +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(x1); + return ptr.ok(); +} + +template <typename T, typename T1, typename T2> +T* aligned_new(const T1& x1, const T2& x2) +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(x1, x2); + return ptr.ok(); +} + +template <typename T, typename T1, typename T2, typename T3> +T* aligned_new(const T1& x1, const T2& x2, const T3& x3) +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(x1, x2, x3); + return ptr.ok(); +} + +template <typename T, typename T1, typename T2, typename T3, typename T4> +T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4) +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(x1, x2, x3, x4); + return ptr.ok(); +} + +template <typename T, typename T1, typename T2, typename T3, typename T4, typename T5> +T* aligned_new(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5) +{ + internal::new_aligned_pointer<T> ptr; + new (ptr) T(x1, x2, x3, x4, x5); + return ptr.ok(); +} + +//@} + + +/** Deallocate an aligned data structure on the heap. + * + * `cilk::aligned_delete(ptr)` is equivalent to `delete ptr`, except that it + * operates on a pointer that was allocated by aligned_new(). 
+ * + * @ingroup common + */ +template <typename T> +void aligned_delete(const T* ptr) +{ + ptr->~T(); + internal::deallocate_aligned((void*)ptr); +} + +} // namespace cilk + +#endif // __cplusplus + +#endif // METAPROGRAMMING_H_INCLUDED diff --git a/libcilkrts/include/cilk/reducer.h b/libcilkrts/include/cilk/reducer.h index d44823ccea3..dcbc3cebf50 100644 --- a/libcilkrts/include/cilk/reducer.h +++ b/libcilkrts/include/cilk/reducer.h @@ -1,43 +1,51 @@ -/* reducer.h -*-C++-*- - * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ - -#ifndef CILK_REDUCER_H_INCLUDED -#define CILK_REDUCER_H_INCLUDED + +/** @file reducer.h + * + * @brief Defines foundation classes for creating Cilk reducers. + * + * @ingroup Reducers + * + * @see @ref pagereducers + * + * @defgroup Reducers Reducers + */ + +#ifndef REDUCER_H_INCLUDED +#define REDUCER_H_INCLUDED #include "cilk/hyperobject_base.h" +#include "cilk/metaprogramming.h" -/* - * C++ and C interfaces for Cilk reducer hyperobjects - */ - -/* Utility macros */ -#define __CILKRTS_MKIDENT(a,b) __CILKRTS_MKIDENT_IMP(a,b,) -#define __CILKRTS_MKIDENT3(a,b,c) __CILKRTS_MKIDENT_IMP(a,b,c) -#define __CILKRTS_MKIDENT_IMP(a,b,c) a ## b ## c #ifdef __cplusplus @@ -45,583 +53,1832 @@ #include <new> -#ifdef CILK_STUB -// Stub implementations are in the cilk::stub namespace namespace cilk { - namespace stub { } - using namespace stub; -} -#endif -// MONOID CONCEPT AND monoid_base CLASS TEMPLATE -// -// In mathematics, a "monoid" comprises a set of values (type), an associative -// operation on that set, and an identity value for that set and that -// operation. So for example (integer, +, 0) is a monoid, as is (real, *, 1). 
-// The 'Monoid' concept in Cilk++ has a typedef and three functions that -// represent a that map to a monoid, (T, OP, IDENTITY), as follows: -//.. -// value_type is a typedef for T -// reduce(left,right) evaluates '*left = *left OP *right' -// identity(p) constructs IDENTITY value into the uninitilized '*p' -// destroy(p) calls the destructor on the object pointed-to by 'p' -// allocate(size) return a pointer to size bytes of raw memory -// deallocate(p) deallocate the raw memory at p -//.. -// 'left', 'right', and 'p' are all pointers to objects of type 'value_type'. -// All functions must be either 'static' or 'const'. A class that meets the -// requirements of the 'Monoid' concept is usually stateless, but will -// sometimes contain state used to initialize the identity object. - -namespace cilk { - -/// The 'monoid_base' class template is a useful base class for a large set -/// of monoid classes for which the identity value is a default-constructed -/// value of type 'T', allocated using operator new. A derived class of -/// 'monoid_base' need only declare and implement the 'reduce' function. -template <class T> +/** Base class for defining monoids. + * + * The monoid_base class template is useful for creating classes that model + * the monoid concept. It provides the core type and memory management + * functionality. A subclass of monoid_base need only declare and implement + * the `identity` and `reduce` functions. + * + * The monoid_base class also manages the integration between the monoid, the + * reducer class that is based on it, and an optional view class which wraps + * value objects and restricts access to their operations. + * + * @tparam Value The value type for the monoid. + * @tparam View An optional view class that serves as a proxy for the value + * type. 
+ * + * @see monoid_with_view + */ +template <typename Value, typename View = Value> class monoid_base { -public: - /// Type of value for this monoid - typedef T value_type; +protected: + + /** Class for provisionally constructed objects. + * + * The monoid_base::construct() functions manually construct both a monoid + * and a view. If one of these is constructed successfully, and the + * construction of the other (or some other initialization) fails, then + * the first one must be destroyed to avoid a memory leak. Because the + * construction is explicit, the destruction must be explicit, too. + * + * A provisional_guard object wraps a pointer to a newly constructed + * object. A call to its confirm() function confirms that the object is + * really going to be used. If the guard is destroyed without being + * confirmed, then the pointed-to object is destroyed (but not + * deallocated). + * + * Expected usage: + * + * provisional_guard<T1> x1_provisional( new (x1) T1() ); + * … more initialization … + * x1_provisional.confirm(); + * + * or + * + * provisional_guard<T1> x1_provisional( new (x1) T1() ); + * x1_provisional.confirm_if( new (x2) T2() ); + * + * If an exception is thrown in the “more initialization” code in the + * first example, or in the `T2` constructor in the second example, then + * `x1_provisional` will not be confirmed, so when its destructor is + * called during exception unwinding, the `T1` object that was constructed + * in `x1` will be destroyed. + * + * @see provisional() + * + * @tparam Type The type of the provisionally constructed object. + */ + template <typename Type> + class provisional_guard { + Type* m_ptr; + + public: + + /** Constructor. Creates a guard for a provisionally constructed object. + * + * @param ptr A pointer to the provisionally constructed object. + */ + provisional_guard(Type* ptr) : m_ptr(ptr) {} + + /** Destructor. Destroy the object pointed to by the contained pointer + * if it has not been confirmed. 
+ */ + ~provisional_guard() { if (m_ptr) m_ptr->~Type(); } + + /** Confirm the provisional construction. Do *not* delete the contained + * pointer when the guard is destroyed. + */ + void confirm() { m_ptr = 0; } + + /** Confirm provisional construction if argument is non-null. Note that + * if an exception is thrown during evaluation of the argument + * expression, then this function will not be called, and the + * provisional object will not be confirmed. This allows the usage: + * + * x1_provisional.confirm_if( new (x2) T2() ); + * + * @param cond An arbitrary pointer. The provisional object will be + * confirmed if @a cond is not null. + * + * @returns The value of the @a cond argument. + */ + template <typename Cond> + Cond* confirm_if(Cond* cond) { if (cond) m_ptr = 0; return cond; } + }; - /// Constructs IDENTITY value into the uninitilized '*p' - void identity(T* p) const { new ((void*) p) T(); } + + /** Create a provisional_guard object. This function allows simpler code + * when the only use of a provisional_guard is in a + * provisional_guard::confirm_if() call immediately following its + * creation. Instead of + * + * provisional_guard<T>guard( new (ptr_to_T) T() ); + * guard.confirm_if( new (ptr_to_U) U() ); + * + * you can just write + * + * provisional( new (ptr_to_T) T() ).confirm_if( new (ptr_to_U) U() ); + * + * @tparam Type The type of the provisionally constructed object. + * + * @param ptr A pointer to a provisionally constructed object. + * + * @returns A @ref provisional_guard object that guards the + * provisionally constructed object pointed to by @a ptr. + */ + template <typename Type> + static provisional_guard<Type> provisional(Type* ptr) + { return provisional_guard<Type>(ptr); } - /// Calls the destructor on the object pointed-to by 'p' - void destroy(T* p) const { p->~T(); } +public: - /// Return a pointer to size bytes of raw memory + /** Value type of the monoid. + */ + typedef Value value_type; + + /** View type of the monoid. 
Defaults to be the same as the value type. + * @see monoid_with_view + */ + typedef View view_type; + + /** Should reducers created with this monoid be aligned? + * Default is true. + */ + enum { align_reducer = true }; + + /** Destroy a view. Destroys (without deallocating) the @a View object + * pointed to by @a p. + * + * @param p The address of the @a View object to be destroyed. + */ + void destroy(view_type* p) const { p->~view_type(); } + + /** Allocate raw memory. Allocate @a s bytes of memory with no + * initialization. + * + * @param s The number of bytes of memory to allocate. + * @return An untyped pointer to the allocated memory. + */ void* allocate(size_t s) const { return operator new(s); } - /// Deallocate the raw memory at p + /** Deallocate raw memory. Deallocates the memory pointed to by @a p + * without doing any destruction. + * + * @param p Pointer to the memory to be deallocated. + * + * @pre @a p points to a block of memory that was allocated by a + * call to allocate(). + */ void deallocate(void* p) const { operator delete(p); } + + /** Create the identity value. Constructs (without allocating) a @a View + * object representing the default value of the @a Value type. + * + * @param p A pointer to a block of raw memory large enough to hold a + * @a View object. + * + * @post The memory pointed to by @a p contains a @a View object that + * represents the default value of the @a View type. + * + * @deprecated This function constructs the @a View object with its default + * constructor, which will often, but not always, yield the + * appropriate identity value. Monoid classes should declare + * their identity function explicitly, rather than relying on + * this default definition. + */ + void identity(View* p) const { new ((void*) p) View(); } + + + /** @name Construct the monoid and the view with arbitrary arguments. 
+ * + * A @ref reducer object contains monoid and view data members, which are + * declared as raw storage (byte arrays), so that they are not implicitly + * constructed when the reducer is constructed. Instead, a reducer + * constructor calls one of the monoid class’s static construct() + * functions with the addresses of the monoid and the view, and the + * construct() function uses placement `new` to construct them. + * + * This allows the monoid to determine the order in which the monoid and + * view are constructed, and to make one of them dependent on the other. + * + * Any arguments to the reducer constructor are just passed on as + * additional arguments to the construct() function (after the monoid + * and view addresses). + * + * Any monoid whose needs are satisfied by the suite of construct() + * functions below, such as @ref monoid_with_view, can just inherit them + * from monoid_base. Other monoids will need to provide their own versions + * to override the monoid_base functions. + */ + //@{ + + /** Default-construct the monoid, and pass zero to five const reference + * arguments to the view constructor. 
+ */ + //@{ + + template <typename Monoid> + static void construct(Monoid* monoid, View* view) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + (monoid->identity(view), view) ); } + + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, const T1& x1) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1) ); } + + template <typename Monoid, typename T1, typename T2> + static void construct(Monoid* monoid, View* view, + const T1& x1, const T2& x2) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, x2) ); } + + template <typename Monoid, typename T1, typename T2, typename T3> + static void construct(Monoid* monoid, View* view, + const T1& x1, const T2& x2, const T3& x3) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, x2, x3) ); } + + template <typename Monoid, typename T1, typename T2, typename T3, + typename T4> + static void construct(Monoid* monoid, View* view, + const T1& x1, const T2& x2, const T3& x3, + const T4& x4) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, x2, x3, x4) ); } + + template <typename Monoid, typename T1, typename T2, typename T3, + typename T4, typename T5> + static void construct(Monoid* monoid, View* view, + const T1& x1, const T2& x2, const T3& x3, + const T4& x4, const T5& x5) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, x2, x3, x4, x5) ); } + + //@} + + /** Default-construct the monoid, and pass one non-const reference argument + * to the view constructor. + */ + //@{ + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, T1& x1) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1) ); } + //@} + + /** Copy-construct the monoid, and pass zero to four const reference + * arguments to the view constructor. 
+ */ + //@{ + + template <typename Monoid> + static void construct(Monoid* monoid, View* view, const Monoid& m) + { provisional( new ((void*)monoid) Monoid(m) ).confirm_if( + new ((void*)view) View() ); } + + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, const Monoid& m, + const T1& x1) + { provisional( new ((void*)monoid) Monoid(m) ).confirm_if( + new ((void*)view) View(x1) ); } + + template <typename Monoid, typename T1, typename T2> + static void construct(Monoid* monoid, View* view, const Monoid& m, + const T1& x1, const T2& x2) + { provisional( new ((void*)monoid) Monoid(m) ).confirm_if( + new ((void*)view) View(x1, x2) ); } + + template <typename Monoid, typename T1, typename T2, typename T3> + static void construct(Monoid* monoid, View* view, const Monoid& m, + const T1& x1, const T2& x2, const T3& x3) + { + provisional( new ((void*)monoid) Monoid(m) ).confirm_if( + new ((void*)view) View(x1, x2, x3) ); + } + + template <typename Monoid, typename T1, typename T2, typename T3, + typename T4> + static void construct(Monoid* monoid, View* view, const Monoid& m, + const T1& x1, const T2& x2, const T3& x3, + const T4& x4) + { + provisional( new ((void*)monoid) Monoid(m) ).confirm_if( + new ((void*)view) View(x1, x2, x3, x4) ); + } + + //@} + + //@} }; -} // end namspace cilk -#ifndef CILK_STUB +/** Monoid class that gets its value type and identity and reduce operations + * from its view. + * + * A simple implementation of the monoid-view-reducer architecture would + * distribute knowledge about the type and operations for the reduction + * between the monoid and the view — the identity and reduction operations are + * specified in the monoid, the reduction operations are implemented in the + * view, and the value type is specified in both the monoid and the view. + * This is inelegant. 
+ * + * monoid_with_view is a subclass of @ref monoid_base that gets its value type + * and its identity and reduction operations from its view class. No + * customization of the monoid_with_view class itself is needed beyond + * instantiating it with an appropriate view class. (Customized subclasses of + * monoid_with_view may be needed for other reasons, such as to keep some + * state for the reducer.) All of the Cilk predefined reducers use + * monoid_with_view or one of its subclasses. + * + * The view class `View` of a monoid_with_view must provide the following public definitions: + * + * Definition | Meaning + * ---------------------------------|-------- + * `value_type` | a typedef of the value type for the reduction + * `View()` | a default constructor which constructs the identity value for the reduction + * `void reduce(const View* other)` | a member function which applies the reduction operation to the values of `this` view and the `other` view, leaving the result as the value of `this` view, and leaving the value of the `other` view undefined (but valid) + * + * @tparam View The view class for the monoid. + * @tparam Align If true, reducers instantiated on this monoid will be + * cache-aligned. By default, library reducers (unlike legacy + * library reducer _wrappers_) are aligned only as required by + * contents. + */ +template <class View, bool Align = false> +class monoid_with_view : public monoid_base<typename View::value_type, View> +{ +public: + /** Should reducers created with this monoid be aligned? + */ + enum { align_reducer = Align }; + + /** Create the identity value. + * + * Implements the monoid `identity` operation by using the @a View class’s + * default constructor. + * + * @param p A pointer to a block of raw memory large enough to hold a + * @p View object. + */ + void identity(View* p) const { new ((void*)p) View(); } + + /** Reduce the values of two views. 
+ * + * Implements the monoid `reduce` operation by calling the left view’s + * `%reduce()` function with the right view as an operand. + * + * @param left The left operand of the reduce operation. + * @param right The right operand of the reduce operation. + * @post The left view contains the result of the reduce + * operation, and the right view is undefined. + */ + void reduce(View* left, View* right) const { left->reduce(right); } +}; -namespace cilk { -/// reducer CLASS TEMPLATE -/// -/// A reducer is instantiated on a Monoid. The Monoid provides the value -/// type, associative reduce function, and identity for the reducer. Function -/// view(), operator*(), and operator()() return the current view of the -/// reducer, although operator()() is deprecated. -template <class Monoid> -class reducer +/** Base class for simple views with (usually) scalar values. + * + * The scalar_view class is intended as a base class which provides about half + * of the required definitions for simple views. It defines the `value_type` + * required by a @ref monoid_with_view (but not the identity constructor and + * reduce operation, which are inherently specific to a particular kind of + * reduction). It also defines the value access functions which will be called + * by the corresponding @ref reducer functions. (It uses copy semantics for + * the view_move_in() and view_move_out() functions, which is appropriate + * for simple scalar types, but not necessarily for more complex types like + * STL containers.) + * + * @tparam Type The type of value wrapped by the view. + */ +template <typename Type> +class scalar_view { - typedef typename Monoid::value_type value_type; - - __cilkrts_hyperobject_base base_; - const Monoid monoid_; // implementation of monoid interface - void* initialThis_; // Sanity checker - - // Primary (leftmost) view, on its own cache line to avoid false sharing.
- // IMPORTANT: Even though this view is known in advance, access to it from - // outside the reducer should be through the __cilkrts_hyper_lookup() - // function only (which is called by the view() function. This - // restriction is necessary so that the compiler can assume that - // __cilkrts_hyper_lookup() is the ONLY source of the address of this - // object, and can therefore optimize as if it had no aliases. - __CILKRTS_CACHE_ALIGNED(value_type leftmost_); - - // Wrappers around C monoid dispatch functions - static void reduce_wrapper(void* r, void* lhs, void* rhs); - static void identity_wrapper(void* r, void* view); - static void destroy_wrapper(void* r, void* view); - static void* allocate_wrapper(void* r, __STDNS size_t bytes); - static void deallocate_wrapper(void* r, void* view); +protected: + Type m_value; ///< The wrapped accumulator variable. - // Used for certain asserts - bool reducer_is_cache_aligned() const - { return 0 == ((std::size_t) this & (__CILKRTS_CACHE_LINE__ - 1)); } +public: + /** Value type definition required by @ref monoid_with_view. + */ + typedef Type value_type; + + /** Default constructor. + */ + scalar_view() : m_value() {} + + /** Value constructor. + */ + scalar_view(const Type& v) : m_value(v) {} + + /** @name Value functions required by the reducer class. + * + * Note that the move in/out functions use simple assignment semantics. + */ + //@{ + + /** Set the value of the view. + */ + void view_move_in(Type& v) { m_value = v; } + + /** Get the value of the view. + */ + void view_move_out(Type& v) { v = m_value; } + + /** Set the value of the view. + */ + void view_set_value(const Type& v) { m_value = v; } + + /** Get the value of the view. + */ + Type const& view_get_value() const { return m_value; } + + /** Get a reference to the value contained in the view. For legacy + * reducer support only. + */ + Type & view_get_reference() { return m_value; } + + /** Get a reference to the value contained in the view. 
For legacy + * reducer support only. + */ + Type const& view_get_reference() const { return m_value; } + //@} +}; - void init(); - // disable copy - reducer(const reducer&); - reducer& operator=(const reducer&); +/** Wrapper class for move-in construction. + * + * Some types allow their values to be _moved_ as an alternative to copying. + * Moving a value may be much faster than copying it, but may leave the value + * of the move’s source undefined. Consider the `swap` operation provided by + * many STL container classes: + * + * list<T> x, y; + * x = y; // Copy + * x.swap(y); // Move + * + * The assignment _copies_ the value of `y` into `x` in time linear in the + * size of `y`, leaving `y` unchanged. The `swap` _moves_ the value of `y` + * into `x` in constant time, but it also moves the value of `x` into `y`, + * potentially leaving `y` undefined. + * + * A move_in_wrapper simply wraps a pointer to an object. It is created by a + * call to cilk::move_in(). Passing a move_in_wrapper to a view constructor + * (actually, passing it to a reducer constructor, which passes it to the + * monoid `construct()` function, which passes it to the view constructor) + * allows, but does not require, the value pointed to by the wrapper to be + * moved into the view instead of copied. + * + * A view class exercises this option by defining a _move-in constructor_, + * i.e., a constructor with a move_in_wrapper parameter. The constructor calls + * the wrapper’s `value()` function to get a reference to its pointed-to + * value, and can then use that reference in a move operation. + * + * A move_in_wrapper also has an implicit conversion to its pointed-to value, + * so if a view class does not define a move-in constructor, its ordinary + * value constructor will be called with the wrapped value. 
For example, an + * @ref ReducersAdd "op_add" view does not have a move-in constructor, so + * + * int x; + * reducer< op_add<int> > xr(move_in(x)); + * + * will simply call the `op_add_view(const int &)` constructor. But an + * @ref ReducersList "op_list_append" view does have a move-in constructor, + * so + * + * list<int> x; + * reducer< op_list_append<int> > xr(move_in(x)); + * + * will call the `op_list_append_view(move_in_wrapper<int>)` constructor, + * which can `swap` the value of `x` into the view. + * + * @note Remember that passing the value of a variable to a reducer + * constructor using a move_in_wrapper leaves the variable undefined. + * You cannot assume that the constructor either will or will not copy + * or move the value. + * + * @tparam Type The type of the wrapped value. + * + * @see cilk::move_in() + */ +template <typename Type> +class move_in_wrapper +{ + Type *m_pointer; +public: + + /** Constructor that captures the address of its argument. This is almost + * always called from the @ref move_in function. + */ + explicit move_in_wrapper(Type& ref) : m_pointer(&ref) { } + + /** Implicit conversion to the wrapped value. This allows a move_in_wrapper + * to be used where a value of the wrapped type is expected, in which case + * the wrapper is completely transparent. + */ + operator Type&() const { return *m_pointer; } + + /** Get a reference to the pointed-to value. This has the same effect as + * the implicit conversion, but makes the intent clearer in a move-in + * constructor. + */ + Type& value() const { return *m_pointer; } +}; - public: - reducer() : monoid_(), leftmost_() - { - init(); - } +/** Function to create a move_in_wrapper for a value. + * + * @tparam Type The type of the argument, which will be the `type` of the + * created wrapper. 
+ * + * @see move_in_wrapper + */ +template <typename Type> +inline +move_in_wrapper<Type> move_in(Type& ref) + { return move_in_wrapper<Type>(ref); } - /// Special case: allow reducer(A) construction from both const and - /// non-const reference to A. Allowing this for all argument combinations - /// is desirable but would result in at least 93 overloads. - template <typename A> - explicit reducer(A& a) - : base_(), monoid_(), leftmost_(a) - { - init(); - } - template <typename A> - explicit reducer(const A& a) - : base_(), monoid_(), leftmost_(a) - { - init(); - } +/** @copydoc move_in(Type&) + * + * @note Applying a function that is explicitly specified as modifying its + * argument to a const argument is obviously an irrational thing to + * do. This move_in() variant is just provided to allow calling a + * move-in constructor with a function return value, which the + * language treats as a const. Using it for any other purpose will + * probably end in tears. + */ +template <typename Type> +inline +move_in_wrapper<Type> move_in(const Type& ref) + { return move_in_wrapper<Type>(ref); } - template <typename A, typename B> - reducer(const A& a, const B& b) - : base_(), monoid_(), leftmost_(a,b) - { - init(); - } - template <typename A, typename B, typename C> - reducer(const A& a, const B& b, const C& c) - : base_(), monoid_(), leftmost_(a,b,c) - { - init(); - } +/** Wrapper class to allow implicit downcasts to reducer subclasses. + * + * The Cilk library contains a collection of reducer wrapper classes which + * were created before the `cilk::reducer<Monoid>` style was developed. For + * example, `cilk::reducer_opadd<Type>` provided essentially the same + * functionality that is now provided by + * `cilk::reducer< cilk::op_add<Type> >`. These legacy reducer classes are + * deprecated, but still supported, and they have been reimplemented as + * subclasses of the corresponding `cilk::reducer` classes. 
For example: + * + * template <class T> + * reducer_opadd<T> : public reducer< op_add<T> > { ... }; + * + * This reimplementation allows transparent conversion between legacy and + * new reducers. That is, a `reducer<op_add>*` or `reducer<op_add>&` can be + * used anywhere that a `reducer_opadd*` or `reducer_opadd&` is expected, + * and vice versa. + * + * The conversion from the legacy reducer to the new reducer is just an + * up-cast, which is provided for free by C++. The conversion from the new + * reducer to the legacy reducer is a down-cast, though, which requires an + * explicit conversion member function in the `reducer` class. The challenge + * is to define a function in the reducer template class which will convert + * each cilk::reducer specialization to the corresponding legacy reducer, + * if there is one. + * + * The trick is in the legacy_reducer_downcast template class, which provides + * a mapping from `cilk::reducer` specializations to legacy reducer classes. + * `reducer<Monoid>` has a conversion function to convert itself to + * `legacy_reducer_downcast< reducer<Monoid> >::%type`. By default, + * `legacy_reducer_downcast<Reducer>::%type` is just a trivial subclass of + * `Reducer`, which is uninteresting, but a reducer with a legacy counterpart + * will have a specialization of `legacy_reducer_downcast` whose `type` is + * the corresponding legacy reducer. For example: + * + * template <typename Type> + * struct legacy_reducer_downcast< reducer< op_add<Type> > > + * { + * typedef reducer_opadd<Type> type; + * }; + * + * + * @tparam Reducer The new-style reducer class whose corresponding legacy reducer class + * is `type`, if there is such a legacy reducer class. + */ +template <typename Reducer> +struct legacy_reducer_downcast +{ + /** The related legacy reducer class. 
+ * + * By default, this is just a trivial subclass of Reducer, but it can be + * overridden in the specialization of legacy_reducer_downcast for + * a reducer that has a corresponding legacy reducer. + */ + struct type : Reducer { }; +}; - template <typename A, typename B, typename C, typename D> - reducer(const A& a, const B& b, const C& c, const D& d) - : base_(), monoid_(), leftmost_(a,b,c,d) - { - init(); - } - template <typename A, typename B, typename C, typename D, typename E> - reducer(const A& a, const B& b, const C& c, const D& d, const E& e) - : base_(), monoid_(), leftmost_(a,b,c,d,e) - { - init(); - } +namespace internal { +/// @cond internal - // Special case: both const and non-const Monoid reference are needed - // so that reducer(Monoid&) is more specialised than - // template <typename A> explicit reducer(A& a) and - // reducer(const Monoid&) is more specialised than - // template <typename A> explicit reducer(const A& a) - explicit reducer(Monoid& hmod) - : base_(), monoid_(hmod), leftmost_() - { - init(); - } +template <typename Value, typename View> +struct reducer_set_get +{ + static View theView; // Declared but not defined - explicit reducer(const Monoid& hmod) - : base_(), monoid_(hmod), leftmost_() - { - init(); - } + // sizeof(notchar) is guaranteed larger than 1 + struct notchar { char x[2]; }; - // Special case: allow reducer(Monoid,A) construction from both const and - // non-const references to A. Allowing this for all argument combinations - // is desirable but would result in at least 93 overloads. - template <typename A> - reducer(const Monoid& hmod, A& a) - : base_(), monoid_(hmod), leftmost_(a) - { - init(); - } + // check_for_ref returns char if 'get_value' returns by value and notchar + // if 'get_value' returns by reference.
+ static char check_for_ref(Value, ...); + static notchar check_for_ref(Value&, int); - template <typename A> - reducer(const Monoid& hmod, const A& a) - : base_(), monoid_(hmod), leftmost_(a) - { - init(); - } + enum { GET_VALUE_BY_VALUE = + (1 == sizeof(check_for_ref(theView.view_get_value(), 0))) } ; + + typedef typename condition<GET_VALUE_BY_VALUE, + Value, const Value&>::type get_value_type; + + static void move_in(View& view, Value& v) { view.view_move_in(v); } + static void move_out(View& view, Value& v) { view.view_move_out(v); } + + static void set_value(View& view, const Value& v) + { view.view_set_value(v); } + + static get_value_type get_value(const View& view) + { return view.view_get_value(); } +}; + +template <typename Value> +struct reducer_set_get<Value, Value> +{ + typedef const Value& get_value_type; + + static void move_in(Value& view, Value& v) { view = v; } + static void move_out(Value& view, Value& v) { v = view; } + + static void set_value(Value& view, const Value& v) { view = v; } + + static get_value_type get_value(const Value& view) { return view; } +}; + +/// @endcond - template <typename A, typename B> - reducer(const Monoid& hmod, const A& a, const B& b) - : base_(), monoid_(hmod), leftmost_(a,b) - { - init(); - } - template <typename A, typename B, typename C> - reducer(const Monoid& hmod, const A& a, const B& b, const C& c) - : base_(), monoid_(hmod), leftmost_(a,b,c) +/** Base class defining the data layout that is common to all reducers. 
+ */ +template <typename Monoid> +class reducer_base { + typedef typename Monoid::view_type view_type; + + // The following declarations ensure that the `base`, `monoid`, and + // `initialThis` fields (as well as the `leftmost` field, which is defined + // in the `reducer_content` subclass) are assigned at the same offsets as + // in the “old” reducer implementation (prior to November 2012), which + // declared them as + // + // __cilkrts_hyperobject_base m_base; + // const Monoid m_monoid; + // void* m_initialThis; + // __CILKRTS_CACHE_ALIGNED(view_type m_leftmost); + + // This structure determines what the relative positions of the `base` and + // `monoid` fields would be, and how much space would be allocated for + // them. + // + struct _layout_overlay { + __cilkrts_hyperobject_base base; + Monoid monoid; + _layout_overlay(); // Declared, not defined. + }; + + // This makes the reducer a hyper-object. (Partially initialized in + // the derived reducer_content class.) + // + __cilkrts_hyperobject_base m_base; + + // Reserve enough unconstructed space for the monoid. It is allocated + // here as raw bytes, and is constructed explicitly by a call to the + // monoid_type::construct() function in the constructor of the `reducer` + // subclass. + // + char _monoid_reservation[ + sizeof(_layout_overlay) - + sizeof(__cilkrts_hyperobject_base) ]; + + // Used for sanity checking at destruction. + // + void* m_initialThis; + + // The leftmost view comes next. It is defined in the derived + // reducer_content class. + + /** @name C-callable wrappers for the C++-coded monoid dispatch functions. + */ + //@{ + + static void reduce_wrapper(void* r, void* lhs, void* rhs); + static void identity_wrapper(void* r, void* view); + static void destroy_wrapper(void* r, void* view); + static void* allocate_wrapper(void* r, __STDNS size_t bytes); + static void deallocate_wrapper(void* r, void* view); + + //@} + +protected: + + /** Constructor. 
+ * + * @param leftmost The address of the leftmost view in the reducer. + */ + reducer_base(char* leftmost) { - init(); + static const cilk_c_monoid c_monoid_initializer = { + (cilk_c_reducer_reduce_fn_t) &reduce_wrapper, + (cilk_c_reducer_identity_fn_t) &identity_wrapper, + (cilk_c_reducer_destroy_fn_t) &destroy_wrapper, + (cilk_c_reducer_allocate_fn_t) &allocate_wrapper, + (cilk_c_reducer_deallocate_fn_t) &deallocate_wrapper + }; + + m_base.__c_monoid = c_monoid_initializer; + m_base.__flags = 0; + m_base.__view_offset = (char*)leftmost - (char*)this; + m_base.__view_size = sizeof(view_type); + m_initialThis = this; + + __cilkrts_hyper_create(&m_base); } - - template <typename A, typename B, typename C, typename D> - reducer(const Monoid& hmod, const A& a, const B& b, const C& c, - const D& d) - : base_(), monoid_(hmod), leftmost_(a,b,c,d) + + /** Destructor. + */ + __CILKRTS_STRAND_STALE(~reducer_base()) { - init(); + // Make sure we haven't been memcopy'd or corrupted + __CILKRTS_ASSERT(this == m_initialThis); + __cilkrts_hyper_destroy(&m_base); } - template <typename A, typename B, typename C, typename D, typename E> - reducer(const Monoid& hmod, const A& a, const B& b, const C& c, - const D& d, const E& e) - : base_(), monoid_(hmod), leftmost_(a,b,c,d,e) + /** Monoid data member. + * + * @return A pointer to the reducer’s monoid data member. + */ + Monoid* monoid_ptr() + { return & reinterpret_cast<_layout_overlay*>(this)->monoid; } + + /** Leftmost view data member. + * + * @return A pointer to the reducer’s leftmost view data member. + * + * @note This function returns the address of the *leftmost* view, + * which is unique for the lifetime of the reducer. It is + * intended to be used in constructors and destructors. + * Use the reducer::view() function to access the per-strand + * view instance. 
+ */ + view_type* leftmost_ptr() { - init(); + char* view_addr = (char*)this + m_base.__view_offset; + return reinterpret_cast<view_type*>(view_addr); } + +public: - __CILKRTS_STRAND_STALE(~reducer()); - - /* access the unwrapped object */ - value_type& view() { - // Look up reducer in current map. IMPORTANT: Even though the - // leftmost view is known in advance, access to it should be through - // the __cilkrts_hyper_lookup() function only. This restriction is - // necessary so that the compiler can assume that - // __cilkrts_hyper_lookup() is the ONLY source of the address of this - // object, and can therefore optimize as if it had no aliases. - return *static_cast<value_type *>(__cilkrts_hyper_lookup(&base_)); + /** @name Access the current view. + * + * These functions return a reference to the instance of the reducer’s + * view that was created for the current strand of a parallel computation + * (and create it if it doesn’t already exist). Note the difference from + * the (private) leftmost_ptr() function, which returns a pointer to the + * _leftmost_ view, which is the same in all strands. + */ + //@{ + + /** Per-strand view instance. + * + * @return A reference to the per-strand view instance. + */ + view_type& view() + { + return *static_cast<view_type *>(__cilkrts_hyper_lookup(&m_base)); } - - value_type const& view() const { - /* look up reducer in current map */ - return const_cast<reducer*>(this)->view(); + + /** @copydoc view() + */ + const view_type& view() const + { + return const_cast<reducer_base*>(this)->view(); } - - /// "Dereference" reducer to return the current view. - value_type& operator*() { return view(); } - value_type const& operator*() const { return view(); } - - /// "Dereference" reducer to return the current view. - value_type* operator->() { return &view(); } - value_type const* operator->() const { return &view(); } - - /// operator()() is deprecated. Use operator*() instead. 
- value_type& operator()() { return view(); } - value_type const& operator()() const { return view(); } - - const Monoid& monoid() const { return monoid_; } + + //@} }; template <typename Monoid> -void reducer<Monoid>::init() +void reducer_base<Monoid>::reduce_wrapper(void* r, void* lhs, void* rhs) { - static const cilk_c_monoid c_monoid_initializer = { - (cilk_c_reducer_reduce_fn_t) &reduce_wrapper, - (cilk_c_reducer_identity_fn_t) &identity_wrapper, - (cilk_c_reducer_destroy_fn_t) &destroy_wrapper, - (cilk_c_reducer_allocate_fn_t) &allocate_wrapper, - (cilk_c_reducer_deallocate_fn_t) &deallocate_wrapper - }; - -#ifdef CILK_CHECK_REDUCER_ALIGNMENT - // ASSERT THAT LEFTMOST VIEW IS CACHE-LINE ALIGNED: - // We use an attribute to ensure that the leftmost view, and therefore the - // entire reducer object, is cache-line (64-byte) aligned. The compiler - // enforces this alignment for static- and automatic-duration objects. - // However, if a reducer or a structure containing a reducer is allocated - // from the heap using a custom allocator (which typically guarantee only - // 8- or 16-byte alignment), the compiler cannot guarantee this cache-line - // alignment. Certain vector instructions require that the operands be - // aligned on vector boundaries (up to 16-bytes in SSE, 32-bytes in AVX - // and 64-bytes in MIC). At high optimazation levels, the allocator's - // failure to keep the promised alignment can cause a program to fault - // mysteriously in a vector instruction. The assertion is intended to - // catch this situation. If the assertion fails, the user is advised - // to change the way that reducer or the the structure containing the - // reducer is allocated such that it is guaranteed to be on a 64-byte - // boundary, thus preventing both the possible crash and false sharing. 
- __CILKRTS_ASSERT(reducer_is_cache_aligned()); -#endif // CILK_CHECK_REDUCER_ALIGNMENT - - base_.__c_monoid = c_monoid_initializer; - base_.__flags = 0; - base_.__view_offset = (char*) &leftmost_ - (char*) this; - base_.__view_size = sizeof(value_type); - initialThis_ = this; - - __cilkrts_hyper_create(&base_); + Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr(); + monoid->reduce(static_cast<view_type*>(lhs), + static_cast<view_type*>(rhs)); } template <typename Monoid> -void reducer<Monoid>::reduce_wrapper(void* r, void* lhs, void* rhs) +void reducer_base<Monoid>::identity_wrapper(void* r, void* view) { - reducer* self = static_cast<reducer*>(r); - self->monoid_.reduce(static_cast<value_type*>(lhs), - static_cast<value_type*>(rhs)); + Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr(); + monoid->identity(static_cast<view_type*>(view)); } template <typename Monoid> -void reducer<Monoid>::identity_wrapper(void* r, void* view) +void reducer_base<Monoid>::destroy_wrapper(void* r, void* view) { - reducer* self = static_cast<reducer*>(r); - self->monoid_.identity(static_cast<value_type*>(view)); + Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr(); + monoid->destroy(static_cast<view_type*>(view)); } template <typename Monoid> -void reducer<Monoid>::destroy_wrapper(void* r, void* view) +void* reducer_base<Monoid>::allocate_wrapper(void* r, __STDNS size_t bytes) { - reducer* self = static_cast<reducer*>(r); - self->monoid_.destroy(static_cast<value_type*>(view)); + Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr(); + return monoid->allocate(bytes); } template <typename Monoid> -void* reducer<Monoid>::allocate_wrapper(void* r, __STDNS size_t bytes) +void reducer_base<Monoid>::deallocate_wrapper(void* r, void* view) { - reducer* self = static_cast<reducer*>(r); - return self->monoid_.allocate(bytes); + Monoid* monoid = static_cast<reducer_base*>(r)->monoid_ptr(); + monoid->deallocate(static_cast<view_type*>(view)); } + +/** Base 
class defining the data members of a reducer. + * + * @tparam Aligned The `m_view` data member, and therefore the entire + * structure, are cache-line aligned if this parameter + * is `true'. + */ +template <typename Monoid, bool Aligned = Monoid::align_reducer> +class reducer_content; + +/** Base class defining the data members of an aligned reducer. + */ template <typename Monoid> -void reducer<Monoid>::deallocate_wrapper(void* r, void* view) +class reducer_content<Monoid, true> : public reducer_base<Monoid> { - reducer* self = static_cast<reducer*>(r); - self->monoid_.deallocate(static_cast<value_type*>(view)); -} + typedef typename Monoid::view_type view_type; + + // The leftmost view is defined as raw bytes. It will be constructed + // by the monoid `construct` function. It is cache-aligned, which + // will push it into a new cache line. Furthermore, its alignment causes + // the reducer as a whole to be cache-aligned, which makes the reducer + // size a multiple of a cache line. Since there is nothing in the reducer + // after the view, all this means that the leftmost view gets one or more + // cache lines all to itself, which prevents false sharing. + // + __CILKRTS_CACHE_ALIGN + char m_leftmost[sizeof(view_type)]; + + /** Test if the reducer is cache-line-aligned. + * + * Used in assertions. + */ + bool reducer_is_cache_aligned() const + { return 0 == ((std::size_t) this & (__CILKRTS_CACHE_LINE__ - 1)); } + +protected: + /** Constructor. + */ + reducer_content() : reducer_base<Monoid>((char*)&m_leftmost) + { +#ifndef CILK_IGNORE_REDUCER_ALIGNMENT + assert(reducer_is_cache_aligned() && + "Reducer should be cache aligned. Please see comments following this assertion for explanation and fixes."); +#endif + /* "REDUCER SHOULD BE CACHE ALIGNED" ASSERTION. + * + * This Reducer class instantiation specifies cache-line alignment of the + * leftmost view field (and, implicitly, of the reducer itself). 
You got + * this assertion because a reducer with this class was allocated at a + * non-cache-aligned address, probably because it was allocated on the + * heap with `new`. This can be a problem for two reasons: + * + * 1. If the leftmost view is not on a cache line by itself, there might + * be a slowdown resulting from accesses to the same cache line from + * different threads. + * + * 2. The compiler thinks that reducer is cache-line aligned, but it + * really isn't. If the reducer is contained in a structure, then the + * compiler will believe that the containing structure, and other + * fields contained in it, are also more aligned than they really + * are. In particular, if the structure contains a numeric array that + * is used in a vectorizable loop, then the compiler might generate + * invalid vector instructions, resulting in a runtime error. + * + * The compiler will always allocate reducer variables, and structure + * variables containing reducers, with their required alignment. + * Reducers, and structures containing a reducer, which are allocated + * on the heap with `new` will _not_ be properly aligned. + * + * There are three ways that you can fix this assertion failure. + * + * A. Rewrite your code to use the new-style `reducer< op_XXX<Type> >` + * instead of the legacy `reducer_XXX<type>`. The new-style reducers + * are not declared to be cache-aligned, and will work properly if + * they are not cache-aligned. + * + * B. If you must allocate an old-style reducer or a structure containing + * a reducer on the heap, figure out how to align it correctly. The + * suggested fix is to use `cilk::aligned_new()` and + * `cilk::aligned_delete()` instead of `new` and `delete`, as follows: + * + * Type* ptr = cilk::aligned_new<Type>(constructor-arguments); + * cilk::aligned_delete(ptr); + * + * C. Define the macro CILK_IGNORE_REDUCER_ALIGNMENT, which will suppress + * the assertion check. 
Do this only if you are comfortable that + * problem (2) above will not occur. + */ + } +}; + +/** Base class defining the data members of an unaligned reducer. + */ template <typename Monoid> -__CILKRTS_STRAND_STALE(reducer<Monoid>::~reducer()) +class reducer_content<Monoid, false> : public reducer_base<Monoid> { - // Make sure we haven't been memcopy'd or corrupted - __CILKRTS_ASSERT(this == initialThis_); - __cilkrts_hyper_destroy(&base_); -} + typedef typename Monoid::view_type view_type; ///< The view type. + + // Reserve space for the leftmost view. The view will be allocated at an + // aligned offset in this space at runtime, to guarantee that the view + // will get one or more cache lines all to itself, to prevent false + // sharing. + // + // The number of bytes to reserve is determined as follows: + // * Start with the view size. + // * Round up to a multiple of the cache line size, to get the total size + // of the cache lines that will be dedicated to the view. + // * Add (cache line size - 1) filler bytes to guarantee that the reserved + // area will contain a cache-aligned block of the required cache lines, + // no matter where the reserved area starts. + // + char m_leftmost[ + // View size rounded up to a multiple of the cache line size + // (add line size - 1, then clear the low bits with the inverted mask). + ( (sizeof(view_type) + __CILKRTS_CACHE_LINE__ - 1) + & ~(__CILKRTS_CACHE_LINE__ - 1) + ) + // plus filler to allow alignment. + + __CILKRTS_CACHE_LINE__ - 1 + ]; + +protected: + + /** Constructor. Find the first cache-aligned position in the reserved + * area, and pass it to the base constructor as the leftmost view + * address.
+ */ + reducer_content() : reducer_base<Monoid>( + (char*)( + ((std::size_t)&m_leftmost + __CILKRTS_CACHE_LINE__ - 1) + / __CILKRTS_CACHE_LINE__ * __CILKRTS_CACHE_LINE__) ) + {} +}; -} // end namespace cilk -#else // if defined(CILK_STUB) +} // namespace internal -/************************************************************************** - * Stub reducer implementation - **************************************************************************/ -namespace cilk { +// The __cilkrts_hyperobject_ functions are defined differently depending on +// whether a file is compiled with or without the CILK_STUB option. Therefore, +// reducers compiled in the two modes should be link-time incompatible, so that +// object files compiled with stubbed reducers won't be linked into an +// unstubbed program, or vice versa. We achieve this by putting the reducer +// class definition into the cilk::stub namespace in a stubbed compilation. + +#ifdef CILK_STUB namespace stub { +#endif +/** Reducer class. + * + * A reducer is instantiated on a Monoid. The Monoid provides the value + * type, associative reduce function, and identity for the reducer. + * + * @tparam Monoid The monoid class that the reducer is instantiated on. It must model + * the @ref reducers_monoid_concept "monoid concept". + * + * @see @ref pagereducers + */ template <class Monoid> -class reducer { - typedef typename Monoid::value_type value_type; - - const Monoid monoid_; - value_type obj_; +class reducer : public internal::reducer_content<Monoid> +{ + typedef internal::reducer_content<Monoid> base; + using base::monoid_ptr; + using base::leftmost_ptr; + public: + typedef Monoid monoid_type; ///< The monoid type. + typedef typename Monoid::value_type value_type; ///< The value type. + typedef typename Monoid::view_type view_type; ///< The view type. 
- /* disable copy */ - reducer(const reducer&); - reducer& operator=(const reducer&); + private: + typedef internal::reducer_set_get<value_type, view_type> set_get; + + reducer(const reducer&); ///< Disallow copying. + reducer& operator=(const reducer&); ///< Disallow assignment. public: - reducer() : monoid_(), obj_() { } - - // Special case: allow reducer(A) construction from both const and - // non-const reference to A. Allowing this for all argument combinations - // is desirable but would result in at least 93 overloads. - template <typename A> - explicit reducer(A& a) - : monoid_(), obj_(a) { - } - - template <typename A> - explicit reducer(const A& a) - : monoid_(), obj_(a) { + + /** @name Constructors + * + * All reducer constructors call the static `construct()` function of the monoid class to + * construct the reducer's monoid and leftmost view. + * + * The reducer constructor arguments are simply passed through to the construct() function. + * Thus, the constructor parameters accepted by a particular reducer class are determined + * by its monoid class. + */ + //@{ + + /** 0 – 6 const reference parameters. 
+ */ + //@{ + + reducer() + { + monoid_type::construct(monoid_ptr(), leftmost_ptr()); } - template <typename A, typename B> - reducer(const A& a, const B& b) - : monoid_(), obj_(a, b) { + template <typename T1> + reducer(const T1& x1) + { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); } - template <typename A, typename B, typename C> - reducer(const A& a, const B& b, const C& c) - : monoid_(), obj_(a, b, c) + template <typename T1, typename T2> + reducer(const T1& x1, const T2& x2) { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2); } - template <typename A, typename B, typename C, typename D> - reducer(const A& a, const B& b, const C& c, const D& d) - : monoid_(), obj_(a, b, c, d) + template <typename T1, typename T2, typename T3> + reducer(const T1& x1, const T2& x2, const T3& x3) { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3); } - template <typename A, typename B, typename C, typename D, typename E> - reducer(const A& a, const B& b, const C& c, const D& d, const E& e) - : monoid_(), obj_(a, b, c, d, e) + template <typename T1, typename T2, typename T3, typename T4> + reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4) { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4); } - // Special case: both const and non-const Monoid reference are needed - // so that reducer(Monoid&) is more specialised than - // template <typename A> explicit reducer(A& a) and - // reducer(const Monoid&) is more specialised than - // template <typename A> explicit reducer(const A& a) - explicit reducer(Monoid& m) : monoid_(m), obj_() { } - explicit reducer(const Monoid& m) : monoid_(m), obj_() { } - - // Special case: allow reducer(Monoid,A) construction from both const and - // non-const references to A. Allowing this for all argument combinations - // is desirable but would result in at least 93 overloads. 
- template <typename A> - reducer(const Monoid& m, A& a) - : monoid_(m), obj_(a) { + template <typename T1, typename T2, typename T3, typename T4, typename T5> + reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5) + { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5); } - template <typename A> - reducer(const Monoid& m, const A& a) - : monoid_(m), obj_(a) { + template <typename T1, typename T2, typename T3, typename T4, typename T5, typename T6> + reducer(const T1& x1, const T2& x2, const T3& x3, const T4& x4, const T5& x5, const T6& x6) + { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1, x2, x3, x4, x5, x6); } - - template <typename A, typename B> - reducer(const Monoid& m, const A& a, const B& b) - : monoid_(m), obj_(a, b) { + + //@} + + /** 1 non-const reference parameter. + */ + //@{ + + template <typename T1> + reducer(T1& x1) + { + monoid_type::construct(monoid_ptr(), leftmost_ptr(), x1); } + + //@} - template <typename A, typename B, typename C> - reducer(const Monoid& m, const A& a, const B& b, const C& c) - : monoid_(m), obj_(a, b, c) + /** Destructor. + */ + __CILKRTS_STRAND_STALE(~reducer()) { + leftmost_ptr()->~view_type(); + monoid_ptr()->~monoid_type(); } - template <typename A, typename B, typename C, typename D> - reducer(const Monoid& m, const A& a, const B& b, const C& c, const D& d) - : monoid_(m), obj_(a, b, c, d) + //@{ + /** Get the monoid. + * + * @return A reference to the monoid object belonging to this reducer. + */ + Monoid& monoid() { return *monoid_ptr(); } + + const Monoid& monoid() const + { return const_cast<reducer*>(this)->monoid(); } + //@} + + //@{ + /** Access the current view. + * + * Return a reference to the instance of the reducer’s view that was + * created for the current strand of a parallel computation (and create + * it if it doesn’t already exist). 
+ */ + view_type& view() { return base::view(); } + const view_type& view() const { return base::view(); } + //@} + + + /** @name Dereference the reducer to get the view. + * + * “Dereferencing” a reducer yields the view for the current strand. The + * view, in turn, acts as a proxy for its contained value, exposing only + * those operations which are consistent with the reducer’s monoid. Thus, + * all modifications of the reducer’s accumulator variable are written as + * + * *reducer OP ... + * + * or + * + * reducer->func(...) + * + * (The permitted operations on a reducer’s accumulator are listed in the + * documentation for that particular kind of reducer.) + * + * @note `*r` is a synonym for `r.view()`. Recommended style is to use + * `*r` (or `r->`) in the common case where code is simply + * updating the accumulator variable wrapped in the view, and to + * use `r.view()` in the unusual case where it is desirable to + * call attention to the view itself. + */ + //@{ + + //@{ + /** Dereference operator. + * + * @return A reference to the per-strand view instance. + */ + view_type& operator*() { return view(); } + view_type const& operator*() const { return view(); } + //@} + + //@{ + /** Pointer operator. + * + * @return A pointer to the per-strand view instance. + */ + view_type* operator->() { return &view(); } + view_type const* operator->() const { return &view(); } + //@} + + //@{ + /** Deprecated view access. + * + * `r()` is a synonym for `*r` which was used with early versions of Cilk + * reducers. `*r` is now the preferred usage. + * + * @deprecated Use operator*() instead of operator()(). + * + * @return A reference to the per-strand view instance. + */ + view_type& operator()() { return view(); } + view_type const& operator()() const { return view(); } + //@} + + //@} + + /** @name Set and get the value. 
+ * + * These functions are used to set an initial value for the reducer before + * starting the reduction, or to get the final value after the reduction + * is complete. + * + * @note These functions are completely different from the view + * operations that are made available via operator*() and + * operator->(), which are used to _modify_ the reducer’s value + * _during_ the reduction. + * + * @warning These functions _can_ be called at any time, and in + * general, they will refer to the value contained in the view + * for the current strand. However, using them other than to + * set the reduction’s initial value or get its final value + * will almost always result in undefined behavior. + */ + //@{ + + /** Move a value into the reducer. + * + * This function is used to set the initial value of the reducer’s + * accumulator variable by either copying or _moving_ the value of @a obj + * into it. Moving a value can often be performed in constant time, even + * for large container objects, but has the side effect of leaving the + * value of @a obj undefined. (See the description of the + * @ref move_in_wrapper class for a discussion of moving values.) + * + * @par Usage + * A move_in() call to initialize a reducer is often paired with a + * move_out() call to get its final value: + * + * reducer<Type> xr; + * xr.move_in(x); + * … do the reduction … + * xr.move_out(x); + * + * @par Assumptions + * - You cannot assume either that this function will copy its + * value or that it will move it. + * - You must assume that the value of @a obj will be undefined + * after the call to move_in(). + * - You can assume that move_in() will be at least as efficient as + * set_value(), and you should therefore prefer move_in() unless + * you need the value of @a obj to be unchanged after the call. + * (But you should usually prefer the move-in constructor over a + * move_in() call — see the note below.)
+ * + * @note The behavior of a default constructor followed by move-in + * initialization: + * + * reducer<Type> xr; + * xr.move_in(x); + * + * @note is not necessarily the same as a move-in constructor: + * + * reducer<Type> xr(move_in(x)); + * + * @note In particular, when @a Type is a container type with a + * non-empty allocator, the move-in constructor will create the + * accumulator variable with the same allocator as the input + * argument @a x, while the default constructor will create the + * accumulator variable with a default allocator. The mismatch of + * allocators in the latter case means that the input argument + * @a x may have to be copied in linear time instead of being + * moved in constant time. + * + * @note Best practice is to prefer the move-in constructor over the + * move-in function unless the move-in function is required for + * some specific reason. + * + * @warning Calling this function other than to set the initial value + * for a reduction will almost always result in undefined + * behavior. + * + * @param obj The object containing the value that will be moved into the + * reducer. + * + * @post The reducer contains the value that was initially in @a obj. + * @post The value of @a obj is undefined. + * + * @see set_value() + */ + void move_in(value_type& obj) { set_get::move_in(view(), obj);} + + /** Move the value out of the reducer. + * + * This function is used to retrieve the final value of the reducer’s + * accumulator variable by either copying or _moving_ that value into + * @a obj. Moving a value can often be performed in constant time, even + * for large container objects, but has the side effect of leaving the + * value of the reducer’s accumulator variable undefined. (See the + * description of the @ref move_in_wrapper class for a discussion of + * moving values.)
+ * + * @par Usage + * A move_in() call to initialize a reducer is often paired with a + * move_out() call to get its final value: + * + * reducer<Type> xr; + * xr.move_in(x); + * … do the reduction … + * xr.move_out(x); + * + * @par Assumptions + * - You cannot assume either that this function will copy its + * value or that it will move it. + * - You must assume that the value of the reducer’s accumulator + * variable will be undefined after the call to move_out(). + * - You can assume that move_out() will be at least as efficient as + * get_value(), and you should therefore prefer move_out() unless + * you need the accumulator variable to be preserved after the + * call. + * + * @warning Calling this function other than to retrieve the final + * value of a reduction will almost always result in undefined + * behavior. + * + * @param obj The object that the value of the reducer will be moved into. + * + * @post @a obj contains the value that was initially in the reducer. + * @post The value of the reducer is undefined. + * + * @see get_value() + */ + void move_out(value_type& obj) { set_get::move_out(view(), obj); } + + /** Set the value of the reducer. + * + * This function sets the initial value of the reducer’s accumulator + * variable to the value of @a obj. + * + * @note The behavior of a default constructor followed by + * initialization: + * + * reducer<Type> xr; + * xr.set_value(x); + * + * @note is not necessarily the same as a value constructor: + * + * reducer<Type> xr(x); + * + * @note In particular, when @a Type is a container type with a + * non-empty allocator, the value constructor will create the + * accumulator variable with the same allocator as the input + * argument @a x, while the default constructor will create the + * accumulator variable with a default allocator. + * + * @warning Calling this function other than to set the initial value + * for a reduction will almost always result in undefined + * behavior.
+ * + * @param obj The object containing the value that will be copied into + * the reducer. + * + * @post The reducer contains a copy of the value in @a obj. + * + * @see move_in() + */ + void set_value(const value_type& obj) { set_get::set_value(view(), obj); } + + /** Get the value of the reducer. + * + * This function gets the final value of the reducer’s accumulator + * variable. + * + * @warning Calling this function other than to retrieve the final + * value of a reduction will almost always result in undefined + * behavior. + * + * @return A reference to the value contained in the reducer. + * + * @see move_out() + */ + typename set_get::get_value_type get_value() const + { return set_get::get_value(view()); } + + //@} + + /** Implicit downcast to legacy reducer wrapper, if any. + * + * @see legacy_reducer_downcast + */ + operator typename legacy_reducer_downcast<reducer>::type& () { + typedef typename legacy_reducer_downcast<reducer>::type downcast_type; + return *reinterpret_cast<downcast_type*>(this); } - template <typename A, typename B, typename C, typename D, typename E> - reducer(const Monoid& m, const A& a, const B& b, const C& c, - const D& d, const E& e) - : monoid_(m), obj_(a, b, c, d, e) + + /** Implicit downcast to legacy reducer wrapper, if any. 
+ * + * @see legacy_reducer_downcast + */ + operator const typename legacy_reducer_downcast<reducer>::type& () const { + typedef typename legacy_reducer_downcast<reducer>::type downcast_type; + return *reinterpret_cast<const downcast_type*>(this); } +}; - ~reducer() { } +#ifdef CILK_STUB +} // namespace stub +using stub::reducer; +#endif - value_type& view() { return obj_; } - value_type const& view() const { return obj_; } +} // end namespace cilk - value_type& operator()() { return view(); } - value_type const& operator()() const { return view(); } +#endif /* __cplusplus */ - const Monoid& monoid() const { return monoid_; } +/** @page page_reducers_in_c Creating and Using Reducers in C + * + * @tableofcontents + * + * The Cilk runtime supports reducers written in C as well as in C++. The basic logic is the + * same, but the implementation details are very different. The C++ reducer implementation uses + * templates heavily to create very generic components. The C reducer implementation uses + * macros, which are a much blunter instrument. The most immediate consequence is that the + * monoid/view/reducer architecture is mostly implicit rather than explicit in C reducers. + * + * @section reducers_c_overview Overview of Using Reducers in C + * + * The basic usage pattern for C reducers is: + * + * 1. Create and initialize a reducer object. + * 2. Tell the Cilk runtime about the reducer. + * 3. Update the value contained in the reducer in a parallel computation. + * 4. Tell the Cilk runtime that you are done with the reducer. + * 5. Retrieve the value from the reducer. 
+ * + * @subsection reducers_c_creation Creating and Initializing a C Reducer + * + * The basic pattern for creating and initializing a reducer object in C is + * + * CILK_C_DECLARE_REDUCER(value-type) reducer-name = + * CILK_C_INIT_REDUCER(value-type, + * reduce-function, + * identity-function, + * destroy-function, + * initial-value); + * + * This is simply an initialized definition of a variable named _reducer-name_. The + * @ref CILK_C_DECLARE_REDUCER macro expands to an anonymous `struct` declaration for a reducer + * object containing a view of type _value-type_, and the @ref CILK_C_INIT_REDUCER macro + * expands to a struct initializer. + * + * @subsection reducers_c_reduce_func Reduce Functions + * + * The reduce function for a reducer is called when a parallel execution strand terminates, to + * combine the values computed by the terminating strand and the strand to its left. It takes + * three arguments: + * + * - `void* reducer` — the address of the reducer. + * - `void* left` — the address of the value for the left strand. + * - `void* right` — the address of the value for the right (terminating) strand. + * + * It must apply the reducer’s reduction operation to the `left` and `right` values, leaving + * the result in the `left` value. The `right` value is undefined after the reduce function + * call. + * + * @subsection reducers_c_identity_func Identity Functions + * + * The identity function for a reducer is called when a parallel execution strand begins, to + * initialize its value to the reducer’s identity value. It takes two arguments: + * + * - `void* reducer` — the address of the reducer. + * - `void* v` — the address of a freshly allocated block of memory of size + * `sizeof(value-type)`. + * + * It must initialize the memory pointed to by `v` so that it contains the reducer’s identity + * value. 
+ * + * @subsection reducers_c_destroy_func Destroy Functions + * + * The destroy function for a reducer is called when a parallel execution strand terminates, to + * do any necessary cleanup before its value is deallocated. It takes two arguments: + * + * - `void* reducer` — the address of the reducer. + * - `void* p` — the address of the value for the terminating strand. + * + * It must release any resources belonging to the value pointed to by `p`, to avoid a resource + * leak when the memory containing the value is deallocated. + * + * The runtime function `__cilkrts_hyperobject_noop_destroy` can be used for the destructor + * function if the reducer’s values do not need any cleanup. + * + * @subsection reducers_c_register Tell the Cilk Runtime About the Reducer + * + * Call the @ref CILK_C_REGISTER_REDUCER macro to register the reducer with the Cilk runtime: + * + * CILK_C_REGISTER_REDUCER(reducer-name); + * + * The runtime will manage reducer values for all registered reducers when parallel execution + * strands begin and end. + * + * @subsection reducers_c_update Update the Value Contained in the Reducer + * + * The @ref REDUCER_VIEW macro returns a reference to the reducer’s value for the current + * parallel strand: + * + * REDUCER_VIEW(reducer-name) = REDUCER_VIEW(reducer-name) OP x; + * + * C++ reducer views restrict access to the wrapped value so that it can only be modified in + * ways consistent with the reducer’s operation. No such protection is provided for C reducers. + * It is + * entirely the responsibility of the user to avoid modifying the value in any + * inappropriate way. + * + * @subsection c_reducers_unregister Tell the Cilk Runtime That You Are Done with the Reducer + * + * When the parallel computation is complete, call the @ref CILK_C_UNREGISTER_REDUCER macro to + * unregister the reducer with the Cilk runtime: + * + * CILK_C_UNREGISTER_REDUCER(reducer-name); + * + * The runtime will stop managing reducer values for the reducer. 
+ * + * @subsection c_reducers_retrieve Retrieve the Value from the Reducer + * + * When the parallel computation is complete, use the @ref REDUCER_VIEW macro to retrieve the + * final value computed by the reducer. + * + * @subsection reducers_c_example_custom Example — Creating and Using a Custom C Reducer + * + * The `IntList` type represents a simple list of integers. + * + * typedef struct _intListNode { + * int value; + * struct _intListNode* next; + * } IntListNode; + * typedef struct { IntListNode* head; IntListNode* tail; } IntList; + * + * // Initialize a list to be empty + * void IntList_init(IntList* list) { list->head = list->tail = 0; } + * + * // Append an integer to the list + * void IntList_append(IntList* list, int x) + * { + * IntListNode* node = (IntListNode*) malloc(sizeof(IntListNode)); + * node->value = x; + * node->next = 0; + * if (list->tail) list->tail->next = node; else list->head = node; + * list->tail = node; + * } + * + * // Append the right list to the left list, and leave the right list empty + * void IntList_concat(IntList* left, IntList* right) + * { + * if (left->head) { + * left->tail->next = right->head; + * if (right->tail) left->tail = right->tail; + * } + * else { + * *left = *right; + * } + * IntList_init(right); + * } + * + * This code creates a reducer that supports creating an `IntList` by appending values to it. 
+ * + * void identity_IntList(void* reducer, void* list) + * { + * IntList_init((IntList*)list); + * } + * + * void reduce_IntList(void* reducer, void* left, void* right) + * { + * IntList_concat((IntList*)left, (IntList*)right); + * } + * + * CILK_C_DECLARE_REDUCER(IntList) my_int_list_reducer = + * CILK_C_INIT_REDUCER(IntList, + * reduce_IntList, + * identity_IntList, + * __cilkrts_hyperobject_noop_destroy); + * // Initial value omitted // + * IntList_init(&REDUCER_VIEW(my_int_list_reducer)); + * + * CILK_C_REGISTER_REDUCER(my_int_list_reducer); + * cilk_for (int i = 0; i != n; ++i) { + * IntList_append(&REDUCER_VIEW(my_int_list_reducer), a[i]); + * } + * CILK_C_UNREGISTER_REDUCER(my_int_list_reducer); + * + * IntList result = REDUCER_VIEW(my_int_list_reducer); + * + * @section reducers_c_predefined Predefined C Reducers + * + * Some of the predefined reducer classes in the Cilk library come with a set of predefined + * macros to provide the same capabilities in C. In general, two macros are provided for each + * predefined reducer family: + * + * - `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)` — Declares a + * reducer object named _reducer-name_ with initial value _initial-value_ to perform + * a reduction using the _operation_ on values of the type specified by _type-name_. + * This is the equivalent of the general code described in @ref reducers_c_creation : + * + * CILK_C_DECLARE_REDUCER(type) reducer-name = + * CILK_C_INIT_REDUCER(type, ..., initial-value); + * + * where _type_ is the C type corresponding to _type-name_. See @ref reducers_c_type_names + * below for the _type-names_ that you can use. + * + * - `CILK_C_REDUCER_operation_TYPE(type-name)` — Expands to the `typedef` name for the type + * of the reducer object declared by + * `CILK_C_REDUCER_operation(reducer-name, type-name, initial-value)`. + * + * See @ref reducers_c_example_predefined. 
+ * + * The predefined C reducers are: + * + * | Operation | Name | Documentation | + * |-------------------|---------------|-------------------------------| + * | addition | `OPADD` | @ref ReducersAdd | + * | bitwise and | `OPAND` | @ref ReducersAnd | + * | bitwise or | `OPOR` | @ref ReducersOr | + * | bitwise xor | `OPXOR` | @ref ReducersXor | + * | multiplication | `OPMUL` | @ref ReducersMul | + * | minimum | `MIN` | @ref ReducersMinMax | + * | minimum & index | `MIN_INDEX` | @ref ReducersMinMax | + * | maximum | `MAX` | @ref ReducersMinMax | + * | maximum & index | `MAX_INDEX` | @ref ReducersMinMax | + * + * @subsection reducers_c_type_names Numeric Type Names + * + * The type and function names created by the C reducer definition macros incorporate both the + * reducer kind (`opadd`, `opxor`, etc.) and the value type of the reducer (`int`, `double`, + * etc.). The value type is represented by a _numeric type name_ string. The types supported + * in C reducers, and their corresponding numeric type names, are given in the following table: + * + * | Type | Numeric Type Name | + * |-----------------------|-------------------------------| + * | `char` | `char` | + * | `unsigned char` | `uchar` | + * | `signed char` | `schar` | + * | `wchar_t` | `wchar_t` | + * | `short` | `short` | + * | `unsigned short` | `ushort` | + * | `int` | `int` | + * | `unsigned int` | `uint` | + * | `unsigned int` | `unsigned` (alternate name) | + * | `long` | `long` | + * | `unsigned long` | `ulong` | + * | `long long` | `longlong` | + * | `unsigned long long` | `ulonglong` | + * | `float` | `float` | + * | `double` | `double` | + * | `long double` | `longdouble` | + * + * @subsection reducers_c_example_predefined Example — Using a Predefined C Reducer + * + * To compute the sum of all the values in an array of `unsigned int`: + * + * CILK_C_REDUCER_OPADD(sum, uint, 0); + * CILK_C_REGISTER_REDUCER(sum); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(sum) += a[i]; + * } + * 
CILK_C_UNREGISTER_REDUCER(sum); + * printf("The sum is %u\n", REDUCER_VIEW(sum)); + */ -}; // stub::reducer + + /** @name C language reducer macros + * + * These macros are used to declare and work with reducers in C code. + * + * @see @ref page_reducers_in_c + */ + //@{ -} // end namespace stub -} // end namespace cilk +/// @cond internal -#endif // CILK_STUB +/** @name Compound identifier macros. + * + * These macros are used to construct an identifier by concatenating two or three identifiers. + */ +//@{ -#endif /* __cplusplus */ +/** Expand to an identifier formed by concatenating two identifiers. + */ +#define __CILKRTS_MKIDENT(a,b) __CILKRTS_MKIDENT_IMP(a,b,) -/*===================== C interfaces ===================================*/ +/** Expand to an identifier formed by concatenating three identifiers. + */ +#define __CILKRTS_MKIDENT3(a,b,c) __CILKRTS_MKIDENT_IMP(a,b,c) + +/** Helper macro to do the concatenation. + */ +#define __CILKRTS_MKIDENT_IMP(a,b,c) a ## b ## c +//@} + +/** Compiler-specific keyword for the “type of” operator. + */ #if defined(__GNUC__) && !defined(__INTEL_COMPILER) # define _Typeof __typeof__ #endif -/* MACROS FOR DEFINING AND USING C REDUCERS - * - * Example use of these macros - * - * double array[ARRAY_LEN]; - * double sum() - * { - * extern void* double_summing_identity(); - * extern void double_summing_reduce(void* lhs, void* rhs); +/** @name Predefined reducer function declaration macros. * - * CILK_C_DECLARE_REDUCER(double) total = - * CILK_C_INIT_REDUCER(sizeof(double), - * double_summing_reduce, - * double_summing_identity, - * free, - * 0); - * int i; - * - * CILK_C_REGISTER_REDUCER(total); - * - * cilk_for (i = 0; i < ARRAY_LEN; ++i) - * REDUCER_VIEW(total) += array[i]; + * These macros are used to create the function headers for the identity, reduction, + * and destructor functions for a builtin reducer family. 
The macro can be followed by + * a semicolon to create a declaration, or by a brace-enclosed body to create a definition. + */ +//@{ + +/** Create an identity function header. * - * CILK_C_UNREGISTER_REDUCER(total); + * @note The name of the function’s value pointer parameter will always be `v`. * - * // Never access total.value directly -- the compiler optimizer assumes - * // that REDUCER_VIEW(total) is the ONLY way to refer to the value. - * return REDUCER_VIEW(total); - * } + * @param name The reducer family name. + * @param tn The type name. */ - -/*************************************************************************** - * Common to real and stub implementations - ***************************************************************************/ - -__CILKRTS_BEGIN_EXTERN_C - #define __CILKRTS_DECLARE_REDUCER_IDENTITY(name,tn) CILK_EXPORT \ void __CILKRTS_MKIDENT3(name,_identity_,tn)(void* key, void* v) + +/** Create a reduction function header. + * + * @param name The reducer family name. + * @param tn The type name. + * @param l The name to use for the function’s left value pointer parameter. + * @param r The name to use for the function’s right value pointer parameter. + */ #define __CILKRTS_DECLARE_REDUCER_REDUCE(name,tn,l,r) CILK_EXPORT \ void __CILKRTS_MKIDENT3(name,_reduce_,tn)(void* key, void* l, void* r) + +/** Create a destructor function header. + * + * @param name The reducer family name. + * @param tn The type name. + * @param p The name to use for the function’s value pointer parameter. 
+ */ #define __CILKRTS_DECLARE_REDUCER_DESTROY(name,tn,p) CILK_EXPORT \ void __CILKRTS_MKIDENT3(name,_destroy_,tn)(void* key, void* p) -__CILKRTS_END_EXTERN_C +//@} +/// @endcond -#ifndef CILK_STUB /*************************************************************************** * Real implementation ***************************************************************************/ -__CILKRTS_BEGIN_EXTERN_C - -/* Declare a reducer with 'Type' value type */ +/** Declaration of a C reducer structure type. + * + * This macro expands into an anonymous structure declaration for a C reducer structure + * which contains a @a Type value. For example: + * + * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = + * CILK_C_INIT_REDUCER(int, …); + * + * @param Type The type of the value contained in the reducer object. + * + * @see @ref reducers_c_creation + */ #define CILK_C_DECLARE_REDUCER(Type) struct { \ __cilkrts_hyperobject_base __cilkrts_hyperbase; \ - __CILKRTS_CACHE_ALIGNED(Type value); \ + __CILKRTS_CACHE_ALIGN Type value; \ } -/* Initialize a reducer using the Identity, Reduce, and Destroy functions - * (the monoid) and with an arbitrary-length comma-separated initializer list. +/** Initializer for a C reducer structure. + * + * This macro expands into a brace-enclosed structure initializer for a C reducer structure + * that was declared with `CILK_C_DECLARE_REDUCER(Type)`. For example: + * + * CILK_C_DECLARE_REDUCER(int) my_add_int_reducer = + * CILK_C_INIT_REDUCER(int, + * add_int_reduce, + * add_int_identity, + * __cilkrts_hyperobject_noop_destroy, + * 0); + * + * @param Type The type of the value contained in the reducer object. Must be the same as + * the @a Type argument of the CILK_C_DECLARE_REDUCER macro call that created + * the reducer. + * @param Reduce The address of the @ref reducers_c_reduce_func "reduce function" for the + * reducer. + * @param Identity The address of the @ref reducers_c_identity_func "identity function" for + * the reducer. 
+ * @param Destroy The address of the @ref reducers_c_destroy_func "destroy function" for the + * reducer. + * @param ... The initial value for the reducer. (A single expression if @a Type is a + * scalar type; a list of values if @a Type is a struct or array type.) + * + * @see @ref reducers_c_creation */ -#define CILK_C_INIT_REDUCER(T,Reduce,Identity,Destroy, ...) \ - { { { Reduce,Identity,Destroy, \ - __cilkrts_hyperobject_alloc,__cilkrts_hyperobject_dealloc }, \ - 0, __CILKRTS_CACHE_LINE__, sizeof(T) }, __VA_ARGS__ } -/* Register a local reducer. */ +#define CILK_C_INIT_REDUCER(Type, Reduce, Identity, Destroy, ...) \ + { { { Reduce \ + , Identity \ + , Destroy \ + , __cilkrts_hyperobject_alloc \ + , __cilkrts_hyperobject_dealloc \ + } \ + , 0 \ + , __CILKRTS_CACHE_LINE__ \ + , sizeof(Type) \ + } \ + , __VA_ARGS__ \ + } + +/** Register a reducer with the Cilk runtime. + * + * The runtime will manage reducer values for all registered reducers when parallel execution + * strands begin and end. For example: + * + * CILK_C_REGISTER_REDUCER(my_add_int_reducer); + * cilk_for (int i = 0; i != n; ++i) { + * … + * } + * + * @param Expr The reducer to be registered. + * + * @see @ref page_reducers_in_c + */ #define CILK_C_REGISTER_REDUCER(Expr) \ __cilkrts_hyper_create(&(Expr).__cilkrts_hyperbase) -/* Unregister a local reducer. */ +/** Unregister a reducer with the Cilk runtime. + * + * The runtime will stop managing reducer values for a reducer after it is unregistered. For + * example: + * + * cilk_for (int i = 0; i != n; ++i) { + * … + * } + * CILK_C_UNREGISTER_REDUCER(my_add_int_reducer); + * + * @param Expr The reducer to be unregistered. + * + * @see @ref page_reducers_in_c + */ #define CILK_C_UNREGISTER_REDUCER(Expr) \ __cilkrts_hyper_destroy(&(Expr).__cilkrts_hyperbase) -/* Get the current view for a reducer */ +/** Get the current view for a reducer. 
+ * + * The `REDUCER_VIEW(reducer-name)` returns a reference to the reducer’s value for the + * current parallel strand. This can be used to initialize the value of the reducer before it + * is used, to modify the value of the reducer on the current parallel strand, or to retrieve + * the final value of the reducer at the end of the parallel computation. + * + * REDUCER_VIEW(my_add_int_reducer) = REDUCER_VIEW(my_add_int_reducer) + x; + * + * @note C++ reducer views restrict access to the wrapped value so that it can only be + * modified in ways consistent with the reducer’s operation. No such protection is provided + * for C reducers. It is entirely the responsibility of the user to refrain from modifying the + * value in any inappropriate way. + * + * @param Expr The reducer whose value is to be returned. + * + * @see @ref page_reducers_in_c + */ #define REDUCER_VIEW(Expr) (*(_Typeof((Expr).value)*) \ __cilkrts_hyper_lookup(&(Expr).__cilkrts_hyperbase)) -__CILKRTS_END_EXTERN_C - -#else /* if defined(CILK_STUB) */ - -/*************************************************************************** - * Stub implementation - ***************************************************************************/ - -__CILKRTS_BEGIN_EXTERN_C - -/* Declare a reducer with 'Type' value type */ -#define CILK_C_DECLARE_REDUCER(Type) struct { \ - Type value; \ - } - -/* Initialize a reducer using the Identity, Reduce, and Destroy functions - * (the monoid) and with an arbitrary-length comma-separated initializer list. - */ -#define CILK_C_INIT_REDUCER(T,Identity,Reduce,Destroy, ...) \ - { __VA_ARGS__ } - -/* Register a local reducer. */ -#define CILK_C_REGISTER_REDUCER(Expr) ((void) Expr) - -/* Unregister a local reducer. 
*/ -#define CILK_C_UNREGISTER_REDUCER(Expr) ((void) Expr) - -/* Get the current view for a reducer */ -#define REDUCER_VIEW(Expr) ((Expr).value) - -__CILKRTS_END_EXTERN_C - -#endif /* CILK_STUB */ +//@} C language reducer macros #endif // CILK_REDUCER_H_INCLUDED diff --git a/libcilkrts/include/cilk/reducer_file.h b/libcilkrts/include/cilk/reducer_file.h index 828a7bf9254..39bae92bd76 100644 --- a/libcilkrts/include/cilk/reducer_file.h +++ b/libcilkrts/include/cilk/reducer_file.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ diff --git a/libcilkrts/include/cilk/reducer_list.h b/libcilkrts/include/cilk/reducer_list.h index d021577f816..b45acb0e915 100644 --- a/libcilkrts/include/cilk/reducer_list.h +++ b/libcilkrts/include/cilk/reducer_list.h @@ -1,517 +1,1122 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer_list.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_list.h - * - * Purpose: Reducer hyperobject to accumulate a list of elements. - * - * Classes: reducer_list_append<Type, Allocator> - * reducer_list_prepend<Type, Allocator> - * - * Description: - * ============ - * This component provides reducer-type hyperobject representations that allow - * either prepending or appending values to an STL list. By replacing the - * variable with the hyperobject defined in this component, the data race is - * eliminated. 
- * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing an operation on - * each object and accumulating the result of the operation into an STL list - * variable. - *.. - * int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * std::list<int> result; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result.push_back(compute(myArray[i])); - * } +/** @file reducer_list.h * - * std::cout << "The result is: "; - * for (std::list<int>::iterator i = result.begin(); i != result.end(); - * ++i) - * { - * std::cout << *i << " " << std::endl; - * } + * @brief Defines classes for doing parallel list creation by appending or + * prepending. * - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' list. - * The race is solved by changing 'result' to a 'reducer_list_append' - * hyperobject: - *.. - * int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * cilk::reducer_list_append<int> result; - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result->push_back(compute(myArray[i])); - * } + * @ingroup ReducersList * - * std::cout << "The result is: "; - * const std::list &r = result->get_value(); - * for (std::list<int>::const_iterator i = r.begin(); i != r.end(); ++i) + * @see ReducersList + */ + +#ifndef REDUCER_LIST_H_INCLUDED +#define REDUCER_LIST_H_INCLUDED + +#include <cilk/reducer.h> +#include <list> + +/** @defgroup ReducersList List Reducers + * + * List append and prepend reducers allow the creation of a standard list by + * concatenating a set of lists or values in parallel. 
+ * + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redlist_usage Usage Example + * + * // Create a list containing the labels of the nodes of a tree in + * // “inorder” (left subtree, root, right subtree). + * + * struct Tree { Tree* left; Tree* right; string label; ... }; + * + * list<string> x; + * cilk::reducer< cilk::op_list_append<string> > xr(cilk::move_in(x)); + * collect_labels(tree, xr); + * xr.move_out(x); + * + * void collect_labels(Tree* node, + * cilk::reducer< cilk::op_list_append<string> >& xr) * { - * std::cout << *i << " " << std::endl; + * if (node) { + * cilk_spawn collect_labels(node->left, xr); + * xr->push_back(node->label); + * collect_labels(node->right, xr); + * cilk_sync; + * } * } * - * return 0; - * } - *.. + * @section redlist_monoid The Monoid + * + * @subsection redlist_monoid_values Value Set + * + * The value set of a list reducer is the set of values of the class + * `std::list<Type, Allocator>`, which we refer to as “the reducer’s list + * type”. + * + * @subsection redlist_monoid_operator Operator + * + * The operator of a list append reducer is defined as + * + * x CAT y == (every element of x, followed by every element of y) + * + * The operator of a list prepend reducer is defined as + * + * x RCAT y == (every element of y, followed by every element of x) + * + * @subsection redlist_monoid_identity Identity + * + * The identity value of a list reducer is the empty list, which is the value + * of the expression `std::list<Type, Allocator>([allocator])`. + * + * @section redlist_operations Operations + * + * In the operation descriptions below, the type name `List` refers to the + * reducer’s list type, `std::list<Type, Allocator>`. 
+ * + * @subsection redlist_constructors Constructors + * + * Any argument list which is valid for a `std::list` constructor is valid for + * a list reducer constructor. The usual move-in constructor is also provided: + * + * reducer(move_in(List& variable)) + * + * A list reducer with no constructor arguments, or with only an allocator + * argument, will initially contain the identity value, an empty list. + * + * @subsection redlist_get_set Set and Get + * + * r.set_value(const List& value) + * const List& = r.get_value() const + * r.move_in(List& variable) + * r.move_out(List& variable) + * + * @subsection redlist_view_ops View Operations + * + * The view of a list append reducer provides the following member functions: + * + * void push_back(const Type& element) + * void insert_back(List::size_type n, const Type& element) + * template <typename Iter> void insert_back(Iter first, Iter last) + * void splice_back(List& x) + * void splice_back(List& x, List::iterator i) + * void splice_back(List& x, List::iterator first, List::iterator last) + * + * The view of a list prepend reducer provides the following member functions: + * + * void push_front(const Type& element) + * void insert_front(List::size_type n, const Type& element) + * template <typename Iter> void insert_front(Iter first, Iter last) + * void splice_front(List& x) + * void splice_front(List& x, List::iterator i) + * void splice_front(List& x, List::iterator first, List::iterator last) + * + * The `push_back` and `push_front` functions are the same as the + * corresponding `std::list` functions. The `insert_back`, `splice_back`, + * `insert_front`, and `splice_front` functions are the same as the + * `std::list` `insert` and `splice` functions, with the first parameter + * fixed to the end or beginning of the list, respectively. 
+ * + * @section redlist_performance Performance Considerations + * + * An efficient reducer requires that combining the values of two views (using + * the view `reduce()` function) be a constant-time operation. Two lists can + * be merged in constant time using the `splice()` function if they have the + * same allocator. Therefore, the lists for new views are created (by the view + * identity constructor) using the same allocator as the list that was created + * when the reducer was constructed. * - * Operations provided: - * ==================== + * The performance of adding elements to a list reducer depends on the view + * operations that are used: * - * 'reducer_list_prepend' and 'reducer_list_append' support accumulation of an - * ordered list of items. Lists accumulated in Cilk++ strands will be merged - * to maintain the order of the lists - the order will be the same as if the - * application was run on a single core. + * * The `push` functions add a single element to the list, and therefore + * take constant time. + * * An `insert` function that inserts _N_ elements adds each of them + * individually, and therefore takes _O(N)_ time. + * * A `splice` function that inserts _N_ elements just adjusts a couple of + * pointers, and therefore takes constant time, _if the splice is from a + * list with the same allocator as the reducer_. Otherwise, it is + * equivalent to an `insert`, and takes _O(N)_ time. * - * The the current value of the reducer can be gotten and set using the - * 'get_value', 'get_reference', and 'set_value' methods. As with most - * reducers, these methods produce deterministic results only if called before - * the first spawn after creating a 'hyperobject' or when all strands spawned - * since creating the 'hyperobject' have been synced. + * This means that for best performance, if you will be adding elements to a + * list reducer in batches, you should `splice` them from a list having the + * same allocator as the reducer. 
+ * + * The reducer `move_in` and `move_out` functions do a constant-time `swap` if + * the variable has the same allocator as the reducer, and a linear-time copy + * otherwise. + * + * Note that the allocator of a list reducer is determined when the reducer is + * constructed. The following two examples may have very different behavior: + * + * list<Element, Allocator> a_list; + * + * reducer< op_list_append<Element, Allocator> > reducer1(move_in(a_list)); + * ... parallel computation ... + * reducer1.move_out(a_list); + * + * reducer< op_list_append<Element, Allocator> > reducer2; + * reducer2.move_in(a_list); + * ... parallel computation ... + * reducer2.move_out(a_list); + * + * * `reducer1` will be constructed with the same allocator as `a_list`, + * because the list was specified in the constructor. The `move_in` + * and `move_out` can therefore be done with a `swap` in constant time. + * * `reducer2` will be constructed with a _default_ allocator, + * “`Allocator()`”, which may or may not be the same as the allocator of + * `a_list`. Therefore, the `move_in` and `move_out` may have to be done + * with a copy in _O(N)_ time. + * + * (All instances of an allocator type with no internal state (like + * `std::allocator`) are “the same”. You only need to worry about the “same + * allocator” issue when you create list reducers with custom allocator types.) + * + * @section redlist_types Type and Operator Requirements + * + * `std::list<Type, Allocator>` must be a valid type. */ -#ifndef REDUCER_LIST_H_INCLUDED -#define REDUCER_LIST_H_INCLUDED -#include <cilk/reducer.h> -#include <list> +namespace cilk { -namespace cilk -{ +namespace internal { -/** - * @brief Reducer hyperobject to accumulate a list of elements where elements - * are added to the end of the list. +/** @ingroup ReducersList */ +//@{ + +/** Base class for list append and prepend view classes. 
+ * + * @note This class provides the definitions that are required for a class + * that will be used as the parameter of a @ref list_monoid_base + * specialization. + * + * @tparam Type The list element type (not the list type). + * @tparam Allocator The list's allocator class. + * + * @see ReducersList + * @see list_monoid_base */ -template<class _Ty, - class _Ax = std::allocator<_Ty> > -class reducer_list_append +template <typename Type, typename Allocator> +class list_view_base { -public: - /// std::list reducer_list_prepend is based on - typedef std::list<_Ty, _Ax> list_type; - /// Type of elements in a reducer_list_prepend - typedef _Ty list_value_type; - /// Type of elements in a reducer_list_prepend - typedef _Ty basic_value_type; +protected: + /// The type of the contained list. + typedef std::list<Type, Allocator> list_type; + + /// The list accumulator variable. + list_type m_value; public: - /// Definition of data view, operation, and identity for reducer_list_append - struct Monoid: monoid_base<std::list<_Ty, _Ax> > - { - static void reduce (std::list<_Ty, _Ax> *left, - std::list<_Ty, _Ax> *right); - }; -private: - reducer<Monoid> imp_; + /** @name Monoid support. + */ + //@{ + + /// Required by @ref monoid_with_view + typedef list_type value_type; -public: + /// Required by @ref list_monoid_base + Allocator get_allocator() const + { + return m_value.get_allocator(); + } + + //@} + + + /** @name Constructors. + */ + //@{ + + /// Standard list constructor. 
+ explicit list_view_base(const Allocator& a = Allocator()) : m_value(a) {} + explicit list_view_base( + typename list_type::size_type n, + const Type& value = Type(), + const Allocator& a = Allocator() ) : m_value(n, value, a) {} + template <typename Iter> + list_view_base(Iter first, Iter last, const Allocator& a = Allocator()) : + m_value(first, last, a) {} + list_view_base(const list_type& list) : m_value(list) {} - // Default Constructor - Construct a reducer with an empty list - reducer_list_append(); + /// Move-in constructor. + explicit list_view_base(move_in_wrapper<value_type> w) + : m_value(w.value().get_allocator()) + { + m_value.swap(w.value()); + } + + //@} + + /** @name Reducer support. + */ + //@{ + + /// Required by reducer::move_in() + void view_move_in(value_type& v) + { + if (m_value.get_allocator() == v.get_allocator()) + // Equal allocators. Do a (fast) swap. + m_value.swap(v); + else + // Unequal allocators. Do a (slow) copy. + m_value = v; + v.clear(); + } + + /// Required by reducer::move_out() + void view_move_out(value_type& v) + { + if (m_value.get_allocator() == v.get_allocator()) + // Equal allocators. Do a (fast) swap. + m_value.swap(v); + else + // Unequal allocators. Do a (slow) copy. 
+ v = m_value; + m_value.clear(); + } + + /// Required by reducer::set_value() + void view_set_value(const value_type& v) { m_value = v; } - // Construct a reducer with an initial list - reducer_list_append(const std::list<_Ty, _Ax> &initial_value); + /// Required by reducer::get_value() + value_type const& view_get_value() const { return m_value; } + + // Required by legacy wrapper get_reference() + value_type & view_get_reference() { return m_value; } + value_type const& view_get_reference() const { return m_value; } + + //@} +}; - // Return a const reference to the current list - const std::list<_Ty, _Ax> &get_value() const; - // Return a reference to the current list - std::list<_Ty, _Ax> &get_reference(); - std::list<_Ty, _Ax> const &get_reference() const; +/** Base class for list append and prepend monoid classes. + * + * The key to efficient reducers is that the `identity` operation, which + * creates a new per-strand view, and the `reduce` operation, which combines + * two per-strand views, must be constant-time operations. Two lists can be + * concatenated in constant time only if they have the same allocator. + * Therefore, all the per-strand list accumulator variables must be created + * with the same allocator as the leftmost view list. + * + * This means that a list reduction monoid must have a copy of the allocator + * of the leftmost view’s list, so that it can use it in the `identity` + * operation. This, in turn, requires that list reduction monoids have a + * specialized `construct()` function, which constructs the leftmost view + * before the monoid, and then passes the leftmost view’s allocator to the + * monoid constructor. + * + * @tparam View The list append or prepend view class. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). 
If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersList + * @see list_view_base + */ +template <typename View, bool Align> +class list_monoid_base : public monoid_with_view<View, Align> +{ + typedef typename View::value_type list_type; + typedef typename list_type::allocator_type allocator_type; + allocator_type m_allocator; + + using monoid_base<list_type, View>::provisional; + +public: - // Replace the list's contents with the given list - void set_value(const list_type &value); + /** Constructor. + * + * The allocator argument may be omitted, in which case a + * default-constructed allocator is used. + * + * @param allocator The list allocator to be used when + * identity-constructing new views. + */ + list_monoid_base(const allocator_type& allocator = allocator_type()) : + m_allocator(allocator) {} - // Add an element to the end of the list - void push_back(const _Ty element); + /** Create an identity view. + * + * List view identity constructors take the list allocator as an argument. + * + * @param v The address of the uninitialized memory in which the view + * will be constructed. + */ + void identity(View *v) const { ::new((void*) v) View(m_allocator); } + + /** @name construct functions + * + * All `construct()` functions first construct the leftmost view, using + * the optional @a x1, @a x2, and @a x3 arguments that were passed in from + * the reducer constructor. They then call the view’s `get_allocator()` + * function to get the list allocator from its contained list, and pass it + * to the monoid constructor.
+ */ + //@{ - reducer_list_append& operator*() { return *this; } - reducer_list_append const& operator*() const { return *this; } + template <typename Monoid> + static void construct(Monoid* monoid, View* view) + { provisional( new ((void*)view) View() ).confirm_if( + new ((void*)monoid) Monoid(view->get_allocator()) ); } - reducer_list_append* operator->() { return this; } - reducer_list_append const* operator->() const { return this; } + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, const T1& x1) + { provisional( new ((void*)view) View(x1) ).confirm_if( + new ((void*)monoid) Monoid(view->get_allocator()) ); } -private: - // Not copyable - reducer_list_append(const reducer_list_append&); - reducer_list_append& operator=(const reducer_list_append&); + template <typename Monoid, typename T1, typename T2> + static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2) + { provisional( new ((void*)view) View(x1, x2) ).confirm_if( + new ((void*)monoid) Monoid(view->get_allocator()) ); } -}; // class reducer_list_append + template <typename Monoid, typename T1, typename T2, typename T3> + static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2, + const T3& x3) + { provisional( new ((void*)view) View(x1, x2, x3) ).confirm_if( + new ((void*)monoid) Monoid(view->get_allocator()) ); } -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// + //@} +}; -// ------------------------------------------ -// template class reducer_list_append::Monoid -// ------------------------------------------ +//@} -/** - * Appends list from "right" reducer_list onto the end of the "left". - * When done, the "right" reducer_list is empty. 
- * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @param left reducer_list to be reduced into - * @param right reducer_list to be reduced from - */ -template<class _Ty, class _Ax> -void -reducer_list_append<_Ty, _Ax>::Monoid::reduce(std::list<_Ty, _Ax> *left, - std::list<_Ty, _Ax> *right) -{ - left->splice(left->end(), *right); -} +} // namespace internal -/** - * Default constructor - creates an empty list - * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - */ -template<class _Ty, class _Ax> -reducer_list_append<_Ty, _Ax>::reducer_list_append() : - imp_() -{ -} -/** - * Construct a reducer_list_append based on a list - * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @param initial_value - [in] Inital list - */ -template<class _Ty, class _Ax> -reducer_list_append<_Ty, _Ax>::reducer_list_append(const std::list<_Ty, _Ax> &initial_value) : - imp_(std::list<_Ty, _Ax>(initial_value)) -{ -} +/** @ingroup ReducersList */ +//@{ -/** - * Allows read-only access to the list - same as get_reference() +/** The list append reducer view class. * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_list_append<Type, Allocator> >`. It holds the + * accumulator variable for the reduction, and allows only append operations + * to be performed on it. 
* - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A const reference to the list that is the current contents of this view. - */ -template<class _Ty, class _Ax> -const std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_value() const -{ - return imp_.view(); -} - -/** - * Allows mutable access to list + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `push_back` operation would be used in an expression like + * `r->push_back(a)`, where `r` is a list append reducer variable. * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. + * @tparam Type The list element type (not the list type). + * @tparam Allocator The list allocator type. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A reference to the list that is the current contents of this view. 
+ * @see ReducersList + * @see op_list_append */ -template<class _Ty, class _Ax> -std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_reference() +template <class Type, + class Allocator = typename std::list<Type>::allocator_type> +class op_list_append_view : public internal::list_view_base<Type, Allocator> { - return imp_.view(); -} + typedef internal::list_view_base<Type, Allocator> base; + typedef std::list<Type, Allocator> list_type; + typedef typename list_type::iterator iterator; + + iterator end() { return this->m_value.end(); } -/** - * Allows read-only access to list - * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. - * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A const reference to the list that is the current contents of this view - */ -template<class _Ty, class _Ax> -const std::list<_Ty, _Ax> &reducer_list_append<_Ty, _Ax>::get_reference() const -{ - return imp_.view(); -} +public: -/** - * Replace the list's contents - * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @param value - The list to replace the current contents of this view - */ -template<class _Ty, class _Ax> -void reducer_list_append<_Ty, _Ax>::set_value(const list_type &value) -{ - // Clean out any value in our list - imp_.view().clear(); + /** @name Constructors. + * + * All op_list_append_view constructors simply pass their arguments on to + * the @ref internal::list_view_base base class constructor. + * + * @ref internal::list_view_base supports all the std::list constructor + * forms, as well as the reducer move_in constructor form. 
+ */ + //@{ + + op_list_append_view() : base() {} + + template <typename T1> + op_list_append_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_list_append_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + template <typename T1, typename T2, typename T3> + op_list_append_view(const T1& x1, const T2& x2, const T3& x3) : + base(x1, x2, x3) {} - // If the new list is empty, we're done - if (value.empty()) - return; + //@} - // Copy each element into our list - imp_.view() = value; -} + /** @name View modifier operations. + */ + //@{ + + /** Add an element at the end of the list. + * + * This is equivalent to `list.push_back(element)` + */ + void push_back(const Type& element) + { this->m_value.push_back(element); } -/** - * Adds an element to the end of the list - * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @param element - The element to be added to the end of the list - */ -template<class _Ty, class _Ax> -void reducer_list_append<_Ty, _Ax>::push_back(const _Ty element) -{ - imp_.view().push_back(element); -} + /** Insert elements at the end of the list. + * + * This is equivalent to `list.insert(list.end(), n, element)` + */ + void insert_back(typename list_type::size_type n, const Type& element) + { this->m_value.insert(end(), n, element); } -/** - * @brief Reducer hyperobject to accumulate a list of elements where elements are - * added to the beginning of the list. - */ -template<class _Ty, - class _Ax = std::allocator<_Ty> > -class reducer_list_prepend -{ -public: - /// std::list reducer_list_prepend is based on - typedef std::list<_Ty, _Ax> list_type; - /// Type of elements in a reducer_list_prepend - typedef _Ty list_value_type; - /// Type of elements in a reducer_list_prepend - typedef _Ty basic_value_type; + /** Insert elements at the end of the list. 
+ * + * This is equivalent to `list.insert(list.end(), first, last)` + */ + template <typename Iter> + void insert_back(Iter first, Iter last) + { this->m_value.insert(end(), first, last); } -public: - /// @brief Definition of data view, operation, and identity for reducer_list_prepend - struct Monoid: monoid_base<std::list<_Ty, _Ax> > - { - static void reduce (std::list<_Ty, _Ax> *left, - std::list<_Ty, _Ax> *right); - }; + /** Splice elements at the end of the list. + * + * This is equivalent to `list.splice(list.end(), x)` + */ + void splice_back(list_type& x) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(end(), x); + else { + insert_back(x.begin(), x.end()); + x.clear(); + } + } -private: - reducer<Monoid> imp_; + /** Splice elements at the end of the list. + * + * This is equivalent to `list.splice(list.end(), x, i)` + */ + void splice_back(list_type& x, iterator i) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(end(), x, i); + else { + push_back(*i); + x.erase(i); + } + } -public: + /** Splice elements at the end of the list. + * + * This is equivalent to `list.splice(list.end(), x, first, last)` + */ + void splice_back(list_type& x, iterator first, iterator last) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(end(), x, first, last); + else { + insert_back(first, last); + x.erase(first, last); + } + } + + //@} - // Default Constructor - Construct a reducer with an empty list - reducer_list_prepend(); + /** Reduction operation. + * + * This function is invoked by the @ref op_list_append monoid to combine + * the views of two strands when the right strand merges with the left + * one. It appends the value contained in the right-strand view to the + * value contained in the left-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) 
+ * + * @note Used only by the @ref op_list_append monoid to implement the + * monoid reduce operation. + */ + void reduce(op_list_append_view* right) + { + __CILKRTS_ASSERT( + this->m_value.get_allocator() == right->m_value.get_allocator()); + this->m_value.splice(end(), right->m_value); + } +}; - // Construct a reducer with an initial list - reducer_list_prepend(const std::list<_Ty, _Ax> &initial_value); - // Return a const reference to the current list - const std::list<_Ty, _Ax> &get_value() const; +/** The list prepend reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_list_prepend<Type, Allocator> >`. It holds the + * accumulator variable for the reduction, and allows only prepend operations + * to be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `push_front` operation would be used in an expression like + * `r->push_front(a)`, where `r` is a list prepend reducer variable. + * + * @tparam Type The list element type (not the list type). + * @tparam Allocator The list allocator type. + * + * @see ReducersList + * @see op_list_prepend + */ +template <class Type, + class Allocator = typename std::list<Type>::allocator_type> +class op_list_prepend_view : public internal::list_view_base<Type, Allocator> +{ + typedef internal::list_view_base<Type, Allocator> base; + typedef std::list<Type, Allocator> list_type; + typedef typename list_type::iterator iterator; + + iterator begin() { return this->m_value.begin(); } - // Return a reference to the current list - std::list<_Ty, _Ax> &get_reference(); - std::list<_Ty, _Ax> const &get_reference() const; +public: - // Replace the list's contents with the given list - void set_value(const list_type &value); + /** @name Constructors. 
+ * + * All op_list_prepend_view constructors simply pass their arguments on to + * the @ref internal::list_view_base base class constructor. + * + * @ref internal::list_view_base supports all the std::list constructor + * forms, as well as the reducer move_in constructor form. + * + */ + //@{ + + op_list_prepend_view() : base() {} + + template <typename T1> + op_list_prepend_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_list_prepend_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + template <typename T1, typename T2, typename T3> + op_list_prepend_view(const T1& x1, const T2& x2, const T3& x3) : + base(x1, x2, x3) {} - // Add an element to the beginning of the list - void push_front(const _Ty element); + //@} - reducer_list_prepend& operator*() { return *this; } - reducer_list_prepend const& operator*() const { return *this; } + /** @name View modifier operations. + */ + //@{ + + /** Add an element at the beginning of the list. + * + * This is equivalent to `list.push_front(element)` + */ + void push_front(const Type& element) + { this->m_value.push_front(element); } - reducer_list_prepend* operator->() { return this; } - reducer_list_prepend const* operator->() const { return this; } + /** Insert elements at the beginning of the list. + * + * This is equivalent to `list.insert(list.begin(), n, element)` + */ + void insert_front(typename list_type::size_type n, const Type& element) + { this->m_value.insert(begin(), n, element); } -private: - // Not copyable - reducer_list_prepend(const reducer_list_prepend&); - reducer_list_prepend& operator=(const reducer_list_prepend&); + /** Insert elements at the beginning of the list. + * + * This is equivalent to `list.insert(list.begin(), first, last)` + */ + template <typename Iter> + void insert_front(Iter first, Iter last) + { this->m_value.insert(begin(), first, last); } -}; // class reducer_list_prepend + /** Splice elements at the beginning of the list. 
+ * + * This is equivalent to `list.splice(list.begin(), x)` + */ + void splice_front(list_type& x) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(begin(), x); + else { + insert_front(x.begin(), x.end()); + x.clear(); + } + } -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// + /** Splice elements at the beginning of the list. + * + * This is equivalent to `list.splice(list.begin(), x, i)` + */ + void splice_front(list_type& x, iterator i) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(begin(), x, i); + else { + push_front(*i); + x.erase(i); + } + } -// ------------------------------------ -// template class reducer_list_prepend::Monoid -// ------------------------------------ + /** Splice elements at the beginning of the list. + * + * This is equivalent to `list.splice(list.begin(), x, first, last)` + */ + void splice_front(list_type& x, iterator first, iterator last) { + if (x.get_allocator() == this->m_value.get_allocator()) + this->m_value.splice(begin(), x, first, last); + else { + insert_front(first, last); + x.erase(first, last); + } + } + + //@} + + /** Reduction operation. + * + * This function is invoked by the @ref op_list_prepend monoid to combine + * the views of two strands when the right strand merges with the left + * one. It prepends the value contained in the right-strand view to the + * value contained in the left-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_list_prepend monoid to implement the + * monoid reduce operation. + */ + /** Reduce operation. + * + * Required by @ref monoid_base.
+ */ + void reduce(op_list_prepend_view* right) + { + __CILKRTS_ASSERT( + this->m_value.get_allocator() == right->m_value.get_allocator()); + this->m_value.splice(begin(), right->m_value); + } +}; -/** - * Appends list from "right" reducer_list onto the end of the "left". - * When done, the "right" reducer_list is empty. - */ -template<class _Ty, class _Ax> -void -reducer_list_prepend<_Ty, _Ax>::Monoid::reduce(std::list<_Ty, _Ax> *left, - std::list<_Ty, _Ax> *right) -{ - left->splice(left->begin(), *right); -} -/** - * Default constructor - creates an empty list - */ -template<class _Ty, class _Ax> -reducer_list_prepend<_Ty, _Ax>::reducer_list_prepend() : - imp_(std::list<_Ty, _Ax>()) -{ -} -/** - * Construct a reducer_list_prepend based on a list. +/** Monoid class for list append reductions. Instantiate the cilk::reducer + * template class with an op_list_append monoid to create a list append reducer + * class. For example, to create a list of strings: + * + * cilk::reducer< cilk::op_list_append<std::string> > r; * - * @param initial_value List used to initialize the reducer_list_prepend + * @tparam Type The list element type (not the list type). + * @tparam Allocator The list allocator type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersList + * @see op_list_append_view */ -template<class _Ty, class _Ax> -reducer_list_prepend<_Ty, _Ax>::reducer_list_prepend(const std::list<_Ty, _Ax> &initial_value) : - imp_(std::list<_Ty, _Ax>(initial_value)) +template <typename Type, + typename Allocator = typename std::list<Type>::allocator_type, + bool Align = false> +struct op_list_append : + public internal::list_monoid_base<op_list_append_view<Type, Allocator>, Align> { - return imp_.view(); -} + /// Construct with default allocator.
+ op_list_append() {} + /// Construct with specified allocator. + op_list_append(const Allocator& alloc) : + internal::list_monoid_base<op_list_append_view<Type, Allocator>, Align>(alloc) {} +}; -/** - * Allows read-only access to the list - same as get_reference() +/** Monoid class for list prepend reductions. Instantiate the cilk::reducer + * template class with an op_list_prepend monoid to create a list prepend + * reducer class. For example, to create a list of strings: + * + * cilk::reducer< cilk::op_list_prepend<std::string> > r; * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. + * @tparam Type The list element type (not the list type). + * @tparam Allocator The list allocator type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A const reference to the list that is the current contents of this view. + * @see ReducersList + * @see op_list_prepend_view */ -template<class _Ty, class _Ax> -const std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_value() const +template <typename Type, + typename Allocator = typename std::list<Type>::allocator_type, + bool Align = false> +struct op_list_prepend : + public internal::list_monoid_base<op_list_prepend_view<Type, Allocator>, Align> { - return imp_.view(); -} + /// Construct with default allocator. + op_list_prepend() {} + /// Construct with specified allocator.
+ op_list_prepend(const Allocator& alloc) : + internal::list_monoid_base<op_list_prepend_view<Type, Allocator>, Align>(alloc) {} +}; + -/** - * Allows mutable access to the list +/** Deprecated list append reducer wrapper class. * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. + * reducer_list_append is the same as + * @ref reducer<@ref op_list_append>, except that reducer_list_append is a + * proxy for the contained view, so that accumulator variable update + * operations can be applied directly to the reducer. For example, an element + * is appended to a `reducer<%op_list_append>` with `r->push_back(a)`, but an + * element can be appended to a `%reducer_list_append` with `r.push_back(a)`. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A mutable reference to the list that is the current contents of this view. + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_list_append. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_list_append` + * and `reducer<%op_list_append>`. This allows incremental code + * conversion: old code that used `%reducer_list_append` can pass a + * `%reducer_list_append` to a converted function that now expects a + * pointer or reference to a `reducer<%op_list_append>`, and vice + * versa. + * + * @tparam Type The value type of the list. + * @tparam Allocator The allocator type of the list. 
+ * + * @see op_list_append + * @see reducer + * @see ReducersList */ -template<class _Ty, class _Ax> -std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_reference() +template <class Type, class Allocator = std::allocator<Type> > +class reducer_list_append : + public reducer<op_list_append<Type, Allocator, true> > { - return imp_.view(); -} + typedef reducer<op_list_append<Type, Allocator, true> > base; + using base::view; +public: + + /// The reducer’s list type. + typedef typename base::value_type list_type; -/** - * Allows read-only access to the list + /// The list’s element type. + typedef Type list_value_type; + + /// The reducer’s primitive component type. + typedef Type basic_value_type; + + /// The monoid type. + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Construct a reducer with an empty list. + */ + reducer_list_append() {} + + /** Construct a reducer with a specified initial list value. + */ + reducer_list_append(const std::list<Type, Allocator> &initial_value) : + base(initial_value) {} + + //@} + + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_list_append_view. */ + //@{ + + /// @copydoc op_list_append_view::push_back(const Type&) + void push_back(const Type& element) { view().push_back(element); } + + //@} + + /** Allow mutable access to the list within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the list returned by this method will be a partial + * result. + * + * @returns A mutable reference to the list within the current view. + */ + list_type &get_reference() { return view().view_get_reference(); } + + /** Allow read-only access to the list within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the list returned by this method will be a partial + * result.
+ * + * @returns A const reference to the list within the current view. + */ + list_type const &get_reference() const { return view().view_get_reference(); } + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. That is: + * + * reducer< op_list_append<int> > r; + * r->push_back(a); // *r returns the view + * // push_back is a view member function + * + * reducer_list_append<int> w; + * w->push_back(a); // *w returns the wrapper + * // push_back is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_list_append& operator*() { return *this; } + reducer_list_append const& operator*() const { return *this; } + + reducer_list_append* operator->() { return this; } + reducer_list_append const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class.
+ */ + //@{ + operator reducer< op_list_append<Type, Allocator, false> >& () + { + return *reinterpret_cast< + reducer< op_list_append<Type, Allocator, false> >* + >(this); + } + operator const reducer< op_list_append<Type, Allocator, false> >& () const + { + return *reinterpret_cast< + const reducer< op_list_append<Type, Allocator, false> >* + >(this); + } + //@} + +}; + + +/** Deprecated list prepend reducer wrapper class. * - * @warning If this method is called before the parallel calculation is - * complete, the list returned by this method will be a partial result. + * reducer_list_prepend is the same as + * @ref reducer<@ref op_list_prepend>, except that reducer_list_prepend is a + * proxy for the contained view, so that accumulator variable update operations + * can be applied directly to the reducer. For example, an element is prepended + * to a `reducer<op_list_prepend>` with `r->push_front(a)`, but an element is + * prepended to a `reducer_list_prepend` with `r.push_front(a)`. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @returns A read-only reference to the list that is the current contents of this view. + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_list_prepend. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_list_prepend` + * and `reducer<%op_list_prepend>`.
This allows incremental code + * conversion: old code that used `%reducer_list_prepend` can pass a + * `%reducer_list_prepend` to a converted function that now expects a + * pointer or reference to a `reducer<%op_list_prepend>`, and vice + * versa. + * + * @tparam Type The value type of the list. + * @tparam Allocator The allocator type of the list. + * + * @see op_list_prepend + * @see reducer + * @see ReducersList */ -template<class _Ty, class _Ax> -const std::list<_Ty, _Ax> &reducer_list_prepend<_Ty, _Ax>::get_reference() const +template <class Type, class Allocator = std::allocator<Type> > +class reducer_list_prepend : + public reducer<op_list_prepend<Type, Allocator, true> > { - return imp_.view(); -} + typedef reducer<op_list_prepend<Type, Allocator, true> > base; + using base::view; +public: + + /** The reducer’s list type. + */ + typedef typename base::value_type list_type; -/** - * Replace the list's contents + /** The list’s element type. + */ + typedef Type list_value_type; + + /** The reducer’s primitive component type. + */ + typedef Type basic_value_type; + + /** The monoid type. + */ + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Construct a reducer with an empty list. + */ + reducer_list_prepend() {} + + /** Construct a reducer with a specified initial list value. + */ + reducer_list_prepend(const std::list<Type, Allocator> &initial_value) : + base(initial_value) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_list_prepend_view. + */ + //@{ + + /// @copydoc op_list_prepend_view::push_front(const Type&) + void push_front(const Type& element) { view().push_front(element); } + + //@} + + /** Allow mutable access to the list within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the list returned by this method will be a partial + * result.
+ * + * @returns A mutable reference to the list within the current view. + */ + list_type &get_reference() { return view().view_get_reference(); } + + /** Allow read-only access to the list within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the list returned by this method will be a partial + * result. + * + * @returns A const reference to the list within the current view. + */ + list_type const &get_reference() const { return view().view_get_reference(); } + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. That is: + * + * reducer< op_list_prepend<int> > r; + * r->push_front(a); // *r returns the view + * // push_front is a view member function + * + * reducer_list_prepend<int> w; + * w->push_front(a); // *w returns the wrapper + * // push_front is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_list_prepend& operator*() { return *this; } + reducer_list_prepend const& operator*() const { return *this; } + + reducer_list_prepend* operator->() { return this; } + reducer_list_prepend const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. 
+ */ + //@{ + operator reducer< op_list_prepend<Type, Allocator, false> >& () + { + return *reinterpret_cast< + reducer< op_list_prepend<Type, Allocator, false> >* + >(this); + } + operator const reducer< op_list_prepend<Type, Allocator, false> >& () const + { + return *reinterpret_cast< + const reducer< op_list_prepend<Type, Allocator, false> >* + >(this); + } + //@} + +}; + +/// @cond internal + +/** Metafunction specialization for reducer conversion. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. If not specified, the allocator class template for _Ty is used. - * @param value - The list to replace the current contents of this view + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_list_append<Type, Allocator> >` + * class to have an `operator reducer_list_append<Type, Allocator>& ()` + * conversion operator that statically downcasts the `reducer<op_list_append>` + * to the corresponding `reducer_list_append` type. (The reverse conversion, + * from `reducer_list_append` to `reducer<op_list_append>`, is just an upcast, + * which is provided for free by the language.) */ -template<class _Ty, class _Ax> -void reducer_list_prepend<_Ty, _Ax>::set_value(const list_type &value) +template <class Type, class Allocator, bool Align> +struct legacy_reducer_downcast<reducer<op_list_append<Type, Allocator, Align> > > { - // Clean out any value in our list - imp_.view().clear(); - - // If the new list is empty, we're done - if (value.empty()) - return; - - // Copy each element into our list - imp_.view() = value; -} + typedef reducer_list_append<Type, Allocator> type; +}; -/** - * Add an element to the beginning of the list +/** Metafunction specialization for reducer conversion. * - * @tparam _Ty - Type of the list elements - * @tparam _Ax - Allocator object used to define the storage allocation - * model. 
If not specified, the allocator class template for _Ty is used. - * @param element Element to be added to the beginning of the list + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the + * `reducer< op_list_prepend<Type, Allocator> >` class to have an + * `operator reducer_list_prepend<Type, Allocator>& ()` conversion operator + * that statically downcasts the `reducer<op_list_prepend>` to the + * corresponding `reducer_list_prepend` type. (The reverse conversion, from + * `reducer_list_prepend` to `reducer<op_list_prepend>`, is just an upcast, + * which is provided for free by the language.) */ -template<class _Ty, class _Ax> -void reducer_list_prepend<_Ty, _Ax>::push_front(const _Ty element) +template <class Type, class Allocator, bool Align> +struct legacy_reducer_downcast<reducer<op_list_prepend<Type, Allocator, Align> > > { - imp_.view().push_front(element); -} + typedef reducer_list_prepend<Type, Allocator> type; +}; + +/// @endcond + +//@} -} // namespace cilk +} // Close namespace cilk #endif // REDUCER_LIST_H_INCLUDED diff --git a/libcilkrts/include/cilk/reducer_max.h b/libcilkrts/include/cilk/reducer_max.h index 0b1c2960d63..2a5b1bd8934 100644 --- a/libcilkrts/include/cilk/reducer_max.h +++ b/libcilkrts/include/cilk/reducer_max.h @@ -1,1016 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - * - */ - -/* - * reducer_max.h - * - * Purpose: Reducer hyperobject to retain the max value. - * - * Classes: reducer_max<Type, Compare=std::less<Type> > - * reducer_max_index<Index, Value, Compare=std::less<Type> > - * - * Description: - * ============ - * This component provides reducer-type hyperobject representations that allow - * the maximum value, or the maximum value and an index, of a group of values to - * be determined in parallel. - * - * Usage Example: - * ============== - * Suppose we wish to compute the maximum value in an array of integers. - * - * int test() - * { - * int a[ARRAY_SIZE]; - * int max = INT_MAX; - * - * ... - * - * for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * if (max < a[i]) - * { - * max = a[i]; - * } - * } - * std::cout << "max = " << max << std::endl; - * - * ... - * } - * - * Changing the 'for' to a 'cilk_for' will allow the loop to be run in parallel - * but will create a data race on the variable 'max'. The race can be resolved - * by changing 'max' to a 'reducer_max' hyperobject: - * - * int test() - * { - * int a[ARRAY_SIZE]; - * cilk::reducer_max<int> max(INT_MAX); - * - * ... - * - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * max->calc_max(a[i]); - * } - * std::cout << "max = " << max->get_value() << std::endl; - * - * ... - * } - * - * A similar loop which calculates both the maximum value and index would be: - * - * int test() - * { - * int a[ARRAY_SIZE]; - * cilk::reducer_max_index<int, int> rmi(INT_MIN, -1); - * - * ... 
- * - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rmi.calc_max(i, a[i]); - * } - * std::cout << "max = " << rmi->get_value() << - * ", index = " << rmi->get_index() << std::endl; - * - * ... - * } - * - * - * Operations provided: - * ==================== - * reducer_max and reducer_max_index provide set and get methods that are - * guaranteed to be deterministic iff they are called prior to the first - * spawn or after the last sync in a parallel algorithm. When called during - * execution, the value returned by get_value (and get_index) may differ from - * run to run depending on how the routine or loop is scheduled. Calling - * set_value anywhere between the first spawn and the last sync may cause the - * algorithm to produce non-deterministic results. - * - * get_value and get_index return imutable values. The matching get_reference - * and get_index_reference methods return modifiable references - * - * The calc_max method is a comparison operation that sets the reducer to the - * larger of itself and the object being compared. The max_of routines are - * provided for convenience: - * - * cilk::reducer_max<int> rm; - * - * ... - * - * rm.calc_max(55); // alternatively: rm = cilk::max_of(rm, 55); - * - * - * Template parameter restrictions: - * ================================ - * reducer_max and reducer_max_index require that the 'Type' template parameter - * be DefaultConstructible. The 'Compare' template parameter must - * implement a strict weak ordering if you want deterministic results. - * - * There are no requirements on the 'Index' template parameter of - * reducer_max_index. All comparisons will be done on the 'Type' value. 
- * - */ - -#ifndef REDUCER_MAX_H_INCLUDED -#define REDUCER_MAX_H_INCLUDED - -#include <cilk/reducer.h> -#ifdef __cplusplus -# include <cstddef> -# include <functional> -#else -# include <stddef.h> -#endif - -#ifdef __cplusplus - -/* C++ Interface - */ - -namespace cilk { - -// Forward declaration -template <typename Type, typename Compare> class reducer_max; - -namespace internal { - // "PRIVATE" HELPER CLASS - uses the type system to make sure that - // reducer_max instances aren't copied, but we can still allow statements - // like *max = cilk::max_of(*max, a[i]); - template <typename Type, typename Compare> - class temp_max - { - private: - reducer_max<Type,Compare>* m_reducerPtr; - - friend class reducer_max<Type,Compare>; - - // Copyable, not assignable - temp_max& operator=(const temp_max &); - - public: - explicit temp_max(reducer_max<Type,Compare> *reducerPtr); - - temp_max calc_max(const Type& x) const; - }; - - template <typename Type, typename Compare> - inline - temp_max<Type,Compare> - max_of(const temp_max<Type,Compare>& tmp, const Type& x) - { - return tmp.calc_max(x); - } - - template <typename Type, typename Compare> - inline - temp_max<Type,Compare> - max_of(const Type& x, const temp_max<Type,Compare>& tmp) - { - return tmp.calc_max(x); - } - -} // end namespace internal - -/** - * @brief Class 'reducer_max' is a hyperobject representation of a value that - * retains the maximum value of all of the values it sees during its lifetime. - */ -template <typename Type, typename Compare=std::less<Type> > -class reducer_max -{ -public: - /// Type of data in a reducer_max - typedef Type basic_value_type; - -public: - /// Internal representation of the per-strand view of the data for - /// reducer_max - struct View - { - friend class reducer_max<Type,Compare>; - friend class monoid_base<View>; - - public: - /// Constructs a per-strand view instance, initializing it to the - /// identity value. 
- View(); - - /// Constructs a per-strand view instance, initializing it to the - /// specified value. - View(const Type& v); - - /// Sets this view to the specified value. - void set(const Type &v); - - /// Returns current value for this view - const Type &get_value() const; - - /// Returns true if the value has ever been set - bool is_set() const; - - private: - Type m_value; - bool m_isSet; - }; - -public: - /// Definition of data view, operation, and identity for reducer_max - struct Monoid: monoid_base<View> - { - Compare m_comp; - Monoid() : m_comp() {} - Monoid(const Compare& comp) : m_comp(comp) {} - void take_max(View *left, const Type &v) const; - void reduce(View *left, View *right) const; - }; -private: - // Hyperobject to serve up views - reducer<Monoid> m_imp; - -public: - typedef internal::temp_max<Type,Compare> temp_max; - - friend class internal::temp_max<Type,Compare>; - -public: - /// Construct a 'reducer_max' object with a value of 'Type()'. - reducer_max(); - - /// Construct a 'reducer_max' object with the specified initial value. - explicit reducer_max(const Type& initial_value); - - /// Construct a 'reducer_max' object with the specified initial value and - /// comparator. - reducer_max(const Type& initial_value, const Compare& comp); - - /// Return an immutable reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Return a reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Type& get_reference(); - - /// Return a reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. 
- Type const& get_reference() const; - - /// Returns true if the value has ever been set - bool is_set() const; - - /// Set the value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// Compare the current value with the one passed and retain the - /// larger of the two. Return this reducer. - reducer_max& calc_max(const Type& value); - - /// Merge the result of a 'max' operation into this object. The - /// operation must involve this hyperobject, i.e., x = max_of(x, 5); - reducer_max& operator=(const temp_max &temp); - - reducer_max& operator*() { return *this; } - reducer_max const& operator*() const { return *this; } - - reducer_max* operator->() { return this; } - reducer_max const* operator->() const { return this; } - -private: - // Not copyable - reducer_max(const reducer_max&); - reducer_max& operator=(const reducer_max&); -}; - -// Global "cilk::max_of" functions - -using internal::max_of; - -template <typename Type, typename Compare> -inline -internal::temp_max<Type,Compare> -max_of(reducer_max<Type,Compare>& r, const Type& x) -{ - return internal::temp_max<Type,Compare>(&r.calc_max(x)); -} - -template <typename Type, typename Compare> -inline -internal::temp_max<Type,Compare> -max_of(const Type& x, reducer_max<Type,Compare>& r) -{ - return internal::temp_max<Type,Compare>(&r.calc_max(x)); -} - -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// -------------------------------- -// template class reducer_max::View -// -------------------------------- - -template<typename Type, typename Compare> -reducer_max<Type,Compare>::View::View() - : m_value() - , m_isSet(false) -{ 
-} - -template<typename Type, typename Compare> -reducer_max<Type,Compare>::View::View(const Type& v) - : m_value(v) - , m_isSet(true) -{ -} - -template<typename Type, typename Compare> -void reducer_max<Type,Compare>::View::set(const Type &v) -{ - m_value = v; - m_isSet = true; -} - -template<typename Type, typename Compare> -const Type &reducer_max<Type,Compare>::View::get_value() const -{ - return m_value; -} - -template<typename Type, typename Compare> -bool reducer_max<Type,Compare>::View::is_set() const -{ - return m_isSet; -} - -// ------------------------------------------- -// template class reducer_max::Monoid -// ------------------------------------------- - -template<typename Type, typename Compare> -void -reducer_max<Type,Compare>::Monoid::take_max(View *left, const Type &v) const -{ - if (! left->m_isSet || m_comp(left->m_value,v)) - { - left->m_value = v; - left->m_isSet = true; - } -} - -template<typename Type, typename Compare> -void -reducer_max<Type,Compare>::Monoid::reduce(View *left, View *right) const -{ - if (right->m_isSet) - { - // Take the max of the two values - take_max (left, right->m_value); - } -} - -// -------------------------------------------- -// temp_max private helper class implementation -// -------------------------------------------- - -template <typename Type, typename Compare> inline -internal::temp_max<Type,Compare>::temp_max( - reducer_max<Type,Compare> *reducerPtr) - : m_reducerPtr(reducerPtr) -{ -} - -template <typename Type, typename Compare> inline -internal::temp_max<Type,Compare> -internal::temp_max<Type,Compare>::calc_max(const Type& x) const -{ - m_reducerPtr->calc_max(x); - return *this; -} - -// -------------------------- -// template class reducer_max -// -------------------------- - -// Default constructor -template <typename Type, typename Compare> -inline -reducer_max<Type,Compare>::reducer_max() - : m_imp() -{ -} - -template <typename Type, typename Compare> -inline 
-reducer_max<Type,Compare>::reducer_max(const Type& initial_value) - : m_imp(initial_value) -{ -} - -template <typename Type, typename Compare> -inline -reducer_max<Type,Compare>::reducer_max(const Type& initial_value, - const Compare& comp) - : m_imp(Monoid(comp), initial_value) -{ -} - -template <typename Type, typename Compare> -inline -const Type& reducer_max<Type,Compare>::get_value() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -Type& reducer_max<Type,Compare>::get_reference() -{ - View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -Type const& reducer_max<Type,Compare>::get_reference() const -{ - View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -bool reducer_max<Type,Compare>::is_set() const -{ - const View &v = m_imp.view(); - - return v.m_isSet; -} - -template <typename Type, typename Compare> -inline -void reducer_max<Type,Compare>::set_value(const Type& value) -{ - View &v = m_imp.view(); - - v.set(value); -} - -template <typename Type, typename Compare> inline -reducer_max<Type,Compare>& -reducer_max<Type,Compare>::calc_max(const Type& value) -{ - View &v = m_imp.view(); - m_imp.monoid().take_max(&v, value); - return *this; -} - -template <typename Type, typename Compare> -reducer_max<Type,Compare>& -reducer_max<Type,Compare>::operator=(const temp_max& temp) -{ - // Noop. Just test that temp is the same as this. - __CILKRTS_ASSERT(this == temp.m_reducerPtr); - return *this; -} - -/* - * @brief Class 'reducer_max_index' is a hyperobject representation of an - * index and corresponding value representing the maximum such pair this - * object has seen. 
- */ -template <typename Index, typename Value, typename Compare=std::less<Value> > -class reducer_max_index -{ -public: - /// Type of data in a reducer_max - typedef Value basic_value_type; - -public: - /// Internal representation of the per-strand view of the data for - /// reducer_max_index - struct View - { - friend class reducer_max_index<Index, Value, Compare>; - friend class monoid_base<View>; - - public: - /// Constructs a per-strand view instance, initializing it to the - /// identity value. - View(); - - /// Construct a per-strand view instance, initializing it to the - /// specified value and index. - View(const Index &i, const Value &v); - - /// Sets this view to a specified value and index - void set(const Index &i, const Value &v); - - /// Returns current index for this view - const Index &get_index() const; - - /// Returns current value for this view - const Value &get_value() const; - - /// Returns true if the value has ever been set - bool is_set() const; - - private: - Index m_index; - Value m_value; - bool m_isSet; - }; - -public: - /// Definition of data view, operation, and identity for reducer_max_index - struct Monoid: monoid_base<View> - { - Compare m_comp; - Monoid() : m_comp() {} - Monoid(const Compare& comp) : m_comp(comp) {} - void take_max(View *left, const Index &i, const Value &v) const; - void reduce (View *left, View *right) const; - }; - -private: - // Hyperobject to serve up views - reducer<Monoid> m_imp; - -public: - /// Construct a 'reducer_max_index' object with a value of 'Type()'. - reducer_max_index(); - - /// Construct a 'reducer_max_index' object with the specified initial - /// value and index. - reducer_max_index(const Index& initial_index, - const Value& initial_value); - - /// Construct a 'reducer_max_index' object with the specified initial - /// value, index, and comparator. 
- reducer_max_index(const Index& initial_index, - const Value& initial_value, - const Compare& comp); - - /// Return an immutable reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Value& get_value() const; - - /// Return a reference to the value of this object - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Value& get_reference(); - - /// Return a reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Value const& get_reference() const; - - /// Return an immutable reference to the maximum index. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Index& get_index() const; - - /// Return a mutable reference to the maximum index - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Index& get_index_reference(); - - /// Returns true if the value has ever been set - bool is_set() const; - - /// Set the index/value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Index& index, - const Value& value); - - /// Compare the current value with the one passed and retain the - /// larger of the two. Return this reducer. - reducer_max_index& calc_max(const Index& index, const Value& value); - - // DEPRECATED. Use calc_max instead. 
- void max_of(const Index& index, const Value& value) {calc_max(index,value);} - - reducer_max_index& operator*() { return *this; } - reducer_max_index const& operator*() const { return *this; } - - reducer_max_index* operator->() { return this; } - reducer_max_index const* operator->() const { return this; } - -private: - // Not copyable - reducer_max_index(const reducer_max_index&); - reducer_max_index& operator=(const reducer_max_index&); -}; - -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// -------------------------------- -// template class reducer_max::View -// -------------------------------- - -template<typename Index, typename Value, typename Compare> -reducer_max_index<Index, Value, Compare>::View::View() - : m_index() - , m_value() - , m_isSet(false) -{ -} - -template<typename Index, typename Value, typename Compare> -reducer_max_index<Index, Value, Compare>::View::View(const Index &i, - const Value &v) - : m_index(i) - , m_value(v) - , m_isSet(true) -{ -} - -template<typename Index, typename Value, typename Compare> -void -reducer_max_index<Index, Value, Compare>::View::set(const Index &i, - const Value &v) -{ - m_index = i; - m_value = v; - m_isSet = true; -} - -template<typename Index, typename Value, typename Compare> -const Index & -reducer_max_index<Index, Value, Compare>::View::get_index() const -{ - return m_index; -} - -template<typename Index, typename Value, typename Compare> -const Value & -reducer_max_index<Index, Value, Compare>::View::get_value() const -{ - return m_value; -} - -template<typename Index, typename Value, typename Compare> -bool -reducer_max_index<Index, Value, Compare>::View::is_set() const -{ - return m_isSet; -} - -// ------------------------------------------- -// template class reducer_max::Monoid -// ------------------------------------------- - 
-template<typename Index, typename Value, typename Compare> -void -reducer_max_index<Index,Value,Compare>::Monoid::take_max(View *left, - const Index &i, - const Value &v) const -{ - if (! left->m_isSet || m_comp(left->m_value,v)) - { - left->m_index = i; - left->m_value = v; - left->m_isSet = true; - } -} - -template<typename Index, typename Value, typename Compare> -void -reducer_max_index<Index, Value, Compare>::Monoid::reduce(View *left, - View *right) const -{ - if (right->m_isSet) - take_max (left, right->m_index, right->m_value); -} - -// -------------------------------- -// template class reducer_max_index -// -------------------------------- - -// Default constructor -template <typename Index, typename Value, typename Compare> -inline -reducer_max_index<Index, Value, Compare>::reducer_max_index() - : m_imp() -{ -} - -template <typename Index, typename Value, typename Compare> -inline -reducer_max_index<Index, Value, Compare>::reducer_max_index( - const Index& initial_index, const Value& initial_value) - : m_imp(initial_index, initial_value) -{ -} - -template <typename Index, typename Value, typename Compare> -inline -reducer_max_index<Index, Value, Compare>::reducer_max_index( - const Index& initial_index, - const Value& initial_value, - const Compare& comp) - : m_imp(Monoid(comp), initial_index, initial_value) -{ -} - -template <typename Index, typename Value, typename Compare> -inline -reducer_max_index<Index, Value, Compare>& -reducer_max_index<Index, Value, Compare>::calc_max(const Index& index, - const Value& value) -{ - View &v = m_imp.view(); - m_imp.monoid().take_max(&v, index, value); - return *this; -} - -template <typename Index, typename Value, typename Compare> -inline -const Value& reducer_max_index<Index, Value, Compare>::get_value() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Index, typename Value, typename Compare> -inline -Value& reducer_max_index<Index, Value, Compare>::get_reference() -{ - 
View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Index, typename Value, typename Compare> -inline -Value const& reducer_max_index<Index, Value, Compare>::get_reference() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Index, typename Value, typename Compare> -inline -const Index& reducer_max_index<Index, Value, Compare>::get_index() const -{ - const View &v = m_imp.view(); - - return v.m_index; -} - -template <typename Index, typename Value, typename Compare> -inline -Index& reducer_max_index<Index, Value, Compare>::get_index_reference() -{ - View &v = m_imp.view(); - - return v.m_index; -} - -template <typename Index, typename Value, typename Compare> -inline -bool reducer_max_index<Index, Value, Compare>::is_set() const -{ - const View &v = m_imp.view(); - - return v.m_isSet; -} - -template <typename Index, typename Value, typename Compare> -inline -void reducer_max_index<Index, Value, Compare>::set_value(const Index& index, - const Value& value) -{ - View &v = m_imp.view(); - - return v.set(index, value); -} - -} // namespace cilk - -#endif // __cplusplus - -/* C Interface - */ - -__CILKRTS_BEGIN_EXTERN_C - -/* REDUCER_MAX */ - -#define CILK_C_REDUCER_MAX_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn) -#define CILK_C_REDUCER_MAX(obj,tn,v) \ - CILK_C_REDUCER_MAX_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_,tn), \ - __cilkrts_hyperobject_noop_destroy, v) - -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MAX_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn); - -/* CILK_C_REDUCER_MAX_CALC(reducer, v) performs the reducer lookup - * AND calc_max operation, 
leaving the current view with the max of the - * previous value and v. - */ -#define CILK_C_REDUCER_MAX_CALC(reducer, v) do { \ - _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view < __value) { \ - *view = __value; \ - } } while (0) - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_MAX_INSTANCE(char,char); -CILK_C_REDUCER_MAX_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_MAX_INSTANCE(signed char,schar); -CILK_C_REDUCER_MAX_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_MAX_INSTANCE(short,short); -CILK_C_REDUCER_MAX_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_MAX_INSTANCE(int,int); -CILK_C_REDUCER_MAX_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_MAX_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_MAX_INSTANCE(long,long); -CILK_C_REDUCER_MAX_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_MAX_INSTANCE(long long,longlong); -CILK_C_REDUCER_MAX_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_MAX_INSTANCE(float,float); -CILK_C_REDUCER_MAX_INSTANCE(double,double); -CILK_C_REDUCER_MAX_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MAX_IMP(t,tn,id) \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r) \ - { if (*(t*)l < *(t*)r) *(t*)l = *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn) \ - { *(t*)v = id; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric types. 
The contents of reducer_max.c are as follows: - -CILK_C_REDUCER_MAX_IMP(char,char,CHAR_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned char,uchar,0) -CILK_C_REDUCER_MAX_IMP(signed char,schar,SCHAR_MIN) -CILK_C_REDUCER_MAX_IMP(wchar_t,wchar_t,WCHAR_MIN) -CILK_C_REDUCER_MAX_IMP(short,short,SHRT_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned short,ushort,0) -CILK_C_REDUCER_MAX_IMP(int,int,INT_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned int,uint,0) -CILK_C_REDUCER_MAX_IMP(unsigned int,unsigned,0) // alternate name -CILK_C_REDUCER_MAX_IMP(long,long,LONG_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned long,ulong,0) -CILK_C_REDUCER_MAX_IMP(long long,longlong,LLONG_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned long long,ulonglong,0) -CILK_C_REDUCER_MAX_IMP(float,float,-HUGE_VALF) -CILK_C_REDUCER_MAX_IMP(double,double,-HUGE_VAL) -CILK_C_REDUCER_MAX_IMP(long double,longdouble,-HUGE_VALL) - -*/ - -/* REDUCER_MAX_INDEX */ - -#define CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn) \ - typedef struct { \ - __STDNS ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) - -#define CILK_C_REDUCER_MAX_INDEX_TYPE(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,t) -#define CILK_C_REDUCER_MAX_INDEX(obj,t,v) \ - CILK_C_REDUCER_MAX_INDEX_TYPE(t) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_,t), \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_,t), \ - __cilkrts_hyperobject_noop_destroy, { 0, v }) - -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn) \ - CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \ - __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn); - -/* CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) performs the reducer lookup - * AND calc_max 
operation, leaving the current view with the max of the - * previous value and v. - */ -#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) do { \ - _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value < __value) { \ - view->index = (i); \ - view->value = __value; \ - } } while (0) - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_MAX_INDEX_INSTANCE(char,char); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char,schar); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(short,short); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(int,int); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long,long); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long,longlong); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(float,float); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(double,double); -CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MAX_INDEX_IMP(t,tn,id) \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r) \ - { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \ - if (((view_t*)l)->value < ((view_t*)r)->value) \ - *(view_t*)l = *(view_t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn) \ - { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \ - ((view_t*)v)->index = 0; ((view_t*)v)->value = id; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. 
The contents of reducer_max_index.c are as follows: - -CILK_C_REDUCER_MAX_INDEX_IMP(char,char,CHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned char,uchar,0) -CILK_C_REDUCER_MAX_INDEX_IMP(signed char,schar,SCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(wchar_t,wchar_t,WCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(short,short,SHRT_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned short,ushort,0) -CILK_C_REDUCER_MAX_INDEX_IMP(int,int,INT_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,uint,0) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,unsigned,0) // alternate name -CILK_C_REDUCER_MAX_INDEX_IMP(long,long,LONG_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long,ulong,0) -CILK_C_REDUCER_MAX_INDEX_IMP(long long,longlong,LLONG_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long long,ulonglong,0) -CILK_C_REDUCER_MAX_INDEX_IMP(float,float,-HUGE_VALF) -CILK_C_REDUCER_MAX_INDEX_IMP(double,double,-HUGE_VAL) -CILK_C_REDUCER_MAX_INDEX_IMP(long double,longdouble,-HUGE_VALL) - -*/ - - -__CILKRTS_END_EXTERN_C - -#endif // defined REDUCER_MAX_H_INCLUDED +/* reducer_max.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file reducer_max.h + * + * @brief Defines classes for doing parallel maximum reductions. + * + * @ingroup ReducersMinMax + * + * @see ReducersMinMax + */ + +#include "reducer_min_max.h" diff --git a/libcilkrts/include/cilk/reducer_min.h b/libcilkrts/include/cilk/reducer_min.h index 22694b101d3..52dea246d9e 100644 --- a/libcilkrts/include/cilk/reducer_min.h +++ b/libcilkrts/include/cilk/reducer_min.h @@ -1,1015 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - * - */ - -/* - * reducer_min.h - * - * Purpose: Reducer hyperobject to retain the min value. 
- * - * Classes: reducer_min<Type, Compare=std::less<Type> > - * reducer_min_index<Index, Value, Compare=std::less<Type> > - * - * Description: - * ============ - * This component provides reducer-type hyperobject representations that allow - * the minimum value, or the minimum value and an index, of a group of values to - * be determined in parallel. - * - * Usage Example: - * ============== - * Suppose we wish to compute the minimum value in an array of integers. - * - * int test() - * { - * int a[ARRAY_SIZE]; - * int min = INT_MIN; - * - * ... - * - * for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * if (a[i] < min) - * { - * min = a[i]; - * } - * } - * std::cout << "min = " << min << std::endl; - * - * ... - * } - * - * Changing the 'for' to a 'cilk_for' will allow the loop to be run in parallel - * but will create a data race on the variable 'min'. The race can be resolved - * by changing 'min' to a 'reducer_min' hyperobject: - * - * int test() - * { - * int a[ARRAY_SIZE]; - * cilk::reducer_min<int> min(INT_MIN); - * - * ... - * - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * min->calc_min(a[i]); - * } - * std::cout << "min = " << min->get_value() << std::endl; - * - * ... - * } - * - * A similar loop which calculates both the minimum value and index would be: - * - * int test() - * { - * int a[ARRAY_SIZE]; - * cilk::reducer_min_index<int, int> rmi(INT_MAX, -1); - * - * ... - * - * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) - * { - * rmi->calc_min(i, a[i]); - * } - * std::cout << "min = " << rmi->get_value() << - * ", index = " << rmi->get_index() << std::endl; - * - * ... - * } - * - * - * Operations provided: - * ==================== - * reducer_min and reducer_min_index provide set and get methods that are - * guaranteed to be deterministic iff they are called prior to the first - * spawn or after the last sync in a parallel algorithm. 
When called during - * execution, the value returned by get_value (and get_index) may differ from - * run to run depending on how the routine or loop is scheduled. Calling - * set_value anywhere between the first spawn and the last sync may cause the - * algorithm to produce non-deterministic results. - * - * get_value and get_index return imutable values. The matching get_reference - * and get_index_reference methods return modifiable references - * - * The calc_min method is a comparison operation that sets the reducer to the - * smaller of itself and the object being compared. The min_of routines are - * provided for convenience: - * - * cilk::reducer_min<int> rm; - * - * ... - * - * rm.calc_min(55); // alternatively: rm = cilk::min_of(rm, 55); - * - * - * Template parameter restrictions: - * ================================ - * reducer_min and reducer_min_index require that the 'Type' template parameter - * be DefaultConstructible. The 'Compare' template parameter must - * implement a strict weak ordering if you want deterministic results. - * - * There are no requirements on the 'Index' template parameter of - * reducer_min_index. All comparisons will be done on the 'Type' value. 
- * - */ - -#ifndef REDUCER_MIN_H_INCLUDED -#define REDUCER_MIN_H_INCLUDED - -#include <cilk/reducer.h> -#ifdef __cplusplus -# include <cstddef> -# include <functional> -#else -# include <stddef.h> -#endif - -#ifdef __cplusplus - -/* C++ Interface - */ - -namespace cilk { - -// Forward declaration -template <typename Type, typename Compare> class reducer_min; - -namespace internal { - // "PRIVATE" HELPER CLASS - uses the type system to make sure that - // reducer_max instances aren't copied, but we can still allow statements - // like *min = cilk::min_of(*min, a[i]); - template <typename Type, typename Compare> - class temp_min - { - private: - reducer_min<Type,Compare>* m_reducerPtr; - - friend class reducer_min<Type,Compare>; - - // Copyable, not assignable - temp_min& operator=(const temp_min &); - - public: - explicit temp_min(reducer_min<Type,Compare> *reducerPtr); - - temp_min calc_min(const Type& x) const; - }; - - template <typename Type, typename Compare> - inline - temp_min<Type,Compare> - min_of(const internal::temp_min<Type,Compare>& tmp, const Type& x) - { - return tmp.calc_min(x); - } - - template <typename Type, typename Compare> - inline - temp_min<Type,Compare> - min_of(const Type& x, const internal::temp_min<Type,Compare>& tmp) - { - return tmp.calc_min(x); - } - -} // end namespace internal - -/** - * @brief Class 'reducer_min' is a hyperobject representation of a value that - * retains the minimum value of all of the values it sees during its lifetime. - */ -template <typename Type, typename Compare=std::less<Type> > -class reducer_min -{ -public: - /// Type of data in a reducer_min - typedef Type basic_value_type; - -public: - /// Internal representation of the per-strand view of the data for - /// reducer_min - struct View - { - friend class reducer_min<Type,Compare>; - friend class monoid_base<View>; - - public: - /// Constructs a per-strand view instance, initializing it to the - /// identity value. 
- View(); - - /// Constructs a per-strand view instance, initializing it to the - /// specified value. - View(const Type& v); - - /// Sets this view to the specified value. - void set(const Type &v); - - /// Returns current value for this view - const Type &get_value() const; - /// Returns true if the value has ever been set - bool is_set() const; - - private: - Type m_value; - bool m_isSet; - }; - -public: - /// Definition of data view, operation, and identity for reducer_min - struct Monoid: monoid_base<View> - { - Compare m_comp; - Monoid() : m_comp() {} - Monoid(const Compare& comp) : m_comp(comp) {} - void take_min(View *left, const Type &v) const; - void reduce(View *left, View *right) const; - }; -private: - // Hyperobject to serve up views - reducer<Monoid> m_imp; - -public: - typedef internal::temp_min<Type,Compare> temp_min; - - friend class internal::temp_min<Type,Compare>; - -public: - /// Construct a 'reducer_min' object with a value of 'Type()'. - reducer_min(); - - /// Construct a 'reducer_min' object with the specified initial value. - explicit reducer_min(const Type& initial_value); - - /// Construct a 'reducer_min' object with the specified initial value and - /// comparator. - reducer_min(const Type& initial_value, const Compare& comp); - - /// Return an immutable reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Return a reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Type& get_reference(); - - /// Return a reference to the value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. 
- Type const& get_reference() const; - - /// Returns true if the value has ever been set - bool is_set() const; - - /// Set the value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// Compare the current value with the one passed and retain the - /// smaller of the two. Return this reducer. - reducer_min& calc_min(const Type& value); - - /// Merge the result of a 'min' operation into this object. The - /// operation must involve this hyperobject, i.e., x = min_of(x, 5); - reducer_min& operator=(const temp_min &temp); - - reducer_min& operator*() { return *this; } - reducer_min const& operator*() const { return *this; } - - reducer_min* operator->() { return this; } - reducer_min const* operator->() const { return this; } - -private: - // Not copyable - reducer_min(const reducer_min&); - reducer_min& operator=(const reducer_min&); -}; - -// Global "cilk::min_of" functions - -using internal::min_of; - -template <typename Type, typename Compare> -inline -internal::temp_min<Type,Compare> -min_of(reducer_min<Type,Compare>& r, const Type& x) -{ - return internal::temp_min<Type,Compare>(&r.calc_min(x)); -} - -template <typename Type, typename Compare> -inline -internal::temp_min<Type,Compare> -min_of(const Type& x, reducer_min<Type,Compare>& r) -{ - return internal::temp_min<Type,Compare>(&r.calc_min(x)); -} - -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// -------------------------------- -// template class reducer_min::View -// -------------------------------- - -template<typename Type, typename Compare> -reducer_min<Type,Compare>::View::View() - : m_value() - , m_isSet(false) -{ 
-} - -template<typename Type, typename Compare> -reducer_min<Type,Compare>::View::View(const Type& v) - : m_value(v) - , m_isSet(true) -{ -} - -template<typename Type, typename Compare> -void reducer_min<Type,Compare>::View::set(const Type &v) -{ - m_value = v; - m_isSet = true; -} - -template<typename Type, typename Compare> -const Type &reducer_min<Type,Compare>::View::get_value() const -{ - return m_value; -} - -template<typename Type, typename Compare> -bool reducer_min<Type,Compare>::View::is_set() const -{ - return m_isSet; -} - -// ------------------------------------------- -// template class reducer_min::Monoid -// ------------------------------------------- - -template<typename Type, typename Compare> -void -reducer_min<Type,Compare>::Monoid::take_min(View *left, const Type &v) const -{ - if (! left->m_isSet || m_comp(v,left->m_value)) - { - left->m_value = v; - left->m_isSet = true; - } -} - -template<typename Type, typename Compare> -void -reducer_min<Type,Compare>::Monoid::reduce(View *left, View *right) const -{ - if (right->m_isSet) - { - // Take the min of the two values - take_min (left, right->m_value); - } -} - -// ----------------------------------- -// temp_min private helper class implementation -// ----------------------------------- - -template <typename Type, typename Compare> inline -internal::temp_min<Type,Compare>::temp_min( - reducer_min<Type,Compare> *reducerPtr) - : m_reducerPtr(reducerPtr) -{ -} - -template <typename Type, typename Compare> inline -internal::temp_min<Type,Compare> -internal::temp_min<Type,Compare>::calc_min(const Type& x) const -{ - m_reducerPtr->calc_min(x); - return *this; -} - -// -------------------------- -// template class reducer_min -// -------------------------- - -// Default constructor -template <typename Type, typename Compare> -inline -reducer_min<Type,Compare>::reducer_min() - : m_imp() -{ -} - -template <typename Type, typename Compare> -inline -reducer_min<Type,Compare>::reducer_min(const Type& 
initial_value) - : m_imp(initial_value) -{ -} - -template <typename Type, typename Compare> -inline -reducer_min<Type,Compare>::reducer_min(const Type& initial_value, - const Compare& comp) - : m_imp(Monoid(comp), initial_value) -{ -} - -template <typename Type, typename Compare> -inline -const Type& reducer_min<Type,Compare>::get_value() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -Type& reducer_min<Type,Compare>::get_reference() -{ - View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -Type const& reducer_min<Type,Compare>::get_reference() const -{ - View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Type, typename Compare> -inline -bool reducer_min<Type,Compare>::is_set() const -{ - const View &v = m_imp.view(); - - return v.m_isSet; -} - -template <typename Type, typename Compare> -inline -void reducer_min<Type,Compare>::set_value(const Type& value) -{ - View &v = m_imp.view(); - - v.set(value); -} - -template <typename Type, typename Compare> inline -reducer_min<Type,Compare>& -reducer_min<Type,Compare>::calc_min(const Type& value) -{ - View &v = m_imp.view(); - - // If no previous value has been set, always set the value - - m_imp.monoid().take_min(&v, value); - - return *this; -} - -template <typename Type, typename Compare> -reducer_min<Type,Compare>& -reducer_min<Type,Compare>::operator=(const temp_min& temp) -{ - // Noop. Just test that temp is the same as this. - __CILKRTS_ASSERT(this == temp.m_reducerPtr); - return *this; -} - - -/** - * @brief Class 'reducer_min_index' is a hyperobject representation of an - * index and corresponding value representing the minimum such pair this - * object has seen. 
- */ -template <typename Index, typename Value, typename Compare=std::less<Value> > -class reducer_min_index -{ -public: - /// Type of data in a reducer_min - typedef Value basic_value_type; - -public: - /// Internal representation of the per-strand view of the data for - /// reducer_min_index - struct View - { - friend class reducer_min_index<Index, Value, Compare>; - friend class monoid_base<View>; - - public: - /// Constructs a per-strand view instance, initializing it to the - /// identity value. - View(); - - /// Constructs a per-strand view instance, initializing it to the - /// specified value and index. - View(const Index &i, const Value &v); - - /// Sets this view to a specified value and index - void set(const Index &i, const Value &v); - - /// Returns current index for this view - const Index &get_index() const; - /// Returns current value for this view - const Value &get_value() const; - /// Returns true if the value has ever been set - bool is_set() const; - - private: - Index m_index; - Value m_value; - bool m_isSet; - }; - -public: - /// Definition of data view, operation, and identity for reducer_min_index - struct Monoid: monoid_base<View> - { - Compare m_comp; - Monoid() : m_comp() {} - Monoid(const Compare& comp) : m_comp(comp) {} - void take_min(View *left, const Index &i, const Value &v) const; - void reduce (View *left, View *right) const; - }; - -private: - // Hyperobject to serve up views - reducer<Monoid> m_imp; - -public: - /// Construct a 'reducer_min_index' object with a value of 'Type()'. - reducer_min_index(); - - /// Construct a 'reducer_min_index' object with the specified initial - /// value and index. - reducer_min_index(const Index& initial_index, - const Value& initial_value); - - /// Construct a 'reducer_min_index' object with the specified initial - /// value, index, and comparator. 
- reducer_min_index(const Index& initial_index, - const Value& initial_value, - const Compare& comp); - - /// Return an imutable reference to the value of this object. - const Value& get_value() const; - - /// Return a reference to the value of this object - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Value& get_reference(); - - /// Return a reference to the value of this object - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Value const& get_reference() const; - - /// Return an immutable reference to the minimum index. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Index& get_index() const; - - /// Return a mutable reference to the minimum index - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - Index& get_index_reference(); - - /// Returns true if the value has ever been set - bool is_set() const; - - /// Set the index/value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Index& index, - const Value& value); - - /// Compare the current value with the one passed and retain the - /// smaller of the two. - void calc_min(const Index& index, const Value& value); - - // DEPRECATED. Use calc_min instead. 
- void min_of(const Index& index, const Value& value) - { calc_min(index,value); } - - reducer_min_index& operator*() { return *this; } - reducer_min_index const& operator*() const { return *this; } - - reducer_min_index* operator->() { return this; } - reducer_min_index const* operator->() const { return this; } - -private: - // Not copyable - reducer_min_index(const reducer_min_index&); - reducer_min_index& operator=(const reducer_min_index&); -}; - -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// -------------------------------- -// template class reducer_min::View -// -------------------------------- - -template<typename Index, typename Value, typename Compare> -reducer_min_index<Index, Value, Compare>::View::View() - : m_index() - , m_value() - , m_isSet(false) -{ -} - -template<typename Index, typename Value, typename Compare> -reducer_min_index<Index, Value, Compare>::View::View(const Index &i, - const Value &v) - : m_index(i) - , m_value(v) - , m_isSet(true) -{ -} - -template<typename Index, typename Value, typename Compare> -void -reducer_min_index<Index, Value, Compare>::View::set(const Index &i, - const Value &v) -{ - m_index = i; - m_value = v; - m_isSet = true; -} - -template<typename Index, typename Value, typename Compare> -const Index & -reducer_min_index<Index, Value, Compare>::View::get_index() const -{ - return m_index; -} - -template<typename Index, typename Value, typename Compare> -const Value & -reducer_min_index<Index, Value, Compare>::View::get_value() const -{ - return m_value; -} - -template<typename Index, typename Value, typename Compare> -bool -reducer_min_index<Index, Value, Compare>::View::is_set() const -{ - return m_isSet; -} - -// ------------------------------------------- -// template class reducer_min::Monoid -// ------------------------------------------- - 
-template<typename Index, typename Value, typename Compare> -void -reducer_min_index<Index,Value,Compare>::Monoid::take_min(View *left, - const Index &i, - const Value &v) const -{ - if (! left->m_isSet || m_comp(v,left->m_value )) - { - left->m_index = i; - left->m_value = v; - left->m_isSet = true; - } -} - -template<typename Index, typename Value, typename Compare> -void -reducer_min_index<Index, Value, Compare>::Monoid::reduce(View *left, - View *right) const -{ - if (right->m_isSet) - take_min (left, right->m_index, right->m_value); -} - -// -------------------------------- -// template class reducer_min_index -// -------------------------------- - -// Default constructor -template <typename Index, typename Value, typename Compare> -inline -reducer_min_index<Index, Value, Compare>::reducer_min_index() - : m_imp() -{ -} - -template <typename Index, typename Value, typename Compare> -inline -reducer_min_index<Index, Value, Compare>::reducer_min_index( - const Index& initial_index, const Value& initial_value) - : m_imp(initial_index, initial_value) -{ -} - -template <typename Index, typename Value, typename Compare> -inline -reducer_min_index<Index, Value, Compare>::reducer_min_index( - const Index& initial_index, - const Value& initial_value, - const Compare& comp) - : m_imp(Monoid(comp), initial_index, initial_value) -{ -} - -template <typename Index, typename Value, typename Compare> -inline -void reducer_min_index<Index, Value, Compare>::calc_min(const Index& index, - const Value& value) -{ - View &v = m_imp.view(); - - m_imp.monoid().take_min(&v, index, value); -} - -template <typename Index, typename Value, typename Compare> -inline -const Value& reducer_min_index<Index, Value, Compare>::get_value() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Index, typename Value, typename Compare> -inline -Value& reducer_min_index<Index, Value, Compare>::get_reference() -{ - View &v = m_imp.view(); - - return v.m_value; -} - 
-template <typename Index, typename Value, typename Compare> -inline -Value const& reducer_min_index<Index, Value, Compare>::get_reference() const -{ - const View &v = m_imp.view(); - - return v.m_value; -} - -template <typename Index, typename Value, typename Compare> -inline -const Index& reducer_min_index<Index, Value, Compare>::get_index() const -{ - const View &v = m_imp.view(); - - return v.m_index; -} - -template <typename Index, typename Value, typename Compare> -inline -Index& reducer_min_index<Index, Value, Compare>::get_index_reference() -{ - View &v = m_imp.view(); - - return v.m_index; -} - -template <typename Index, typename Value, typename Compare> -inline -bool reducer_min_index<Index, Value, Compare>::is_set() const -{ - const View &v = m_imp.view(); - - return v.m_isSet; -} - -template <typename Index, typename Value, typename Compare> -inline -void reducer_min_index<Index, Value, Compare>::set_value(const Index& index, - const Value& value) -{ - View &v = m_imp.view(); - - return v.set(index, value); -} - -} // namespace cilk - -#endif // __cplusplus - -/* C Interface - */ - -__CILKRTS_BEGIN_EXTERN_C - -/* REDUCER_MIN */ - -#define CILK_C_REDUCER_MIN_TYPE(tn) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn) -#define CILK_C_REDUCER_MIN(obj,tn,v) \ - CILK_C_REDUCER_MIN_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_,tn), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_,tn), \ - __cilkrts_hyperobject_noop_destroy, v) - -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MIN_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn); - -/* CILK_C_REDUCER_MIN_CALC(reducer, v) performs the reducer lookup - * AND calc_min operation, leaving the current view with the min of the - * previous 
value and v. - */ -#define CILK_C_REDUCER_MIN_CALC(reducer, v) do { \ - _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (*view > __value) { \ - *view = __value; \ - } } while (0) - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_MIN_INSTANCE(char,char); -CILK_C_REDUCER_MIN_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_MIN_INSTANCE(signed char,schar); -CILK_C_REDUCER_MIN_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_MIN_INSTANCE(short,short); -CILK_C_REDUCER_MIN_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_MIN_INSTANCE(int,int); -CILK_C_REDUCER_MIN_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_MIN_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_MIN_INSTANCE(long,long); -CILK_C_REDUCER_MIN_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_MIN_INSTANCE(long long,longlong); -CILK_C_REDUCER_MIN_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_MIN_INSTANCE(float,float); -CILK_C_REDUCER_MIN_INSTANCE(double,double); -CILK_C_REDUCER_MIN_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MIN_IMP(t,tn,id) \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r) \ - { if (*(t*)l > *(t*)r) *(t*)l = *(t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn) \ - { *(t*)v = id; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. 
The contents of reducer_min.c are as follows: - -CILK_C_REDUCER_MIN_IMP(char,char,CHAR_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned char,uchar,CHAR_MIN) -CILK_C_REDUCER_MIN_IMP(signed char,schar,SCHAR_MAX) -CILK_C_REDUCER_MIN_IMP(wchar_t,wchar_t,WCHAR_MAX) -CILK_C_REDUCER_MIN_IMP(short,short,SHRT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned short,ushort,USHRT_MAX) -CILK_C_REDUCER_MIN_IMP(int,int,INT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned int,uint,UINT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned int,unsigned,UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_IMP(long,long,LONG_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned long,ulong,ULONG_MAX) -CILK_C_REDUCER_MIN_IMP(long long,longlong,LLONG_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned long long,ulonglong,ULLONG_MAX) -CILK_C_REDUCER_MIN_IMP(float,float,HUGE_VALF) -CILK_C_REDUCER_MIN_IMP(double,double,HUGE_VAL) -CILK_C_REDUCER_MIN_IMP(long double,longdouble,HUGE_VALL) - -*/ - -/* REDUCER_MIN_INDEX */ - -#define CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn) \ - typedef struct { \ - __STDNS ptrdiff_t index; \ - t value; \ - } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) - -#define CILK_C_REDUCER_MIN_INDEX_TYPE(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,t) -#define CILK_C_REDUCER_MIN_INDEX(obj,t,v) \ - CILK_C_REDUCER_MIN_INDEX_TYPE(t) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_,t), \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_,t), \ - __cilkrts_hyperobject_noop_destroy, { 0, v }) - -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn) \ - CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn); \ - typedef CILK_C_DECLARE_REDUCER( \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)) \ - __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,tn); \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn); - -/* CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) performs 
the reducer lookup - * AND calc_min operation, leaving the current view with the min of the - * previous value and v. - */ -#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) do { \ - _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ - _Typeof(v) __value = (v); \ - if (view->value > __value) { \ - view->index = (i); \ - view->value = __value; \ - } } while (0) - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_MIN_INDEX_INSTANCE(char,char); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char,schar); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(short,short); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(int,int); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long,long); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long,longlong); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(float,float); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(double,double); -CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_MIN_INDEX_IMP(t,tn,id) \ - __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r) \ - { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \ - if (((view_t*)l)->value > ((view_t*)r)->value) \ - *(view_t*)l = *(view_t*)r; } \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn) \ - { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \ - ((view_t*)v)->index = 0; ((view_t*)v)->value = id; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. 
The contents of reducer_min_index.c are as follows: - -CILK_C_REDUCER_MIN_INDEX_IMP(char,char,CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned char,uchar,CHAR_MIN) -CILK_C_REDUCER_MIN_INDEX_IMP(signed char,schar,SCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(wchar_t,wchar_t,WCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(short,short,SHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned short,ushort,USHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(int,int,INT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,uint,UINT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,unsigned,UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INDEX_IMP(long,long,LONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long,ulong,ULONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(long long,longlong,LLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long long,ulonglong,ULLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(float,float,HUGE_VALF) -CILK_C_REDUCER_MIN_INDEX_IMP(double,double,HUGE_VAL) -CILK_C_REDUCER_MIN_INDEX_IMP(long double,longdouble,HUGE_VALL) - -*/ - - -__CILKRTS_END_EXTERN_C - -#endif // defined REDUCER_MIN_H_INCLUDED +/* reducer_min.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file reducer_min.h + * + * @brief Defines classes for doing parallel minimum reductions. + * + * @ingroup ReducersMinMax + * + * @see ReducersMinMax + */ + +#include "reducer_min_max.h" diff --git a/libcilkrts/include/cilk/reducer_min_max.h b/libcilkrts/include/cilk/reducer_min_max.h new file mode 100644 index 00000000000..3348974daf4 --- /dev/null +++ b/libcilkrts/include/cilk/reducer_min_max.h @@ -0,0 +1,3601 @@ +/* reducer_min_max.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file reducer_min_max.h + * + * @brief Defines classes for doing parallel minimum and maximum reductions. 
+ * + * @ingroup ReducersMinMax + * + * @see ReducersMinMax + */ + +#ifndef REDUCER_MIN_MAX_H_INCLUDED +#define REDUCER_MIN_MAX_H_INCLUDED + +#include <cilk/reducer.h> + +#ifdef __cplusplus + +#include <algorithm> +#include <limits> + +/** @defgroup ReducersMinMax Minimum and Maximum Reducers + * + * Minimum and maximum reducers allow the computation of the minimum or + * maximum of a set of values in parallel. + * + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redminmax_usage Usage Examples + * + * cilk::reducer< cilk::op_max<int> > rm; + * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) + * { + * rm->calc_max(a[i]); // or *rm = cilk::max_of(*rm, a[i]) + * } + * std::cout << "maximum value is " << rm.get_value() << std::endl; + * + * and + * + * cilk::reducer< cilk::op_min_index<int, double> > rmi; + * cilk_for (int i = 0; i < ARRAY_SIZE; ++i) + * { + * rmi->calc_min(i, a[i]); // or *rmi = cilk::min_of(*rmi, i, a[i]); + * } + * std::cout << "minimum value a[" << rmi.get_value().first << "] = " + * << rmi.get_value().second << std::endl; + * + * @section redminmax_monoid The Monoid + * + * @subsection redminmax_monoid_values Value Set + * + * The value set of a minimum or maximum reducer is the set of values of + * `Type`, possibly augmented with a special identity value which is greater + * than (less than) any value of `Type`. + * + * @subsection redminmax_monoid_operator Operator + * + * In the most common case, the operator of a minimum reducer is defined as + * + * x MIN y == (x < y) ? x : y + * + * Thus, `a1 MIN a2 MIN … an` is the first `ai` which is not greater than any + * other `ai`. + * + * The operator of a maximum reducer is defined as + * + * x MAX y == (x > y) ? x : y + * + * Thus, `a1 MAX a2 MAX … an` is the first `ai` which is not less than any + * other `ai`.
+ * + * @subsection redminmax_monoid_comparators Comparators + * + * Min/max reducers are not limited to finding the minimum or maximum value + * determined by the `<` or `>` operator. In fact, all min/max reducers use a + * _comparator_, which is either a function or an object of a function class + * that defines a [strict weak ordering] + * (http://en.wikipedia.org/wiki/Strict_weak_ordering#Strict_weak_orderings) + * on a set of values. (This is exactly the same as the requirement for the + * comparison predicate for STL associative containers and sorting + * algorithms.) + * + * Just as with STL algorithms and containers, the comparator type parameter + * for min/max reducers is optional. If it is omitted, it defaults to + * `std::less`, which gives the behavior described in the previous section. + * Using non-default comparators (anything other than `std::less`) with + * min/max reducers is just like using them with STL containers and + * algorithms. + * + * Taking comparator objects into account, the reduction operation `MIN` for a + * minimum reducer is defined as + * + * x MIN y == compare(x, y) ? x : y + * + * where `compare()` is the reducer’s comparator. Similarly, the reduction + * operation MAX for a maximum reducer is defined as + * + * x MAX y == compare(y, x) ? x : y + * + * (If `compare(x, y) == x < y`, then `compare(y, x) == x > y`.) + * + * @subsection redminmax_monoid_identity Identity + * + * The identity value of the reducer is the value which is greater than (less + * than) any other value in the value set of the reducer. This is the + * [“special identity value”](#redminmax_monoid_values) if the reducer has + * one, or the largest (smallest) value in the value set otherwise. + * + * @section redminmax_index Value and Index Reducers + * + * Min/max reducers come in two families. The _value_ reducers, using `op_min` + * and `op_max` monoids, simply find the smallest or largest value from a set + * of values. 
The _index_ reducers, using `op_min_index` and `op_max_index` + * monoids, also record an index value associated with the first occurrence of + * the smallest or largest value. + * + * In the `%op_min_index` usage example [above](#redminmax_usage), the values + * are taken from an array, and the index of a value is the index of the array + * element it comes from. More generally, though, an index can be any sort of + * key which identifies a particular value in a collection of values. For + * example, if the values were taken from the nodes of a tree, then the + * “index” of a value might be a pointer to the node containing that value. + * + * A min/max index reducer is essentially the same as a min/max value reducer + * whose value type is an (index, value) pair, and whose comparator ignores + * the index part of the pair. (index, value) pairs are represented by + * `std::pair<Index, Type>` objects. This has the consequence that wherever + * the interface of a min/max value reducer has a `Type`, the interface of the + * corresponding min/max index reducer has a `std::pair<Index, Type>`. (There + * are convenience variants of the `reducer(Type)` constructor and the + * `calc_min()`, `calc_max()`, `%min_of()`, and `%max_of()` functions that + * take an index argument and a value argument instead of an index/value + * pair.) 
+ * + * @section redminmax_operations Operations + * + * @subsection redminmax_constructors Constructors + * + * @subsubsection redminmax_constructors_value Min/Max Value Reducers + * + * reducer() // identity + * reducer(const Compare& compare) // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * reducer(const Type& value, const Compare& compare) + * reducer(move_in(Type& variable), const Compare& compare) + * + * @subsubsection redminmax_constructors_index Min/Max Index Reducers + * + * reducer() // identity + * reducer(const Compare& compare) // identity + * reducer(const std::pair<Index, Type>& pair) + * reducer(const Index& index, const Type& value) + * reducer(move_in(std::pair<Index, Type>& variable)) + * reducer(const std::pair<Index, Type>& pair, const Compare& compare) + * reducer(const Index& index, const Type& value, const Compare& compare) + * reducer(move_in(std::pair<Index, Type>& variable), const Compare& compare) + * + * @subsection redminmax_get_set Set and Get + * + * r.set_value(const Type& value) + * Type = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) + * + * Note that for an index reducer, the `Type` in these operations is actually a + * `std::pair<Index, Type>`. (See @ref redminmax_index.) There is _not_ a + * `set_value(value, index)` operation. + * + * @subsection redminmax_initial Initial Values and is_set() + * + * A minimum or maximum reducer without a specified initial value, before any + * MIN or MAX operation has been performed on it, represents the [identity + * value](#redminmax_monoid_identity) of its monoid. For value reducers with a + * numeric type and default comparator (`std::less`), this will be a well + * defined value. 
For example, + * + * reducer< op_max<unsigned> > r1; + * // r1.get_value() == 0 + * + * reducer< op_min<float> > r2; + * // r2.get_value() == std::numeric_limits<float>::infinity() + * + * In other cases, though (index reducers, non-numeric types, or non-default + * comparators), the actual identity value for the monoid may be unknown, or + * it may not even be a value of the reducer’s type. For example, there is no + * “largest string” to serve as the initial value for a + * `reducer< op_min<std::string> >`. In these cases, the result of calling + * `get_value()` is undefined. + * + * To avoid calling `get_value()` when its result is undefined, you can call + * the view’s `is_set()` function, which will return true if the reducer + * has a well-defined value — either because a MIN or MAX operation has been + * performed, or because it had a well-defined initial value: + * + * reducer< op_max<unsigned> > r1; + * // r1->is_set() == true + * // r1.get_value() == 0 + * + * reducer< op_min<std::string> > r2; + * // r2->is_set() == false + * // r2.get_value() is undefined + * r2->calc_min("xyzzy"); + * // r2->is_set() == true + * // r2.get_value() == "xyzzy" + * + * > Note: For an index reducer without a specified initial value, the + * > initial value of the index is the default value of the `Index` type. + * + * @subsection redminmax_view_ops View Operations + * + * The basic reduction operation is `x = x MIN a` for a minimum reducer, or + * `x = x MAX a` for a maximum reducer. The basic syntax for these operations + * uses the `calc_min()` and `calc_max()` member functions of the view class.
+ * An assignment syntax is also provided, using the %cilk::min_of() and + * %cilk::max_of() global functions: + * + * Class | Modifier | Assignment + * ---------------|---------------------|----------- + * `op_min` | `r->calc_min(x)` | `*r = min_of(*r, x)` or `*r = min_of(x, *r)` + * `op_max` | `r->calc_max(x)` | `*r = max_of(*r, x)` or `*r = max_of(x, *r)` + * `op_min_index` | `r->calc_min(i, x)` | `*r = min_of(*r, i, x)` or `*r = min_of(i, x, *r)` + * `op_max_index` | `r->calc_max(i, x)` | `*r = max_of(*r, i, x)` or `*r = max_of(i, x, *r)` + * + * Wherever an “`i`, `x`” argument pair is shown in the table above, a single + * pair argument may be passed instead. For example: + * + * Index index; + * Type value; + * std::pair<Index, Type> ind_val(index, value); + * // The following statements are all equivalent. + * r->calc_min(index, value); + * r->calc_min(ind_val); + * *r = min_of(*r, index, value); + * *r = min_of(*r, ind_val); + * + * The `calc_min()` and `calc_max()` member functions return a reference to + * the view, so they can be chained: + * + * r->calc_max(x).calc_max(y).calc_max(z); + * + * In a `%min_of()` or `%max_of()` assignment, the view on the left-hand side + * of the assignment must be the same as the view argument in the call. + * Otherwise, the behavior is undefined (but an assertion error will occur if + * the code is compiled with debugging enabled). + * + * *r = max_of(*r, x); // OK + * *r1 = max_of(*r2, y); // ERROR + * + * `%min_of()` and `%max_of()` calls can be nested: + * + * *r = max_of(max_of(max_of(*r, x), y), z); + * *r = min_of(i, a[i], min_of(j, a[j], min_of(k, a[k], *r))); + * + * @section redminmax_compatibility Compatibility Issues + * + * Most Cilk library reducers provide + * * Binary compatibility between `reducer_KIND` reducers compiled with Cilk + * library version 0.9 (distributed with Intel® C++ Composer XE version + * 13.0 and earlier) and the same reducers compiled with Cilk library + * version 1.0 and later. 
+ * * Transparent casting between references to `reducer<op_KIND>` and + * `reducer_KIND`. + * + * This compatibility is not available in all cases for min/max reducers. + * There are two areas of incompatibility. + * + * @subsection redminmax_compatibility_stateful Non-empty Comparators + * + * There is no way to provide binary compatibility between the 0.9 and 1.0 + * definitions of min/max reducers that use a non-empty comparator class or a + * comparator function. (Empty comparator classes like `std::less` are not a + * problem.) + * + * To avoid run-time surprises, the legacy `reducer_{min|max}[_index]` classes + * have been coded in the 1.0 library so that they will not even compile when + * instantiated with a non-empty comparator class. + * + * @subsection redminmax_compatibility_optimized Numeric Optimization + * + * Min/max reducers with a numeric value type and the default comparator can + * be implemented slightly more efficiently than other min/max reducers. + * However, the optimization is incompatible with the 0.9 library + * implementation of min/max reducers. + * + * The default min/max reducers implementation in the 1.0 library uses this + * numeric optimization. Code using legacy reducers compiled with the 1.0 + * library can be safely used in the same program as code compiled with the + * 0.9 library, but classes compiled with the different Cilk libraries will be + * defined in different namespaces. + * + * The simplest solution is just to recompile the code that was compiled with + * the older version of Cilk. However, if this is impossible, you can define + * the `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro (on the compiler command line, + * or in your source code before including `reducer_min_max.h`) when compiling + * with the new library. This will cause it to generate numeric reducers that + * will be less efficient, but will be fully compatible with previously + * compiled code. 
(Note that this macro has no effect on [the non-empty + * comparator incompatibility] (redminmax_compatibility_stateful).) + * + * @section redminmax_types Type Requirements + * + * `Type` and `Index` must be `Copy Constructible`, `Default Constructible`, + * and `Assignable`. + * + * `Compare` must be `Copy Constructible` if the reducer is constructed with a + * `compare` argument, and `Default Constructible` otherwise. + * + * The `Compare` function must induce a strict weak ordering on the elements + * of `Type`. + * + * @section redminmax_in_c Minimum and Maximum Reducers in C + * + * These macros can be used to do minimum and maximum reductions in C: + * + * Declaration | Type | Operation + * -----------------------------|-----------------------------------|---------- + * @ref CILK_C_REDUCER_MIN |@ref CILK_C_REDUCER_MIN_TYPE |@ref CILK_C_REDUCER_MIN_CALC + * @ref CILK_C_REDUCER_MAX |@ref CILK_C_REDUCER_MAX_TYPE |@ref CILK_C_REDUCER_MAX_CALC + * @ref CILK_C_REDUCER_MIN_INDEX |@ref CILK_C_REDUCER_MIN_INDEX_TYPE |@ref CILK_C_REDUCER_MIN_INDEX_CALC + * @ref CILK_C_REDUCER_MAX_INDEX |@ref CILK_C_REDUCER_MAX_INDEX_TYPE |@ref CILK_C_REDUCER_MAX_INDEX_CALC + * + * For example: + * + * CILK_C_REDUCER_MIN(r, int, INT_MAX); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * CILK_C_REDUCER_MIN_CALC(r, a[i]); + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The smallest value in a is %d\n", REDUCER_VIEW(r)); + * + * + * CILK_C_REDUCER_MAX_INDEX(r, uint, 0); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * CILK_C_REDUCER_MAX_INDEX_CALC(r, i, a[i]); + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The largest value in a is %u at %d\n", + * REDUCER_VIEW (r).value, REDUCER_VIEW(r).index); + * + * See @ref reducers_c_predefined. 
+ */ + +namespace cilk { + +/** @defgroup ReducersMinMaxBinComp Binary compatibility + * + * If the macro CILK_LIBRARY_0_9_REDUCER_MINMAX is defined, then we generate + * reducer code and data structures which are binary-compatible with code that + * was compiled with the old min/max wrapper definitions, so we want the + * mangled names of the legacy min/max reducer wrapper classes to be the + * same as the names produced by the old definitions. + * + * Conversely, if the macro is not defined, then we generate binary- + * incompatible code, so we want different mangled names, to make sure that + * the linker does not allow new and old compiled legacy wrappers to be passed + * to one another. (Global variables are a different, and probably insoluble, + * problem.) + * + * Similarly, min/max classes compiled with and without + * CILK_LIBRARY_0_9_REDUCER_MINMAX are binary-incompatible, and must get + * different mangled names. + * + * The trick is, when compiling in normal (non-compatibility) mode, wrap + * everything in an extra namespace, and then `use` it into the top-level cilk + * namespace. Then + * + * * Classes and functions compiled in normal mode will be in + * different namespaces from the same classes and functions compiled in + * compatibility mode. + * * The legacy wrapper classes and functions will be in the same namespace + * as the same classes and functions compiled with the 0.9 library if and + * only if they are compiled in compatibility mode. + * + * @ingroup ReducersMinMax + */ + +#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX +/** Namespace to wrap min/max reducer definitions when not compiling in “binary + * compatibility” mode. + * + * By default, all of the min/max reducer definitions are defined in this + * namespace and then imported into namespace ::cilk, so that they do not + * clash with the legacy definitions with the same names.
However, if the + * macro `CILK_LIBRARY_0_9_REDUCER_MINMAX` is defined, then the min/max + * definitions go directly into namespace ::cilk, so that, for example, + * cilk::reducer_max defined with the 1.0 library is equivalent (to the + * linker) to cilk::reducer_max defined with the 0.9 library. + * + * @ingroup ReducersMinMaxBinComp + * @ingroup ReducersMinMax + */ +namespace cilk_lib_1_0 { +#endif + +/** Namespace containing internal implementation classes and functions for + * min/max reducers. + * + * @ingroup ReducersMinMax + */ +namespace min_max_internal { + +using ::cilk::internal::binary_functor; +using ::cilk::internal::typed_indirect_binary_function; +using ::cilk::internal::class_is_empty; + +/** @defgroup ReducersMinMaxIsSet The “is_set optimization” + * + * The obvious definition of the identity value for a max or min reducer is as + * the smallest (or largest) value of the value type. However, for an + * arbitrary comparator and/or an arbitrary value type, the largest / smallest + * value may not be known. It may not even be defined — what is the largest + * string? + * + * Therefore, min/max reducers represent their value internally as a pair + * `(value, is_set)`. When `is_set` is true, the pair represents the known + * value `value`; when `is_set` is false, the pair represents the identity + * value. + * + * This is an effective solution, but the most common use of min/max reducers + * is probably with numeric types and the default definition of minimum or + * maximum (using `std::less`), in which case there are well-defined, knowable + * smallest and largest values. Testing `is_set` for every comparison is then + * unnecessary and wasteful. + * + * The “is_set optimization” just means generating code that doesn’t use + * `is_set` when it isn’t needed. It is implemented using two metaprogramming + * classes: + * + * - do_is_set_optimization tests whether the optimization is applicable. 
+ * - identity_value gets the appropriate identity value for a type. + * + * The is_set optimization is the reason that min/max reducers compiled with + * Cilk library 1.0 are binary-incompatible with the same reducers compiled + * with library 0.9, and therefore the optimization is suppressed when + * compiling in + * ReducersMinMaxBinComp "binary compatibility mode". + * + * @ingroup ReducersMinMax + */ + +/** Test whether the ReducersMinMaxIsSet "is_set optimization" is + * applicable. + * + * The @ref do_is_set_optimization class is used to test whether the is_set + * optimization should be applied for a particular reducer. It is instantiated + * with a value type and a comparator, and defines a boolean constant, + * `value`. Then `%do_is_set_optimization<Type, Comp>::%value` can be used as + * a boolean template parameter to control the specialization of another + * class. + * + * In ReducersMinMaxBinComp "binary compatibility mode", when the + * `CILK_LIBRARY_0_9_REDUCER_MINMAX` macro is defined, `value` will always + * be false. + * + * @tparam Type The value type for the reducer. + * @tparam Compare The comparator type for the reducer. + * + * @result The `value` data member will be `true` if @a Type is a numeric + * type, @a Compare is `std::less<Type>`, and + * `CILK_LIBRARY_0_9_REDUCER_MINMAX` is not defined. + * + * @see ReducersMinMaxIsSet + * @see @ref view_content + * + * @ingroup ReducersMinMaxIsSet + */ +template < typename Type, + typename Compare > +struct do_is_set_optimization +{ + /// `True` if the is_set optimization should be applied to min/max reducers + /// with this value type and comparator; `false` otherwise. + static const bool value = false; +}; + +#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX +/// @cond +template <typename Type> +struct do_is_set_optimization<Type, std::less<Type> > +{ + /// True in the special case where optimization is possible. 
+ static const bool value = std::numeric_limits<Type>::is_specialized; +}; +/// @endcond +#endif + + +/** Get the identity value when using the ReducersMinMaxIsSet + * "is_set optimization". + * + * This class defines a function which assigns the appropriate identity value + * to a variable when the is_set optimization is applicable. + * + * @tparam Type The value type for the reducer. + * @tparam Compare The comparator type for the reducer. + * @tparam ForMax `true` to get the identity value for a max reducer (i.e., + * the smallest value of @a Type), `false` to get the identity + * value for a min reducer (i.e., the largest value of + * @a Type). + * + * @result If @a Type and @a Compare qualify for the is_set optimization, the + * `set_identity()' function will set its argument variable to the + * smallest or largest value of @a Type, depending on @a ForMax. + * Otherwise, `set_identity()` will be a no-op. + * + * @see ReducersMinMaxIsSet + * + * @ingroup ReducersMinMaxIsSet + * @see @ref view_content + */ +template < typename Type, + typename Compare, + bool ForMax, + bool = std::numeric_limits<Type>::is_specialized, + bool = std::numeric_limits<Type>::has_infinity > +struct identity_value { + /// Assign the identity value to the reference parameter. + static void set_identity(Type&) {} +}; + +/// @cond +template <typename Type> +struct identity_value<Type, std::less<Type>, true, true, true> { + /// Floating max identity is negative infinity. + static void set_identity(Type& id) + { id = -std::numeric_limits<Type>::infinity(); } +}; + +template <typename Type> +struct identity_value<Type, std::less<Type>, true, true, false> { + /// Integer max identity is minimum value of type. + static void set_identity(Type& id) + { id = std::numeric_limits<Type>::min(); } +}; + +template <typename Type> +struct identity_value<Type, std::less<Type>, false, true, true> { + /// Floating min identity is positive infinity. 
+ static void set_identity(Type& id) + { id = std::numeric_limits<Type>::infinity(); } +}; + +template <typename Type> +struct identity_value<Type, std::less<Type>, false, true, false> { + /// Integer min identity is maximum value of type. + static void set_identity(Type& id) + { id = std::numeric_limits<Type>::max(); } +}; + +/// @endcond + + +/** Adapter class to reverse the arguments of a predicate. + * + * Observe that: + * + * (x < y) == (y > x) + * max(x, y) == (x < y) ? y : x + * min(x, y) == (y < x) ? y : x == (x > y) ? y : x + * + * More generally, if `c` is a predicate defining a `Strict Weak Ordering`, + * and `c*(x, y) == c(y, x)`, then + * + * max(x, y, c) == c(x, y) ? y : x + * min(x, y, c) == c(y, x) ? y : x == c*(x, y) ? y : x == max(x, y, c*) + * + * For any predicate `C` with argument type `T`, the template class + * `%reverse_predicate<C, T>` defines a predicate which is identical to `C`, + * except that its arguments are reversed. Thus, for example, we could + * implement `%op_min_view<Type, Compare>` as + * `%op_max_view<Type, %reverse_predicate<Compare, Type> >`. + * (Actually, op_min_view and op_max_view are both implemented as subclasses + * of a common base class, view_base.) + * + * @note If `C` is an empty functor class, then `reverse_predicate(C)` will + * also be an empty functor class. + * + * @tparam Predicate The predicate whose arguments are to be reversed. + * @tparam Argument @a Predicate’s argument type. 
+ * + * @ingroup ReducersMinMax + */ +template <typename Predicate, + typename Argument = typename Predicate::first_argument_type> +class reverse_predicate : private binary_functor<Predicate>::type { + typedef typename binary_functor<Predicate>::type base; +public: + /// Default constructor + reverse_predicate() : base() {} + /// Constructor with predicate object + reverse_predicate(const Predicate& p) : base(p) {} + /// The reversed predicate operation + bool operator()(const Argument& x, const Argument& y) const + { return base::operator()(y, x); } +}; + + +/** Class to represent the comparator for a min/max view class. + * + * This class is intended to accomplish two objectives in the implementation + * of min/max views. + * + * 1. To minimize data bloat, when we have a reducer with a non-stateless + * comparator, we want to keep a single instance of the comparator object + * in the monoid, and just call it from the views. + * 2. In ReducersMinMaxBinComp "binary compatibility mode", views for + * reducers with a stateless comparator must have the same content as in + * Cilk library 0.9 — that is, they must contain only `value` and + * `is_set` data members. + * + * To achieve the first objective, we use the + * @ref internal::typed_indirect_binary_function class defined in + * metaprogramming.h to wrap a pointer to the actual comparator. If no + * pointer is needed because the actual comparator is stateless, the + * `typed_indirect_binary_function` class will be empty, too. + * + * To achieve the second objective, we make the + * `typed_indirect_binary_function` class a base class of the view rather than + * a data member, so the “empty base class” rule will ensure that no + * additional space is allocated in the view unless it is needed. + * + * We could simply use typed_indirect_binary_function as the base class of the + * view, but this would mean writing comparisons as `(*this)(x, y)`, which is + * just weird.
So, instead, we define comparator_base as a subclass of + * typed_indirect_binary_function which provides function `compare()` + * as a synonym for `operator()`. + * + * @tparam Type The value type of the comparator class. + * @tparam Compare A predicate class. + * + * @see internal::typed_indirect_binary_function + * + * @ingroup ReducersMinMax + */ +template <typename Type, typename Compare> +class comparator_base : private typed_indirect_binary_function<Compare, Type, Type, bool> +{ + typedef typed_indirect_binary_function<Compare, Type, Type, bool> base; +protected: + comparator_base(const Compare* f) : base(f) {} ///< Constructor. + + /// Comparison function. + bool compare(const Type& a, const Type& b) const + { + return base::operator()(a, b); + } + + /// Get the comparator pointer. + const Compare* compare_pointer() const { return base::pointer(); } +}; + + +/** @defgroup ReducersMinMaxViewContent Content classes for min/max views + * + * @ingroup ReducersMinMax + * + * Minimum and maximum reducer view classes inherit from a “view content” + * class. The content class defines the actual data members for the view, + * and provides typedefs and member functions for accessing the data members + * as needed to support the view functionality. + * + * There are two content classes, which encapsulate the differences between + * simple min/max reducers and min/max with index reducers: + * + * - view_content + * - index_view_content + * + * @note An obvious, and arguably simpler, encapsulation strategy would be + * to just let the `Type` of a min/max view be an (index, value) pair + * structure for min_index and max_index reducers. Then all views + * would just have a `Type` data member and an `is_set` data member, + * and the comparator for min_index and max_index views could be + * customized to consider only the value component of the (index, + * value) `Type` pair.
Unfortunately, this would break binary + * compatibility with reducer_max_index and reducer_min_index in + * Cilk library 0.9, because the memory layout of an (index, value) + * pair followed by a `bool` is different from the memory layout of an + * index data member followed by a value data member followed by a + * `bool` data member. The content class is designed to exactly + * replicate the layout of the views in library 0.9 reducers. + * + * A content class `C`, and its objects `c`, must define the following: + * + * Definition | Meaning + * ------------------------------------|-------- + * `C::value_type` | A typedef for `Type` of the view. (A `std::pair<Index, Type>` for min_index and max_index views). + * `C::comp_value_type` | A typedef for the type of value compared by the view’s `compare()` function. + * `C()` | Constructs the content with the identity value. + * `C(const value_type&)` | Constructs the content with a specified value. + * `c.is_set()` | Returns true if the content has a known value. + * `c.value()` | Returns the content’s value. + * `c.set_value(const value_type&)` | Sets the content’s value. (The value becomes known.) + * `c.comp_value()` | Returns a const reference to the value or component of the value that is to be compared by the view’s comparator. + * `C::comp_value(const value_type&)` | Returns a const reference to a value or component of a value that is to be compared by the view’s comparator. + * + * @see view_base + */ + +/** Content class for op_min_view and op_max_view. + * + * @tparam Type The value type of the op_min_view or op_max_view. + * @tparam Compare The comparator class specified for the op_min_view or + * op_max_view. (_Not_ the derived comparator class actually + * used by the view_base. For example, the view_content of an + * `op_min_view<int>` will have `Compare = std::less<int>`, + * but its comparator_base will have + * `Compare = reverse_predicate< std::less<int> >`.) 
+ * @tparam ForMax `true` if this is the content class for an op_max_view, + * `false` if it is for an op_min_view. + * + * @note The general implementation of view_content uses an `is_set` data + * member. There is also a specialization which implements the + * ReducersMinMaxIsSet "is_set optimization". View classes that + * inherit from view_content do not need to know anything about the + * difference, though; the details are abstracted away in the + * view_content interface. + * + * @see ReducersMinMaxViewContent + * + * @ingroup ReducersMinMaxViewContent + * @ingroup ReducersMinMax + */ +template < typename Type + , typename Compare + , bool ForMax + , bool = do_is_set_optimization<Type, Compare>::value + > +class view_content { + Type m_value; + bool m_is_set; +public: + /// The value type of the view. + typedef Type value_type; + + /// The type compared by the view’s `compare()` function (which is the same + /// as the value type for view_content). + typedef Type comp_value_type; + + /// Construct with the identity value. (The value is value-initialized and + /// marked as not set.) + view_content() : m_value(), m_is_set(false) {} + + /// Construct with a defined value. + view_content(const value_type& value) : m_value(value), m_is_set(true) {} + + /// Get the value. + value_type value() const { return m_value; } + + /// Set the value. (The value becomes known.) + void set_value(const value_type& value) + { + m_value = value; + m_is_set = true; + } + + /// Get the comparison value (which is the same as the value for + /// view_content). + const comp_value_type& comp_value() const { return m_value; } + + /// Given an arbitrary value, get the corresponding comparison value (which + /// is the same as the value for view_content). + static const comp_value_type& comp_value(const value_type& value) + { + return value; + } + + /// Get a const reference to the value part of the value (which is the same + /// as the value for view_content).
+ const Type& get_reference() const { return m_value; } + + /// Get a const reference to the index part of the value (which is + /// meaningless for non-index reducers, but required for view_base). + const Type& get_index_reference() const { return m_value; } + + /// Test if the value is defined. + bool is_set() const { return m_is_set; } +}; + +/// @cond + +/* This is the specialization of the view_content class for cases where the + * fourth (unnamed `bool`) template argument is `true` (i.e., where the is_set + * optimization is applicable). It has no `is_set` data member. + */ +template < typename Type + , typename Compare + , bool ForMax + > +class view_content<Type, Compare, ForMax, true> { + typedef identity_value<Type, Compare, ForMax> Identity; + Type m_value; +public: + typedef Type value_type; + typedef Type comp_value_type; + + /// Construct with the identity value, as stored by + /// `Identity::set_identity()`. + view_content() { Identity::set_identity(m_value); } + + view_content(const value_type& value) : m_value(value) {} + + value_type value() const { return m_value; } + + void set_value(const value_type& value) + { + m_value = value; + } + + const comp_value_type& comp_value() const { return m_value; } + + static const comp_value_type& comp_value(const value_type& value) + { + return value; + } + + const Type& get_reference() const { return m_value; } + + const Type& get_index_reference() const { return m_value; } + + /// Test if the value is defined. (Always true in this specialization.) + bool is_set() const { return true; } +}; + +/// @endcond + + +/** Content class for op_min_index_view and op_max_index_view. + * + * @tparam Index The index type of the op_min_index_view or + * op_max_index_view. + * @tparam Type The value type of the op_min_index_view or + * op_max_index_view. (_Not_ the value type of the view, which + * will be `std::pair<Index, Type>`.) + * @tparam Compare The comparator class specified for the op_min_index_view or + * op_max_index_view. (_Not_ the derived comparator class + * actually used by the view_base.
For example, the + * index_view_content of an `op_min_index_view<int>` will have + * `Compare = std::less<int>`, but its comparator_base will + * have `Compare = reverse_predicate< std::less<int> >`.) + * @tparam ForMax `true` if this is the content class for an + * op_max_index_view, `false` if it is for an + * op_min_index_view. + * + * @see ReducersMinMaxViewContent + * + * @ingroup ReducersMinMaxViewContent + * @ingroup ReducersMinMax + */ +template < typename Index + , typename Type + , typename Compare + , bool ForMax + > +class index_view_content { + typedef identity_value<Type, Compare, ForMax> Identity; + + Index m_index; + Type m_value; + bool m_is_set; +public: + /// The value type of the view (which is an <index, value> pair for + /// index_view_content). + typedef std::pair<Index, Type> value_type; + + /// The type compared by the view’s `compare()` function (which is the data + /// value type for index_view_content). + typedef Type comp_value_type; + + /// Construct with the identity value. (The index and value are + /// value-initialized and the content is marked as not set.) + index_view_content() : m_index(), m_value(), m_is_set(false) {} + + /// Construct with an index/value pair. + index_view_content(const value_type& value) : + m_index(value.first), m_value(value.second), m_is_set(true) {} + + /// Construct with an index and a value. + index_view_content(const Index& index, const Type& value) : + m_index(index), m_value(value), m_is_set(true) {} + + /// Construct with just an index. (The value is left undefined: `is_set()` + /// will return false.) + index_view_content(const Index& index) : + m_index(index), m_value(), m_is_set(false) {} + + /// Get the value (as an index/value pair). + value_type value() const { return value_type(m_index, m_value); } + + /// Set the index and value from an index/value pair. (The value becomes + /// known.) + void set_value(const value_type& value) + { + m_index = value.first; + m_value = value.second; + m_is_set = true; + } + + /// Get the comparison value (which is the value component of the + /// index/value pair for index_view_content).
+ const comp_value_type& comp_value() const { return m_value; } + + /// Given an arbitrary value (i.e., index/value pair), get the + /// corresponding comparison value (which is the value component of the + /// index/value pair for index_view_content). + static const comp_value_type& comp_value(const value_type& value) + { return value.second; } + + /// Get a const reference to the value part of the value. + const Type& get_reference() const { return m_value; } + + /// Get a const reference to the index part of the value. + const Index& get_index_reference() const { return m_index; } + + /// Test if the value is defined. + bool is_set() const { return m_is_set; } +}; + + +template <typename View> class rhs_proxy; + +/** Create an rhs_proxy. + * + * @param value The value to be held by the proxy. + * @param view The view that the proxy is created from. + */ +template <typename View> +inline rhs_proxy<View> +make_proxy(const typename View::value_type& value, const View& view); + +template <typename Content, typename Less, typename Compare> class view_base; + + +/** Class to represent the right-hand side of + * `*reducer = {min|max}_of(*reducer, value)`. + * + * The only assignment operator for a min/max view class takes an rhs_proxy as + * its operand. This results in the syntactic restriction that the only + * expressions that can be assigned to a min/max view are ones which generate + * an rhs_proxy — that is, expressions of the form `max_of(view, value)` and + * `min_of(view, value)`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; otherwise, + * the behavior will be undefined. (I.e., `*r1 = min_of(*r1, x)` is legal; + * `*r1 = min_of(*r2, x)` is illegal.) This condition will be checked with a + * runtime assertion when compiled in debug mode. + * + * @tparam View The view class (op_{min|max}[_index]_view) that this proxy + * was created from.
+ * + * @see view_base + * + * @ingroup ReducersMinMax + */ +template <typename View> +class rhs_proxy { + typedef typename View::less_type less_type; + typedef typename View::compare_type compare_type; + typedef typename View::value_type value_type; + typedef typename View::content_type content_type; + typedef typename content_type::comp_value_type comp_value_type; + + friend class view_base<content_type, less_type, compare_type>; + friend rhs_proxy make_proxy<View>( + const typename View::value_type& value, + const View& view); + + typed_indirect_binary_function< + compare_type, comp_value_type, comp_value_type, bool> + m_comp; + const View* m_view; + value_type m_value; + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + + // Constructor (called from make_proxy). The initializers are listed in + // member declaration order, which is the order in which they are run. + rhs_proxy(const View* view, + const value_type& value, + const compare_type* compare) : + m_comp(compare), m_view(view), m_value(value) {} + + // Check matching view, then return value (called from view_base::assign). + value_type value(const typename View::base* view) const + { + __CILKRTS_ASSERT(view == m_view); + return m_value; + } + +public: + + /** Support max_of(max_of(view, value), value) and the like. + * + * @param x The value to combine with the value held by this proxy. + * + * @return A new proxy for the same view, holding @a x if the comparator + * returns true for (the held value, @a x), and the held value + * otherwise. + */ + rhs_proxy calc(const value_type& x) const + { + return rhs_proxy( + m_view, + m_comp( content_type::comp_value(m_value), + content_type::comp_value(x) + ) ? x : m_value, + m_comp.pointer()); + } +}; + + +/** Create an rhs_proxy that holds @a value and refers to @a view and to the + * view’s comparator pointer. + */ +template <typename View> +inline rhs_proxy<View> +make_proxy(const typename View::value_type& value, const View& view) +{ + return rhs_proxy<View>(&view, value, view.compare_pointer()); +} + +//@} + +/** Base class for min and max view classes. + * + * This class accumulates the minimum or maximum of a set of values which have + * occurred as arguments to the `calc()` function, as determined by a + * comparator.
The accumulated value will be the first `calc()` argument value + * `x` such that `compare(x, y)` is false for every `calc()` argument value + * `y`. + * + * If the comparator is `std::less`, then the accumulated value is the first + * argument value which is not less than any other argument value, i.e., the + * maximum. Similarly, if the comparator is `reverse_predicate<std::less>`, + * which is equivalent to `std::greater`, then the accumulated value is the + * first argument value which is not greater than any other argument value, + * i.e., the minimum. + * + * @note This class provides the definitions that are required for a class + * that will be used as the parameter of a + * min_max_internal::monoid_base specialization. + * + * @tparam Content A content class that provides the value types and data + * members for the view. + * @tparam Less A “less than” binary predicate that defines the min or + * max function. + * @tparam Compare A binary predicate to be used to compare the values. + * (The same as @a Less for max reducers; its reversal for + * min reducers.) + * + * @see ReducersMinMaxViewContent + * @see op_max_view + * @see op_min_view + * @see op_max_index_view + * @see op_min_index_view + * @see monoid_base + * + * @ingroup ReducersMinMax + */ +template <typename Content, typename Less, typename Compare> +class view_base : + // comparator_base comes first to ensure that it will get empty base class + // treatment + private comparator_base<typename Content::comp_value_type, Compare>, + private Content +{ + typedef comparator_base<typename Content::comp_value_type, Compare> base; + using base::compare; + using Content::value; + using Content::set_value; + using Content::comp_value; + typedef Content content_type; + + template <typename View> friend class rhs_proxy; + template <typename View> + friend rhs_proxy<View> make_proxy(const typename View::value_type& value, const View& view); + +public: + + /** @name Monoid support.
+ */ + //@{ + + /** Value type. Required by @ref monoid_with_view. + */ + typedef typename Content::value_type value_type; + + /** The type of the comparator specified by the user, that defines the + * ordering on @a Type. Required by min_max_internal::monoid_base. + */ + typedef Less less_type; + + /** The type of the comparator actually used by the view. Required by + * min_max_internal::monoid_base. (This is the same as the @ref less_type + * for a max reducer, or `reverse_predicate<less_type>` for a min reducer.) + */ + typedef Compare compare_type; + + /** Reduce operation. Required by @ref monoid_with_view. + * + * Takes the value of @a other if @a other is set and either this view is + * not set or the comparator returns true for (this view’s comparison + * value, the comparison value of @a other). + */ + void reduce(view_base* other) + { + if ( other->is_set() && + ( !this->is_set() || + compare(this->comp_value(), other->comp_value()) ) ) + { + this->set_value(other->value()); + } + } + + //@} + + /** Identity constructor. Initializes to the identity value. (A comparator + * pointer is always required; there is no zero-argument constructor.) + */ + explicit view_base(const compare_type* compare) : + base(compare), Content() {} + + /** Value constructor. + */ + template <typename T1> + view_base(const T1& x1, const compare_type* compare) : + base(compare), Content(x1) {} + + /** Value constructor. + */ + template <typename T1, typename T2> + view_base(const T1& x1, const T2& x2, const compare_type* compare) : + base(compare), Content(x1, x2) {} + + + /** Move-in constructor. + */ + explicit view_base(move_in_wrapper<value_type> w, const compare_type* compare) : + base(compare), Content(w.value()) {} + + /** @name Reducer support. + */ + //@{ + + void view_move_in(value_type& v) { set_value(v); } + void view_move_out(value_type& v) { v = value(); } + void view_set_value(const value_type& v) { set_value(v); } + value_type view_get_value() const { return value(); } + // view_get_reference() NOT SUPPORTED + + //@} + + /** Is the value defined? + */ + using Content::is_set; + + /** Reference to contained value data member. + * @deprecated For legacy reducers only. + */ + using Content::get_reference; + + /** Reference to contained index data member.
+ * (Meaningless for non-index reducers.) + * @deprecated For legacy reducers only. + */ + using Content::get_index_reference; + +protected: + + /** Update the min/max value: the view takes the value @a x if it is not + * set, or if the comparator returns true for (the view’s comparison + * value, the comparison value of @a x). + */ + void calc(const value_type& x) + { + if (!is_set() || compare(comp_value(), comp_value(x))) set_value(x); + } + + /** Assign the result of a `{min|max}_of(view, value)` expression to the + * view. + * + * @see rhs_proxy + */ + template <typename View> + void assign(const rhs_proxy<View>& rhs) + { + calc(rhs.value(this)); + } + +}; + + +/** Base class for min and max monoid classes. + * + * The unique characteristic of minimum and maximum reducers is that they + * incorporate a comparator functor that defines what “minimum” or “maximum” + * means. The monoid for a reducer contains the comparator that will be used + * for the reduction. If the comparator is a function or a class with state, + * then each view will have a pointer to the comparator. + * + * This means that the `construct()` functions first construct the monoid + * (possibly with an explicit comparator argument), and then construct the + * view with a pointer to the monoid’s comparator. + * + * @tparam View The view class. + * @tparam Align If true, reducers instantiated on this monoid will be + * aligned. By default, library reducers (unlike legacy + * library reducer _wrappers_) are unaligned. + * + * @see view_base + * + * @ingroup ReducersMinMax + */ +template <typename View, bool Align = false> +class monoid_base : public monoid_with_view<View, Align> +{ + typedef typename View::compare_type compare_type; + typedef typename View::less_type less_type; + const compare_type m_compare; + + const compare_type* compare_pointer() const { return &m_compare; } + + using cilk::monoid_base<typename View::value_type, View>::provisional; + +public: + + /** Default constructor uses default comparator. + */ + monoid_base() : m_compare() {} + + /** Constructor. + * + * @param compare The comparator to use.
+ */ + monoid_base(const compare_type& compare) : m_compare(compare) {} + + /** Create an identity view. + * + * Min/max view identity constructors take a pointer to the monoid’s + * comparator as an argument. + * + * @param v The address of the uninitialized memory in which the view + * will be constructed. + */ + void identity(View *v) const { ::new((void*) v) View(compare_pointer()); } + + /** @name construct functions + * + * Min/max monoid `construct()` functions optionally take one or two value + * arguments, a @ref move_in argument, and/or a comparator argument. + */ + //@{ + + template <typename Monoid> + static void construct(Monoid* monoid, View* view) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(monoid->compare_pointer()) ); } + + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, const T1& x1) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, monoid->compare_pointer()) ); } + + template <typename Monoid, typename T1, typename T2> + static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2) + { provisional( new ((void*)monoid) Monoid() ).confirm_if( + new ((void*)view) View(x1, x2, monoid->compare_pointer()) ); } + + template <typename Monoid> + static void construct(Monoid* monoid, View* view, const less_type& compare) + { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if( + new ((void*)view) View(monoid->compare_pointer()) ); } + + template <typename Monoid, typename T1> + static void construct(Monoid* monoid, View* view, const T1& x1, const less_type& compare) + { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if( + new ((void*)view) View(x1, monoid->compare_pointer()) ); } + + template <typename Monoid, typename T1, typename T2> + static void construct(Monoid* monoid, View* view, const T1& x1, const T2& x2, const less_type& compare) + { provisional( new ((void*)monoid) Monoid(compare) ).confirm_if( + new
((void*)view) View(x1, x2, monoid->compare_pointer()) ); } + + //@} +}; + +} //namespace min_max_internal + + +/** @defgroup ReducersMinMaxMaxValue Maximum reducers (value only) + * + * These reducers will find the largest value from a set of values. + * + * @ingroup ReducersMinMax + */ +//@{ + +/** The maximum reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_max<Type, Compare> >`. It accumulates the maximum, + * as determined by a comparator, of a set of values which have occurred as + * arguments to the `calc_max()` function. The accumulated value will be the + * first argument `x` such that `compare(x, y)` is false for every argument + * `y`. + * + * If the comparator is `std::less`, then the accumulated value is the first + * argument value which is not less than any other argument value, i.e., the + * maximum. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `calc_max()` function would be used in an expression like + * `r->calc_max(a)` where `r` is an op_max reducer variable. + * + * @tparam Type The type of the values compared by the reducer. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. + * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It + * defines the “less than” relation used to compute the + * maximum. + * + * @see ReducersMinMax + * @see op_max + */ +template <typename Type, typename Compare> +class op_max_view : public min_max_internal::view_base< + min_max_internal::view_content<Type, Compare, true>, + Compare, + Compare> +{ + typedef min_max_internal::view_base< + min_max_internal::view_content<Type, Compare, true>, + Compare, + Compare> base; + using base::calc; + using base::assign; + friend class min_max_internal::rhs_proxy<op_max_view>; + +public: + + /** @name Constructors.
+ * + * All op_max_view constructors simply pass their arguments on to the + * @ref view_base base class. + */ + //@{ + + // NOTE(review): view_base declares no zero-argument constructor in this + // file, so `base()` presumably cannot compile if this overload is ever + // instantiated; confirm that it is unused. + op_max_view() : base() {} + + template <typename T1> + op_max_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_max_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + //@} + + /** @name View modifier operations. + */ + //@{ + + /** Maximize with a value. + * + * If @a x is greater than the current value of the view (as defined by + * the reducer’s comparator), or if the view was created without an + * initial value and its value has never been updated (with `calc_max()` + * or `= max_of()`), then the value of the view is set to @a x. + * + * @param x The value to maximize the view’s value with. + * + * @return A reference to the view. (Allows chaining + * `view.calc_max(a).calc_max(b)…`.) + */ + op_max_view& calc_max(const Type& x) { calc(x); return *this; } + + /** Assign the result of a `max_of(view, value)` expression to the view. + * + * @param rhs An rhs_proxy value created by a `max_of(view, value)` + * expression. + * + * @return A reference to the view. + * + * @see min_max_internal::rhs_proxy + */ + op_max_view& operator=(const min_max_internal::rhs_proxy<op_max_view>& rhs) + { assign(rhs); return *this; } + + //@} +}; + + +/** Compute the maximum of the value in an op_max_view and another value. + * + * The result of this computation can only be assigned back to the original + * view or used in another max_of() call.
For example, + * + * *reducer = max_of(*reducer, x); + * *reducer = max_of(x, *reducer); + * + * @see min_max_internal::rhs_proxy + */ +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const op_max_view<Type, Compare>& view, const Type& value) +{ + return min_max_internal::make_proxy(value, view); +} + +/// @copydoc max_of(const op_max_view<Type, Compare>&, const Type&) +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const Type& value, const op_max_view<Type, Compare>& view) +{ + return min_max_internal::make_proxy(value, view); +} + +/** Nested maximum computation. + * + * Compute the maximum of the result of a max_of() call and another value. + * + * The result of this computation can only be assigned back to the original + * view or wrapper, or used in another max_of() call. For example, + * + * *reducer = max_of(x, max_of(y, *reducer)); + * wrapper = max_of(max_of(wrapper, x), y); + * + * @param proxy The result of an inner max_of() call. + * @param value The value to maximize the proxy’s value with. + * + * @return The rhs_proxy computed by `proxy.calc(value)`. + * + * @see min_max_internal::rhs_proxy + */ +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >& proxy, + const Type& value) +{ + return proxy.calc(value); +} + +/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >&, const Type&) +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const Type& value, + const min_max_internal::rhs_proxy< op_max_view<Type, Compare> >& proxy) +{ + return proxy.calc(value); +} + + +/** Monoid class for maximum reductions. Instantiate the cilk::reducer template + * class with an op_max monoid to create a maximum reducer class.
For example, + * to compute the maximum of a set of `int` values: + * + * cilk::reducer< cilk::op_max<int> > r; + * + * @see ReducersMinMax + * @see op_max_view + */ +template <typename Type, typename Compare=std::less<Type>, bool Align = false> +class op_max : + public min_max_internal::monoid_base<op_max_view<Type, Compare>, Align> +{ + typedef min_max_internal::monoid_base<op_max_view<Type, Compare>, Align> + base; +public: + /// Construct with the default comparator. (The base monoid is + /// default-constructed.) + op_max() {} + /// Construct with the specified comparator, which is passed on to the base + /// monoid’s constructor. + op_max(const Compare& compare) : base(compare) {} +}; + +//@} + + +/** @defgroup ReducersMinMaxMinValue Minimum reducers (value only) + * + * These reducers will find the smallest value from a set of values. + * + * @ingroup ReducersMinMax + */ +//@{ + +/** The minimum reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_min<Type, Compare> >`. It accumulates the minimum, + * as determined by a comparator, of a set of values which have occurred as + * arguments to the `calc_min()` function. The accumulated value will be the + * first argument `x` such that `compare(y, x)` is false for every argument + * `y`. + * + * If the comparator is `std::less`, then the accumulated value is the first + * argument value which no other argument value is less than, i.e., the + * minimum. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `calc_min()` function would be used in an expression like + * `r->calc_min(a)` where `r` is an op_min reducer variable. + * + * @tparam Type The type of the values compared by the reducer. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. + * @tparam Compare A `Strict Weak Ordering` whose argument type is @a Type. It + * defines the “less than” relation used to compute the + * minimum.
+ * + * @see ReducersMinMax + * @see op_min + */ +template <typename Type, typename Compare> +class op_min_view : public min_max_internal::view_base< + min_max_internal::view_content<Type, Compare, false>, + Compare, + min_max_internal::reverse_predicate<Compare, Type> > +{ + typedef min_max_internal::view_base< + min_max_internal::view_content<Type, Compare, false>, + Compare, + min_max_internal::reverse_predicate<Compare, Type> > base; + using base::calc; + using base::assign; + friend class min_max_internal::rhs_proxy<op_min_view>; + +public: + /** @name Constructors. + * + * All op_min_view constructors simply pass their arguments on to the + * @ref view_base base class. + */ + //@{ + + // NOTE(review): view_base declares no zero-argument constructor in this + // file, so `base()` presumably cannot compile if this overload is ever + // instantiated; confirm that it is unused. + op_min_view() : base() {} + + template <typename T1> + op_min_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_min_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + //@} + + /** @name View modifier operations. + */ + //@{ + + /** Minimize with a value. + * + * If @a x is less than the current value of the view (as defined by the + * reducer’s comparator), or if the view was created without an initial + * value and its value has never been updated (with `calc_min()` or + * `= min_of()`), then the value of the view is set to @a x. + * + * @param x The value to minimize the view’s value with. + * + * @return A reference to the view. (Allows chaining + * `view.calc_min(a).calc_min(b)…`.) + */ + op_min_view& calc_min(const Type& x) { calc(x); return *this; } + + /** Assign the result of a `min_of(view, value)` expression to the view. + * + * @param rhs An rhs_proxy value created by a `min_of(view, value)` + * expression. + * + * @return A reference to the view. + * + * @see min_max_internal::rhs_proxy + */ + op_min_view& operator=(const min_max_internal::rhs_proxy<op_min_view>& rhs) + { assign(rhs); return *this; } + + //@} +}; + + +/** Compute the minimum of the value in an op_min_view and another value.
+ * + * The result of this computation can only be assigned back to the original + * view or used in another min_of() call. For example, + * + * *reducer = min_of(*reducer, x); + * *reducer = min_of(x, *reducer); + * + * @see min_max_internal::view_base::rhs_proxy + */ +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const op_min_view<Type, Compare>& view, const Type& value) +{ + return min_max_internal::make_proxy(value, view); +} + +/// @copydoc min_of(const op_min_view<Type, Compare>&, const Type&) +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const Type& value, const op_min_view<Type, Compare>& view) +{ + return min_max_internal::make_proxy(value, view); +} + +/** Nested minimum computation. + * + * Compute the minimum of the result of a min_of() call and another value. + * + * The result of this computation can only be assigned back to the original + * view or wrapper, or used in another min_of() call. For example, + * + * *reducer = min_of(x, min_of(y, *reducer)); + * wrapper = min_of(min_of(wrapper, x), y); + * + * @param proxy The result of an inner min_of() call. + * @param value The value to minimize the proxy’s value with. + * + * @return The rhs_proxy computed by `proxy.calc(value)`. + * + * @see min_max_internal::rhs_proxy + */ +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >& proxy, + const Type& value) +{ + return proxy.calc(value); +} + +/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >&, const Type&) +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const Type& value, + const min_max_internal::rhs_proxy< op_min_view<Type, Compare> >& proxy) +{ + return proxy.calc(value); +} + + +/** Monoid class for minimum reductions. Instantiate the cilk::reducer template + * class with an op_min monoid to create a minimum reducer class.
For example, + * to compute the minimum of a set of `int` values: + * + * cilk::reducer< cilk::op_min<int> > r; + * + * @see ReducersMinMax + * @see op_min_view + */ +template <typename Type, typename Compare=std::less<Type>, bool Align = false> +class op_min : public min_max_internal::monoid_base<op_min_view<Type, Compare>, Align> { + typedef min_max_internal::monoid_base<op_min_view<Type, Compare>, Align> base; +public: + /// Construct with the default comparator. (The base monoid is + /// default-constructed.) + op_min() {} + /// Construct with the specified comparator, which is passed on to the base + /// monoid’s constructor. + op_min(const Compare& compare) : base(compare) {} +}; + +//@} + + +/** @defgroup ReducersMinMaxMaxIndex Maximum reducers (value and index) + * + * These reducers will find the largest value from a set of values, and its + * index in the set. + * + * @ingroup ReducersMinMax + */ +//@{ + +/** The maximum index reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_max_index<Index, Type, Compare> >`. It accumulates + * the maximum, as determined by a comparator, of a set of values which have + * occurred as arguments to the `calc_max()` function, and records the index + * of the maximum value. The accumulated value will be the first argument `x` + * such that `compare(x, y)` is false for every argument `y`. + * + * If the comparator is `std::less`, then the accumulated value is the first + * argument value which is not less than any other argument value, i.e., the + * maximum. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `calc_max()` function would be used in an expression like + * `r->calc_max(i, a)` where `r` is an op_max_index reducer + * variable. + * + * @note The word “index” suggests an integer index into an array, but there + * is no restriction on the index type or how it should be used.
In + * general, it may be convenient to use it for any kind of key that + * can be used to locate the maximum value in the collection that it + * came from — for example: + * - An index into an array. + * - A key into an STL map. + * - An iterator into any STL container. + * + * @note A max_index reducer is essentially a max reducer whose value type + * is a `std::pair<Index, Type>`. This fact is camouflaged in the view + * `calc_max` function, the global `max_of` functions, and the reducer + * value constructor, which can all take an index argument and a value + * argument as an alternative to a single `std::pair` argument. + * However, the reducer `set_value()`, `get_value()`, `move_in()`, and + * `move_out()` functions work only with pairs, not with individual + * value and/or index arguments. + * + * @tparam Index The type of the indices associated with the values. + * @tparam Type The type of the values compared by the reducer. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. + * @tparam Compare Used to compare the values. It must be a binary predicate. + * If it is omitted, then the view computes the conventional + * arithmetic maximum. + * + * @see ReducersMinMax + * @see op_max_index + */ +template <typename Index, typename Type, typename Compare> +class op_max_index_view : public min_max_internal::view_base< + min_max_internal::index_view_content<Index, Type, Compare, true>, + Compare, + Compare> +{ + typedef min_max_internal::view_base< + min_max_internal::index_view_content<Index, Type, Compare, true>, + Compare, + Compare> base; + using base::calc; + using base::assign; + typedef std::pair<Index, Type> pair_type; + friend class min_max_internal::rhs_proxy<op_max_index_view>; + +public: + /** @name Constructors. 
+ * + * All op_max_index_view constructors simply pass their arguments on to the + * @ref view_base base class, except for the `(index, value [, compare])` + * constructors, which create a `std::pair` containing the index and value. + */ + //@{ + + op_max_index_view() : base() {} + + template <typename T1> + op_max_index_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_max_index_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + template <typename T1, typename T2, typename T3> + op_max_index_view(const T1& x1, const T2& x2, const T3& x3) : base(x1, x2, x3) {} + + op_max_index_view(const Index& i, const Type& v) : base(pair_type(i, v)) {} + + op_max_index_view(const Index& i, const Type& v, const typename base::compare_type* c) : + base(pair_type(i, v), c) {} + + //@} + + /** Maximize with a value and index. + * + * If @a x is greater than the current value of the view (as defined by + * the reducer’s comparator), or if the view was created without an + * initial value and its value has never been updated (with `calc_max()` + * or `= max_of()`), then the value of the view is set to @a x, and the + * index is set to @a i. + * + * @param i The index of the value @a x. + * @param x The value to maximize the view’s value with. + * + * @return A reference to the view. (Allows + * `view.calc_max(i, a).calc_max(j, b)…`.) + */ + op_max_index_view& calc_max(const Index& i, const Type& x) + { calc(pair_type(i, x)); return *this; } + + /** Maximize with an index/value pair. + * + * If @a pair.second is greater than the current value of the view (as + * defined by the reducer’s comparator), or if the view was created + * without an initial value and its value has never been updated (with + * `calc_max()` or `= max_of()`), then the value of the view is set to + * @a pair.second, and the index is set to @a pair.first. + * + * @param pair A pair containing a value to maximize the view’s value + * with and its associated index. 
+ * + * @return A reference to the view. (Allows + * `view.calc_max(p1).calc_max(p2)…`.) + */ + op_max_index_view& calc_max(const pair_type& pair) + { calc(pair); return *this; } + + /** Assign the result of a `max_of(view, index, value)` expression to the + * view. + * + * @param rhs An rhs_proxy value created by a `max_of(view, index, value)` + * expression. + * + * @return A reference to the view. + * + * @see min_max_internal::rhs_proxy + */ + op_max_index_view& operator=(const min_max_internal::rhs_proxy<op_max_index_view>& rhs) + { assign(rhs); return *this; } +}; + + +/** Compute the maximum of the value in a view and another value. + * + * The result of this computation can only be assigned back to the original + * view or used in another max_of() call. For example, + * + * *reducer = max_of(*reducer, i, x); + * *reducer = max_of(i, x, *reducer); + * + * @see min_max_internal::rhs_proxy + */ +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const op_max_index_view<Index, Type, Compare>& view, + const Index& index, const Type& value) +{ + return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view); +} + +/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const Index& index, const Type& value, + const op_max_index_view<Index, Type, Compare>& view) +{ + return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view); +} + +/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const op_max_index_view<Index, Type, Compare>& view, + const 
std::pair<Index, Type>& pair) +{ + return min_max_internal::make_proxy(pair, view); +} + +/// @copydoc max_of(const op_max_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const std::pair<Index, Type>& pair, + const op_max_index_view<Index, Type, Compare>& view) +{ + return min_max_internal::make_proxy(pair, view); +} + +/** Nested computation of the maximum of the value in a view and other values. + * + * Compute the maximum of the result of a max_of() call and another value. + * + * The result of this computation can only be assigned back to the original + * view or used in another max_of() call. For example, + * + * *reducer = max_of(x, max_of(y, *reducer)); + * *reducer = max_of(max_of(*reducer, x), y); + * + * @see min_max_internal::rhs_proxy + */ +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy, + const Index& index, const Type& value) +{ + return proxy.calc(std::pair<Index, Type>(index, value)); +} + +/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const Index& index, const Type& value, + const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy) +{ + return proxy.calc(std::pair<Index, Type>(index, value)); +} + +/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > 
+max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy, + const std::pair<Index, Type>& pair) +{ + return proxy.calc(pair); +} + +/// @copydoc max_of(const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> > +max_of(const std::pair<Index, Type>& pair, + const min_max_internal::rhs_proxy< op_max_index_view<Index, Type, Compare> >& proxy) +{ + return proxy.calc(pair); +} + + +/** Monoid class for maximum reductions with index. Instantiate the + * cilk::reducer template class with an op_max_index monoid to create a + * max_index reducer class. For example, to compute the maximum of an array of + * `double` values and the array index of the max value: + * + * cilk::reducer< cilk::op_max_index<unsigned, double> > r; + * + * @see ReducersMinMax + * @see op_max_index_view + */ +template < typename Index + , typename Type + , typename Compare=std::less<Type> + , bool Align = false + > +class op_max_index : public min_max_internal::monoid_base<op_max_index_view<Index, Type, Compare>, Align> +{ + typedef min_max_internal::monoid_base< + op_max_index_view<Index, Type, Compare>, Align> base; +public: + /// Construct with default comparator. + op_max_index() {} + /// Construct with specified comparator. + op_max_index(const Compare& compare) : base(compare) {} +}; + +//@} + + + +/** @defgroup ReducersMinMaxMinIndex Minimum reducers (value and index) + * + * These reducers will find the smallest value from a set of values, and its + * index in the set. + * + * @ingroup ReducersMinMax + */ +//@{ + +/** The minimum index reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer<cilk::op_min_index<Index, Type, Compare> >`. 
It accumulates + * the minimum, as determined by a comparator, of a set of values which have + * occurred as arguments to the `calc_min()` function, and records the index + * of the minimum value. The accumulated value will be the first argument `x` + * such that `compare(y, x)` is false for every argument `y`. + * + * If the comparator is `std::less`, then the accumulated value is the first + * argument value which no other argument value is less than, i.e., the + * minimum. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `calc_min()` function would be + * used in an expression like `r->calc_min(i, a)` where `r` is an + * op_min_index reducer variable. + * + * @note The word “index” suggests an integer index into an array, but there + * is no restriction on the index type or how it should be used. In + * general, it may be convenient to use it for any kind of key that + * can be used to locate the minimum value in the collection that it + * came from — for example: + * - An index into an array. + * - A key into an STL map. + * - An iterator into any STL container. + * + * @note A min_index reducer is essentially a min reducer whose value type + * is a `std::pair<Index, Type>`. This fact is camouflaged in the view + * `calc_min` function, the global `min_of` functions, and the reducer + * value constructor, which can all take an index argument and a value + * argument as an alternative to a single `std::pair` argument. + * However, the reducer `set_value()`, `get_value()`, `move_in()`, and + * `move_out()` functions work only with pairs, not with individual + * value and/or index arguments. + * + * @tparam Index The type of the indices associated with the values. + * @tparam Type The type of the values compared by the reducer. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. 
+ * @tparam Compare Used to compare the values. It must be a binary predicate. + * If it is omitted, then the view computes the conventional + * arithmetic minimum. + * + * @see ReducersMinMax + * @see op_min_index + */ +template <typename Index, typename Type, typename Compare> +class op_min_index_view : public min_max_internal::view_base< + min_max_internal::index_view_content<Index, Type, Compare, false>, + Compare, + min_max_internal::reverse_predicate<Compare, Type> > +{ + typedef min_max_internal::view_base< + min_max_internal::index_view_content<Index, Type, Compare, false>, + Compare, + min_max_internal::reverse_predicate<Compare, Type> > base; + using base::calc; + using base::assign; + typedef std::pair<Index, Type> pair_type; + friend class min_max_internal::rhs_proxy<op_min_index_view>; + +public: + /** @name Constructors. + * + * All op_min_index_view constructors simply pass their arguments on to the + * @ref view_base base class, except for the `(index, value [, compare])` + * constructors, which create a `std::pair` containing the index and value. + */ + //@{ + + op_min_index_view() : base() {} + + template <typename T1> + op_min_index_view(const T1& x1) : base(x1) {} + + template <typename T1, typename T2> + op_min_index_view(const T1& x1, const T2& x2) : base(x1, x2) {} + + template <typename T1, typename T2, typename T3> + op_min_index_view(const T1& x1, const T2& x2, const T3& x3) : base(x1, x2, x3) {} + + op_min_index_view(const Index& i, const Type& v) : base(pair_type(i, v)) {} + + op_min_index_view(const Index& i, const Type& v, const typename base::compare_type* c) : + base(pair_type(i, v), c) {} + + //@} + + /** Minimize with a value and index. 
+ * + * If @a x is less than the current value of the view (as defined by + * the reducer’s comparator), or if the view was created without an + * initial value and its value has never been updated (with `calc_min()` + * or `= min_of()`), then the value of the view is set to @a x, and the + * index is set to @a i. + * + * @param i The index of the value @a x. + * @param x The value to minimize the view’s value with. + * + * @return A reference to the view. (Allows + * `view.calc_min(i, a).calc_min(j, b)…`.) + */ + op_min_index_view& calc_min(const Index& i, const Type& x) + { calc(pair_type(i, x)); return *this; } + + /** Minimize with an index/value pair. + * + * If @a pair.second is less than the current value of the view (as + * defined by the reducer’s comparator), or if the view was created + * without an initial value and its value has never been updated (with + * `calc_min()` or `= min_of()`), then the value of the view is set to + * @a pair.second, and the index is set to @a pair.first. + * + * @param pair A pair containing a value to minimize the view’s value + * with and its associated index. + * + * @return A reference to the view. (Allows + * `view.calc_min(p1).calc_min(p2)…`.) + */ + op_min_index_view& calc_min(const pair_type& pair) + { calc(pair); return *this; } + + /** Assign the result of a `min_of(view, index, value)` expression to the + * view. + * + * @param rhs An rhs_proxy value created by a `min_of(view, index, value)` + * expression. + * + * @return A reference to the view. + * + * @see min_max_internal::rhs_proxy + */ + op_min_index_view& operator=(const min_max_internal::rhs_proxy<op_min_index_view>& rhs) + { assign(rhs); return *this; } +}; + + +/** Compute the minimum of the value in a view and another value. + * + * The result of this computation can only be assigned back to the original + * view or used in another min_of() call. 
For example, + * + * *reducer = min_of(*reducer, i, x); + * *reducer = min_of(i, x, *reducer); + * + * @see min_max_internal::rhs_proxy + */ +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const op_min_index_view<Index, Type, Compare>& view, + const Index& index, const Type& value) +{ + return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view); +} + +/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const Index& index, const Type& value, + const op_min_index_view<Index, Type, Compare>& view) +{ + return min_max_internal::make_proxy(std::pair<Index, Type>(index, value), view); +} + +/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const op_min_index_view<Index, Type, Compare>& view, + const std::pair<Index, Type>& pair) +{ + return min_max_internal::make_proxy(pair, view); +} + +/// @copydoc min_of(const op_min_index_view<Index, Type, Compare>&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const std::pair<Index, Type>& pair, + const op_min_index_view<Index, Type, Compare>& view) +{ + return min_max_internal::make_proxy(pair, view); +} + +/** Nested computation of the minimum of the value in a view and other values. + * + * Compute the minimum of the result of a min_of() call and another value. + * + * The result of this computation can only be assigned back to the original + * view or used in another min_of() call. 
For example, + * + * *reducer = min_of(x, min_of(y, *reducer)); + * *reducer = min_of(min_of(*reducer, x), y); + * + * @see min_max_internal::rhs_proxy + */ +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy, + const Index& index, const Type& value) +{ + return proxy.calc(std::pair<Index, Type>(index, value)); +} + +/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const Index& index, const Type& value, + const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy) +{ + return proxy.calc(std::pair<Index, Type>(index, value)); +} + +/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy, + const std::pair<Index, Type>& pair) +{ + return proxy.calc(pair); +} + +/// @copydoc min_of(const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >&, const Index&, const Type&) +template <typename Index, typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> > +min_of(const std::pair<Index, Type>& pair, + const min_max_internal::rhs_proxy< op_min_index_view<Index, Type, Compare> >& proxy) +{ + return proxy.calc(pair); +} + + +/** Monoid class for minimum reductions with index. 
Instantiate the + * cilk::reducer template class with an op_min_index monoid to create a + * min_index reducer class. For example, to compute the minimum of an array of + * `double` values and the array index of the min value: + * + * cilk::reducer< cilk::op_min_index<unsigned, double> > r; + * + * @see ReducersMinMax + * @see op_min_index_view + */ +template < typename Index + , typename Type + , typename Compare=std::less<Type> + , bool Align = false + > +class op_min_index : public min_max_internal::monoid_base<op_min_index_view<Index, Type, Compare>, Align> +{ + typedef min_max_internal::monoid_base< + op_min_index_view<Index, Type, Compare>, Align> base; +public: + /// Construct with default comparator. + op_min_index() {} + /// Construct with specified comparator. + op_min_index(const Compare& compare) : base(compare) {} +}; + +//@} + + +/** Deprecated maximum reducer wrapper class. + * + * reducer_max is the same as @ref reducer<@ref op_max>, except that + * reducer_max is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. For + * example, a value is maximized with a `reducer<%op_max>` with + * `r->calc_max(a)`, but a value can be maximized with a `%reducer_max` with + * `r.calc_max(a)`. + * + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_max. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_max` + * and `reducer<%op_max>`. This allows incremental code + * conversion: old code that used `%reducer_max` can pass a + * `%reducer_max` to a converted function that now expects a + * pointer or reference to a `reducer<%op_max>`, and vice + * versa. 
**But see @ref redminmax_compatibility.** + * + * @tparam Type The value type of the reducer. + * @tparam Compare The “less than” comparator type for the reducer. + * + * @see op_max + * @see op_max_view + * @see reducer + * @see ReducersMinMax + * @ingroup ReducersMinMaxMaxValue + */ +template <typename Type, typename Compare=std::less<Type> > +class reducer_max : public reducer< op_max<Type, Compare, true> > +{ + __CILKRTS_STATIC_ASSERT( + ::cilk::internal::class_is_empty< + typename ::cilk::internal::binary_functor<Compare>::type >::value, + "cilk::reducer_max<Type, Compare> only works with " + "an empty Compare class"); + typedef reducer< op_max<Type, Compare, true> > base; +public: + + /// Type of data in a reducer_max. + typedef Type basic_value_type; + + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view type for the reducer. + typedef typename base::view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type monoid_type; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /// The view’s rhs proxy type. + typedef min_max_internal::rhs_proxy<View> rhs_proxy; + + using base::view; + + /** @name Constructors + */ + //@{ + + /// Construct the wrapper in its identity state (either `!is_set()`, or + /// `value() == identity value`). + reducer_max() : base() {} + + /// Construct the wrapper with a specified initial value. + explicit reducer_max(const Type& initial_value) : base(initial_value) {} + + /// Construct the wrapper in its identity state with a specified + /// comparator. + explicit reducer_max(const Compare& comp) : base(comp) {} + + /// Construct the wrapper with a specified initial value and a specified + /// comparator. 
+ reducer_max(const Type& initial_value, const Compare& comp) + : base(initial_value, comp) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_max_view. */ + //@{ + + /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const + bool is_set() const { return view().is_set(); } + + /// @copydoc op_max_view::calc_max(const Type&) + reducer_max& calc_max(const Type& x) + { view().calc_max(x); return *this; } + + /// @copydoc op_max_view::operator=(const min_max_internal::rhs_proxy<op_max_view>&) + reducer_max& operator=(const rhs_proxy& rhs) + { view() = rhs; return *this; } + + //@} + + /** Allow read-only access to the value within the current view. + * + * @returns A const reference to the value within the current view. + */ + const Type& get_reference() const { return view().get_reference(); } + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. That is: + * + * reducer< op_max<int> > r; + * r->calc_max(a); // *r returns the view + * // calc_max is a view member function + * + * reducer_max<int> w; + * w->calc_max(a); // *w returns the wrapper + * // calc_max is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_max& operator*() { return *this; } + reducer_max const& operator*() const { return *this; } + + reducer_max* operator->() { return this; } + reducer_max const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. 
By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. + */ + //@{ + operator reducer< op_max<Type, Compare, false> >& () + { + return *reinterpret_cast< reducer< op_max<Type, Compare, false> >* >(this); + } + + operator const reducer< op_max<Type, Compare, false> >& () const + { + return *reinterpret_cast< const reducer< op_max<Type, Compare, false> >* >(this); + } + //@} +}; + + +/// @cond internal +// The legacy definition of max_of(reducer_max, value) has different +// behavior and a different return type than this definition. We add an +// unused third argument to this version of the function to give it a different +// signature, so that they won’t end up sharing a single object file entry. +struct max_of_1_0_t {}; +const max_of_1_0_t max_of_1_0 = {}; +/// @endcond + +/** Compute the maximum of the value in a reducer_max and another value. + * + * @deprecated Because reducer_max is deprecated. + * + * The result of this computation can only be assigned back to the original + * reducer or used in another max_of() call. 
For example, + * + * reducer = max_of(reducer, x); + * reducer = max_of(x, reducer); + * + * @see min_max_internal::rhs_proxy + * + * @ingroup ReducersMinMaxMaxValue + */ +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const reducer_max<Type, Compare>& r, const Type& value, + const max_of_1_0_t& = max_of_1_0) +{ + return min_max_internal::make_proxy(value, r.view()); +} + +/// @copydoc max_of(const reducer_max<Type, Compare>&, const Type&, const max_of_1_0_t&) +/// @ingroup ReducersMinMaxMaxValue +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_max_view<Type, Compare> > +max_of(const Type& value, const reducer_max<Type, Compare>& r, + const max_of_1_0_t& = max_of_1_0) +{ + return min_max_internal::make_proxy(value, r.view()); +} + + +/** Deprecated minimum reducer wrapper class. + * + * reducer_min is the same as @ref reducer<@ref op_min>, except that + * reducer_min is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. For + * example, a value is minimized with a `reducer<%op_min>` with + * `r->calc_min(a)`, but a value can be minimized with a `%reducer_min` with + * `r.calc_min(a)`. + * + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_min. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_min` + * and `reducer<%op_min>`. This allows incremental code + * conversion: old code that used `%reducer_min` can pass a + * `%reducer_min` to a converted function that now expects a + * pointer or reference to a `reducer<%op_min>`, and vice + * versa. 
**But see @ref redminmax_compatibility.** + * + * @tparam Type The value type of the reducer. + * @tparam Compare The “less than” comparator type for the reducer. + * + * @see op_min + * @see op_min_view + * @see reducer + * @see ReducersMinMax + * @ingroup ReducersMinMaxMinValue + */ +template <typename Type, typename Compare=std::less<Type> > +class reducer_min : public reducer< op_min<Type, Compare, true> > +{ + __CILKRTS_STATIC_ASSERT( + ::cilk::internal::class_is_empty< + typename ::cilk::internal::binary_functor<Compare>::type >::value, + "cilk::reducer_min<Type, Compare> only works with " + "an empty Compare class"); + typedef reducer< op_min<Type, Compare, true> > base; +public: + + /// Type of data in a reducer_min. + typedef Type basic_value_type; + + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view type for the reducer. + typedef typename base::view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type monoid_type; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /// The view’s rhs proxy type. + typedef min_max_internal::rhs_proxy<View> rhs_proxy; + + using base::view; + + /** @name Constructors + */ + //@{ + + /// Construct the wrapper in its identity state (either `!is_set()`, or + /// `value() == identity value`). + reducer_min() : base() {} + + /// Construct the wrapper with a specified initial value. + explicit reducer_min(const Type& initial_value) : base(initial_value) {} + + /// Construct the wrapper in its identity state with a specified + /// comparator. + explicit reducer_min(const Compare& comp) : base(comp) {} + + /// Construct the wrapper with a specified initial value and a specified + /// comparator. 
+ reducer_min(const Type& initial_value, const Compare& comp) + : base(initial_value, comp) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_min_view. */ + //@{ + + /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const + bool is_set() const { return view().is_set(); } + + /// @copydoc op_min_view::calc_min(const Type&) + reducer_min& calc_min(const Type& x) + { view().calc_min(x); return *this; } + + /// @copydoc op_min_view::operator=(const min_max_internal::rhs_proxy<op_min_view>&) + reducer_min& operator=(const rhs_proxy& rhs) + { view() = rhs; return *this; } + + //@} + + /** Allow read-only access to the value within the current view. + * + * @returns A const reference to the value within the current view. + */ + const Type& get_reference() const { return view().get_reference(); } + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. That is: + * + * reducer< op_min<int> > r; + * r->calc_min(a); // *r returns the view + * // calc_min is a view member function + * + * reducer_min<int> w; + * w->calc_min(a); // *w returns the wrapper + * // calc_min is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_min& operator*() { return *this; } + reducer_min const& operator*() const { return *this; } + + reducer_min* operator->() { return this; } + reducer_min const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. 
By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. + */ + //@{ + operator reducer< op_min<Type, Compare, false> >& () + { + return *reinterpret_cast< reducer< op_min<Type, Compare, false> >* >(this); + } + + operator const reducer< op_min<Type, Compare, false> >& () const + { + return *reinterpret_cast< const reducer< op_min<Type, Compare, false> >* >(this); + } + //@} +}; + + +/** Compute the minimum of a reducer and a value. + * + * @deprecated Because reducer_min is deprecated. + */ +//@{ +// The legacy definition of min_of(reducer_min, value) has different +// behavior and a different return type than this definition. We add an +// unused third argument to this version of the function to give it a different +// signature, so that they won’t end up sharing a single object file entry. +struct min_of_1_0_t {}; +const min_of_1_0_t min_of_1_0 = {}; + +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const reducer_min<Type, Compare>& r, const Type& value, + const min_of_1_0_t& = min_of_1_0) +{ + return min_max_internal::make_proxy(value, r.view()); +} + +template <typename Type, typename Compare> +inline min_max_internal::rhs_proxy< op_min_view<Type, Compare> > +min_of(const Type& value, const reducer_min<Type, Compare>& r, + const min_of_1_0_t& = min_of_1_0) +{ + return min_max_internal::make_proxy(value, r.view()); +} +//@} + + +/** Deprecated maximum with index reducer wrapper class. 
+ * + * reducer_max_index is the same as @ref reducer<@ref op_max_index>, except + * that reducer_max_index is a proxy for the contained view, so that + * accumulator variable update operations can be applied directly to the + * reducer. For example, a value is maximized with a `reducer<%op_max_index>` + * with `r->calc_max(i, a)`, but a value can be maximized with a + * `%reducer_max_index` with `r.calc_max(i, a)`. + * + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_max_index. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_max_index` + * and `reducer<%op_max_index>`. This allows incremental code + * conversion: old code that used `%reducer_max_index` can pass a + * `%reducer_max_index` to a converted function that now expects a + * pointer or reference to a `reducer<%op_max_index>`, and vice + * versa. **But see @ref redminmax_compatibility.** + * + * @tparam Index The index type of the reducer. + * @tparam Type The value type of the reducer. + * @tparam Compare The “less than” comparator type for the reducer. + * + * @see op_max_index + * @see op_max_index_view + * @see reducer + * @see ReducersMinMax + * @ingroup ReducersMinMaxMaxIndex + */ +template < typename Index + , typename Type + , typename Compare = std::less<Type> + > +class reducer_max_index : + public reducer< op_max_index<Index, Type, Compare, true> > +{ + __CILKRTS_STATIC_ASSERT( + ::cilk::internal::class_is_empty< + typename ::cilk::internal::binary_functor<Compare>::type >::value, + "cilk::reducer_max_index<Type, Compare> only works with " + "an empty Compare class"); + typedef reducer< op_max_index<Index, Type, Compare, true> > base; +public: + + /// Type of data in a reducer_max_index. 
+ typedef Type basic_value_type; + + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view type for the reducer. + typedef typename base::view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type monoid_type; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /// The view’s rhs proxy type. + typedef min_max_internal::rhs_proxy<View> rhs_proxy; + + using base::view; + + /** @name Constructors + */ + //@{ + + /// Construct the wrapper in its identity state (`!is_set()`). + reducer_max_index() : base() {} + + /// Construct with a specified initial index and value. + reducer_max_index(const Index& initial_index, + const Type& initial_value) + : base(initial_index, initial_value) {} + + /// Construct the wrapper with a specified comparator. + explicit reducer_max_index(const Compare& comp) : base(comp) {} + + /// Construct the wrapper with a specified initial index, value, + /// and comparator. + reducer_max_index(const Index& initial_index, + const Type& initial_value, + const Compare& comp) + : base(initial_index, initial_value, comp) {} + + //@} + + /** @name Set / Get + */ + //@{ + + /// Set the index and value of this object. + void set_value(const Index& index, const Type& value) + { base::set_value(std::make_pair(index, value)); } + + /// Return the maximum value. + const Type& get_value() const + { return view().get_reference(); } + + /// Return the maximum index. + const Index& get_index() const + { return view().get_index_reference(); } + + /// Return a const reference to value data member in the view. + const Type& get_reference() const + { return view().get_reference(); } + + /// Return a const reference to index data member in the view. 
+ const Index& get_index_reference() const + { return view().get_index_reference(); } + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_max_index_view. */ + //@{ + + /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const + bool is_set() const { return view().is_set(); } + + /// @copydoc op_max_index_view::calc_max(const Index&, const Type&) + reducer_max_index& calc_max(const Index& i, const Type& x) + { view().calc_max(i, x); return *this; } + + /// @copydoc op_max_view::operator=(const min_max_internal::rhs_proxy<op_max_view>&) + reducer_max_index& operator=(const rhs_proxy& rhs) + { view() = rhs; return *this; } + + //@} + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. That is: + * + * reducer< op_max_index<int, int> > r; + * r->calc_max(i, a); // *r returns the view + * // calc_max is a view member function + * + * reducer_max_index<int, int> w; + * w->calc_max(i, a); // *w returns the wrapper + * // calc_max is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_max_index& operator*() { return *this; } + reducer_max_index const& operator*() const { return *this; } + + reducer_max_index* operator->() { return this; } + reducer_max_index const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. 
By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. + */ + //@{ + operator reducer< op_max_index<Index, Type, Compare, false> >& () + { + return *reinterpret_cast< reducer< op_max_index<Index, Type, Compare, false> >* >(this); + } + + operator const reducer< op_max_index<Index, Type, Compare, false> >& () const + { + return *reinterpret_cast< const reducer< op_max_index<Index, Type, Compare, false> >* >(this); + } + //@} + +}; + + +/** Deprecated minimum with index reducer wrapper class. + * + * reducer_min_index is the same as @ref reducer<@ref op_min_index>, except + * that reducer_min_index is a proxy for the contained view, so that + * accumulator variable update operations can be applied directly to the + * reducer. For example, a value is minimized with a `reducer<%op_min_index>` + * with `r->calc_min(i, a)`, but a value can be minimized with a + * `%reducer_min_index` with `r.calc_min(i, a)`. + * + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_min. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_min_index` + * and `reducer<%op_min_index>`. This allows incremental code + * conversion: old code that used `%reducer_min_index` can pass a + * `%reducer_min_index` to a converted function that now expects a + * pointer or reference to a `reducer<%op_min_index>`, and vice + * versa. 
**But see @ref redminmax_compatibility.** + * + * @tparam Index The index type of the reducer. + * @tparam Type The value type of the reducer. + * @tparam Compare The “less than” comparator type for the reducer. + * + * @see op_min_index + * @see op_min_index_view + * @see reducer + * @see ReducersMinMax + * @ingroup ReducersMinMaxMinIndex + */ +template < typename Index + , typename Type + , typename Compare = std::less<Type> + > +class reducer_min_index : + public reducer< op_min_index<Index, Type, Compare, true> > +{ + __CILKRTS_STATIC_ASSERT( + ::cilk::internal::class_is_empty< + typename ::cilk::internal::binary_functor<Compare>::type >::value, + "cilk::reducer_min_index<Type, Compare> only works with " + "an empty Compare class"); + typedef reducer< op_min_index<Index, Type, Compare, true> > base; +public: + + /// Type of data in a reducer_min_index. + typedef Type basic_value_type; + + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view type for the reducer. + typedef typename base::view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type monoid_type; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /// The view’s rhs proxy type. + typedef min_max_internal::rhs_proxy<View> rhs_proxy; + + using base::view; + + /** @name Constructors + */ + //@{ + + /// Construct the wrapper in its identity state (`!is_set()`). + reducer_min_index() : base() {} + + /// Construct with a specified initial index and value. + reducer_min_index(const Index& initial_index, + const Type& initial_value) + : base(initial_index, initial_value) {} + + /// Construct the wrapper with a specified comparator. + explicit reducer_min_index(const Compare& comp) : base(comp) {} + + /// Construct the wrapper with a specified initial index, value, + /// and comparator. 
+ reducer_min_index(const Index& initial_index, + const Type& initial_value, + const Compare& comp) + : base(initial_index, initial_value, comp) {} + + //@} + + /** @name Set / Get + */ + //@{ + + /// Set the index and value of this object. + void set_value(const Index& index, const Type& value) + { base::set_value(std::make_pair(index, value)); } + + /// Return the minimum value. + const Type& get_value() const + { return view().get_reference(); } + + /// Return the minimum index. + const Index& get_index() const + { return view().get_index_reference(); } + + /// Return a const reference to value data member in the view. + const Type& get_reference() const + { return view().get_reference(); } + + /// Return a const reference to index data member in the view. + const Index& get_index_reference() const + { return view().get_index_reference(); } + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_min_index_view. */ + //@{ + + /// @copydoc cilk_lib_1_0::min_max_internal::view_content::is_set() const + bool is_set() const { return view().is_set(); } + + /// @copydoc op_min_index_view::calc_min(const Index&, const Type&) + reducer_min_index& calc_min(const Index& i, const Type& x) + { view().calc_min(i, x); return *this; } + + /// @copydoc op_min_view::operator=(const min_max_internal::rhs_proxy<op_min_view>&) + reducer_min_index& operator=(const rhs_proxy& rhs) + { view() = rhs; return *this; } + + //@} + + /// @name Dereference + /** Dereferencing a wrapper is a no-op. It simply returns the wrapper. + * Combined with the rule that a wrapper forwards view operations to the + * view, this means that view operations can be written the same way on + * reducers and wrappers, which is convenient for incrementally + * converting code using wrappers to code using reducers. 
That is: + * + * reducer< op_min_index<int, int> > r; + * r->calc_min(i, a); // *r returns the view + * // calc_min is a view member function + * + * reducer_min_index<int, int> w; + * w->calc_min(i, a); // *w returns the wrapper + * // calc_min is a wrapper member function that + * // calls the corresponding view function + */ + //@{ + reducer_min_index& operator*() { return *this; } + reducer_min_index const& operator*() const { return *this; } + + reducer_min_index* operator->() { return this; } + reducer_min_index const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. + */ + //@{ + operator reducer< op_min_index<Index, Type, Compare, false> >& () + { + return *reinterpret_cast< reducer< op_min_index<Index, Type, Compare, false> >* >(this); + } + + operator const reducer< op_min_index<Index, Type, Compare, false> >& () const + { + return *reinterpret_cast< const reducer< op_min_index<Index, Type, Compare, false> >* >(this); + } + //@} + +}; + + +#ifndef CILK_LIBRARY_0_9_REDUCER_MINMAX +} // namespace cilk_lib_1_0 +using namespace cilk_lib_1_0; +#endif + + +/// @cond internal +/** Metafunction specialization for reducer conversion. + * + * These specializations of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes each `reducer< op_xxxx<Type> >` classes to have + * an `operator reducer_xxxx<Type>& ()` conversion operator that statically + * downcasts the `reducer<op_xxxx>` to the corresponding `reducer_xxxx` type. 
+ * (The reverse conversion, from `reducer_xxxx` to `reducer<op_xxxx>`, is just + * an upcast, which is provided for free by the language.) + * + * Each specialization passes the monoid's Compare through to the wrapper, so + * a reducer with a non-default comparator downcasts to the wrapper that uses + * that same comparator. + */ +template <typename Type, typename Compare, bool Align> +struct legacy_reducer_downcast< reducer< op_max<Type, Compare, Align> > > +{ + typedef reducer_max<Type, Compare> type; +}; + +template <typename Type, typename Compare, bool Align> +struct legacy_reducer_downcast< reducer< op_min<Type, Compare, Align> > > +{ + typedef reducer_min<Type, Compare> type; +}; + +template <typename Index, typename Type, typename Compare, bool Align> +struct legacy_reducer_downcast< reducer< op_max_index<Index, Type, Compare, Align> > > +{ + typedef reducer_max_index<Index, Type, Compare> type; +}; + +template <typename Index, typename Type, typename Compare, bool Align> +struct legacy_reducer_downcast< reducer< op_min_index<Index, Type, Compare, Align> > > +{ + typedef reducer_min_index<Index, Type, Compare> type; +}; +/// @endcond + +} // namespace cilk + +#endif // __cplusplus + + +/** @name C language reducer macros + * + * These macros are used to declare and work with numeric minimum and maximum reducers in C + * code. + * + * @see @ref page_reducers_in_c + */ + //@{ + + +#ifdef CILK_C_DEFINE_REDUCERS + +/* Integer min/max constants */ +#include <limits.h> + +/* Wchar_t min/max constants */ +#if defined(_MSC_VER) || defined(ANDROID) +# include <wchar.h> +#else +# include <stdint.h> +#endif + +/* Floating-point min/max constants */ +#include <math.h> +#ifndef HUGE_VALF + static const unsigned int __huge_valf[] = {0x7f800000}; +# define HUGE_VALF (*((const float *)__huge_valf)) +#endif + +#ifndef HUGE_VALL + static const unsigned int __huge_vall[] = {0, 0, 0x00007f80, 0}; +# define HUGE_VALL (*((const long double *)__huge_vall)) +#endif + +#endif + +/** Max reducer type name. + * + * This macro expands into the identifier which is the name of the max reducer + * type for a specified numeric type. 
+ * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MAX_TYPE(tn) \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_,tn) + +/** Declare a max reducer object. + * + * This macro expands into a declaration of a max reducer object for a specified numeric + * type. For example: + * + * CILK_C_REDUCER_MAX(my_reducer, double, -DBL_MAX); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * @param v The initial value for the reducer. (A value which can be assigned to the + * numeric type represented by @a tn.) + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MAX(obj,tn,v) \ + CILK_C_REDUCER_MAX_TYPE(tn) obj = \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_reduce_,tn), \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_identity_,tn), \ + __cilkrts_hyperobject_noop_destroy, v) + +/** Maximize with a value. + * + * `CILK_C_REDUCER_MAX_CALC(reducer, v)` sets the current view of the + * reducer to the max of its previous value and a specified new value. + * This is equivalent to + * + * REDUCER_VIEW(reducer) = max(REDUCER_VIEW(reducer), v) + * + * @param reducer The reducer whose contained value is to be updated. + * @param v The value that it is to be maximized with. + */ +#define CILK_C_REDUCER_MAX_CALC(reducer, v) do { \ + _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ + _Typeof(v) __value = (v); \ + if (*view < __value) { \ + *view = __value; \ + } } while (0) + +/// @cond internal + +/** Declare the max reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which implement + * the reducer functionality for the max reducer type for a specified numeric type. + * + * @param t The value type of the reducer. 
+ * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MAX_DECLARATION(t,tn,id) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r); \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn); + +/** Define the max reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement the + * reducer functionality for the max reducer type for a specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MAX_DEFINITION(t,tn,id) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MAX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max,tn,l,r) \ + { if (*(t*)l < *(t*)r) *(t*)l = *(t*)r; } \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max,tn) \ + { *(t*)v = id; } + +//@{ +/** @def CILK_C_REDUCER_MAX_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and + * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` + * will be undefined, and this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_MAX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MAX_DEFINITION(t,tn,id) +#else +# define CILK_C_REDUCER_MAX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MAX_DECLARATION(t,tn,id) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. 
+ */ +__CILKRTS_BEGIN_EXTERN_C +CILK_C_REDUCER_MAX_INSTANCE(char, char, CHAR_MIN) +CILK_C_REDUCER_MAX_INSTANCE(unsigned char, uchar, 0) +CILK_C_REDUCER_MAX_INSTANCE(signed char, schar, SCHAR_MIN) +CILK_C_REDUCER_MAX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) +CILK_C_REDUCER_MAX_INSTANCE(short, short, SHRT_MIN) +CILK_C_REDUCER_MAX_INSTANCE(unsigned short, ushort, 0) +CILK_C_REDUCER_MAX_INSTANCE(int, int, INT_MIN) +CILK_C_REDUCER_MAX_INSTANCE(unsigned int, uint, 0) +CILK_C_REDUCER_MAX_INSTANCE(unsigned int, unsigned, 0) // alternate name +CILK_C_REDUCER_MAX_INSTANCE(long, long, LONG_MIN) +CILK_C_REDUCER_MAX_INSTANCE(unsigned long, ulong, 0) +CILK_C_REDUCER_MAX_INSTANCE(long long, longlong, LLONG_MIN) +CILK_C_REDUCER_MAX_INSTANCE(unsigned long long, ulonglong, 0) +CILK_C_REDUCER_MAX_INSTANCE(float, float, -HUGE_VALF) +CILK_C_REDUCER_MAX_INSTANCE(double, double, -HUGE_VAL) +CILK_C_REDUCER_MAX_INSTANCE(long double, longdouble, -HUGE_VALL) +__CILKRTS_END_EXTERN_C + +/// @endcond + +/** Max_index reducer type name. + * + * This macro expands into the identifier which is the name of the max_index reducer + * type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MAX_INDEX_TYPE(tn) \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_index_,tn) + +/** Declare an op_max_index reducer object. + * + * This macro expands into a declaration of a max_index reducer object for a specified + * numeric type. For example: + * + * CILK_C_REDUCER_MAX_INDEX(my_reducer, double, -DBL_MAX); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * @param v The initial value for the reducer. (A value which can be assigned to the + * numeric type represented by @a tn.) 
+ * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MAX_INDEX(obj,tn,v) \ + CILK_C_REDUCER_MAX_INDEX_TYPE(tn) obj = \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_index_reduce_,tn), \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_index_identity_,tn), \ + __cilkrts_hyperobject_noop_destroy, {0, v}) + +/** Maximize with a value. + * + * `CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v)` sets the current view of the + * reducer to the max of its previous value and a specified new value. + * This is equivalent to + * + * REDUCER_VIEW(reducer) = max_index(REDUCER_VIEW(reducer), v) + * + * If the value of the reducer is changed to @a v, then the index of the reducer is + * changed to @a i. + * + * @param reducer The reducer whose contained value and index are to be updated. + * @param i The index associated with the new value. + * @param v The value that it is to be maximized with. + */ +#define CILK_C_REDUCER_MAX_INDEX_CALC(reducer, i, v) do { \ + _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ + _Typeof(v) __value = (v); \ + if (view->value < __value) { \ + view->index = (i); \ + view->value = __value; \ + } } while (0) + +/// @cond internal + +/** Declare the max_index view type. + * + * The view of a max_index reducer is a structure containing both the + * maximum value for the reducer and the index that was associated with + * that value in the sequence of input values. + */ +#define CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn) \ + typedef struct { \ + __STDNS ptrdiff_t index; \ + t value; \ + } __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) + +/** Declare the max_index reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which implement + * the reducer functionality for the max_index reducer type for a specified numeric type. + * + * @param t The value type of the reducer. 
+ * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MAX_INDEX_DECLARATION(t,tn,id) \ + CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \ + typedef CILK_C_DECLARE_REDUCER( \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \ + CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r); \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn); + +/** Define the max_index reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement the + * reducer functionality for the max_index reducer type for a specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MAX_INDEX_DEFINITION(t,tn,id) \ + CILK_C_REDUCER_MAX_INDEX_VIEW(t,tn); \ + typedef CILK_C_DECLARE_REDUCER( \ + __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn)) \ + CILK_C_REDUCER_MAX_INDEX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_max_index,tn,l,r) \ + { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \ + if (((view_t*)l)->value < ((view_t*)r)->value) \ + *(view_t*)l = *(view_t*)r; } \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_max_index,tn) \ + { typedef __CILKRTS_MKIDENT(cilk_c_reducer_max_index_view_,tn) view_t; \ + ((view_t*)v)->index = 0; ((view_t*)v)->value = id; } + +//@{ +/** @def CILK_C_REDUCER_MAX_INDEX_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and + * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` + * will be undefined, and this macro will expand into external declarations for the functions. 
+ */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MAX_INDEX_DEFINITION(t,tn,id) +#else +# define CILK_C_REDUCER_MAX_INDEX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MAX_INDEX_DECLARATION(t,tn,id) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. + */ +__CILKRTS_BEGIN_EXTERN_C +CILK_C_REDUCER_MAX_INDEX_INSTANCE(char, char, CHAR_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned char, uchar, 0) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(signed char, schar, SCHAR_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(short, short, SHRT_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned short, ushort, 0) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(int, int, INT_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, uint, 0) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned int, unsigned, 0) // alternate name +CILK_C_REDUCER_MAX_INDEX_INSTANCE(long, long, LONG_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long, ulong, 0) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(long long, longlong, LLONG_MIN) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(unsigned long long, ulonglong, 0) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(float, float, -HUGE_VALF) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(double, double, -HUGE_VAL) +CILK_C_REDUCER_MAX_INDEX_INSTANCE(long double, longdouble, -HUGE_VALL) +__CILKRTS_END_EXTERN_C + +/// @endcond + +/** Min reducer type name. + * + * This macro expands into the identifier which is the name of the min reducer + * type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MIN_TYPE(tn) \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_,tn) + +/** Declare a min reducer object. + * + * This macro expands into a declaration of a min reducer object for a specified numeric + * type. 
For example: + * + * CILK_C_REDUCER_MIN(my_reducer, double, DBL_MAX); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * @param v The initial value for the reducer. (A value which can be assigned to the + * numeric type represented by @a tn.) + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MIN(obj,tn,v) \ + CILK_C_REDUCER_MIN_TYPE(tn) obj = \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_reduce_,tn), \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_identity_,tn), \ + __cilkrts_hyperobject_noop_destroy, v) + +/** Minimize with a value. + * + * `CILK_C_REDUCER_MIN_CALC(reducer, v)` sets the current view of the + * reducer to the min of its previous value and a specified new value. + * This is equivalent to + * + * REDUCER_VIEW(reducer) = min(REDUCER_VIEW(reducer), v) + * + * @param reducer The reducer whose contained value is to be updated. + * @param v The value that it is to be minimized with. + */ +#define CILK_C_REDUCER_MIN_CALC(reducer, v) do { \ + _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ + _Typeof(v) __value = (v); \ + if (*view > __value) { \ + *view = __value; \ + } } while (0) + +/// @cond internal + +/** Declare the min reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which implement + * the reducer functionality for the min reducer type for a specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. 
+ */ +#define CILK_C_REDUCER_MIN_DECLARATION(t,tn,id) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r); \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn); + +/** Define the min reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement the + * reducer functionality for the min reducer type for a specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MIN_DEFINITION(t,tn,id) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_MIN_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min,tn,l,r) \ + { if (*(t*)l > *(t*)r) *(t*)l = *(t*)r; } \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min,tn) \ + { *(t*)v = id; } + +//@{ +/** @def CILK_C_REDUCER_MIN_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and + * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` + * will be undefined, and this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_MIN_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MIN_DEFINITION(t,tn,id) +#else +# define CILK_C_REDUCER_MIN_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MIN_DECLARATION(t,tn,id) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. 
+ * The third argument of each instance is the identity value for min, i.e. the + * largest value of the type. + */ +__CILKRTS_BEGIN_EXTERN_C +CILK_C_REDUCER_MIN_INSTANCE(char, char, CHAR_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned char, uchar, UCHAR_MAX) +CILK_C_REDUCER_MIN_INSTANCE(signed char, schar, SCHAR_MAX) +CILK_C_REDUCER_MIN_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) +CILK_C_REDUCER_MIN_INSTANCE(short, short, SHRT_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned short, ushort, USHRT_MAX) +CILK_C_REDUCER_MIN_INSTANCE(int, int, INT_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned int, uint, UINT_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name +CILK_C_REDUCER_MIN_INSTANCE(long, long, LONG_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned long, ulong, ULONG_MAX) +CILK_C_REDUCER_MIN_INSTANCE(long long, longlong, LLONG_MAX) +CILK_C_REDUCER_MIN_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) +CILK_C_REDUCER_MIN_INSTANCE(float, float, HUGE_VALF) +CILK_C_REDUCER_MIN_INSTANCE(double, double, HUGE_VAL) +CILK_C_REDUCER_MIN_INSTANCE(long double, longdouble, HUGE_VALL) +__CILKRTS_END_EXTERN_C + +/// @endcond + +/** Min_index reducer type name. + * + * This macro expands into the identifier which is the name of the min_index reducer + * type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MIN_INDEX_TYPE(tn) \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_index_,tn) + +/** Declare an op_min_index reducer object. + * + * This macro expands into a declaration of a min_index reducer object for a specified + * numeric type. For example: + * + * CILK_C_REDUCER_MIN_INDEX(my_reducer, double, DBL_MAX); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying the type of the + * reducer. + * @param v The initial value for the reducer. (A value which can be assigned to the + * numeric type represented by @a tn.) 
+ * + * @see @ref reducers_c_predefined + */ +#define CILK_C_REDUCER_MIN_INDEX(obj,tn,v) \ + CILK_C_REDUCER_MIN_INDEX_TYPE(tn) obj = \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_index_reduce_,tn), \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_index_identity_,tn), \ + __cilkrts_hyperobject_noop_destroy, {0, v}) + +/** Minimize with a value. + * + * `CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v)` sets the current view of the + * reducer to the min of its previous value and a specified new value. + * This is equivalent to + * + * REDUCER_VIEW(reducer) = min_index(REDUCER_VIEW(reducer), v) + * + * If the value of the reducer is changed to @a v, then the index of the reducer is + * changed to @a i. + * + * @param reducer The reducer whose contained value and index are to be updated. + * @param i The index associated with the new value. + * @param v The value that it is to be minimized with. + */ +#define CILK_C_REDUCER_MIN_INDEX_CALC(reducer, i, v) do { \ + _Typeof((reducer).value)* view = &(REDUCER_VIEW(reducer)); \ + _Typeof(v) __value = (v); \ + if (view->value > __value) { \ + view->index = (i); \ + view->value = __value; \ + } } while (0) + +/// @cond internal + +/** Declare the min_index view type. + * + * The view of a min_index reducer is a structure containing both the + * minimum value for the reducer and the index that was associated with + * that value in the sequence of input values. + */ +#define CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn) \ + typedef struct { \ + __STDNS ptrdiff_t index; \ + t value; \ + } __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) + +/** Declare the min_index reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which implement + * the reducer functionality for the min_index reducer type for a specified numeric type. + * + * @param t The value type of the reducer. 
+ * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MIN_INDEX_DECLARATION(t,tn,id) \ + CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn); \ + typedef CILK_C_DECLARE_REDUCER( \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)) \ + CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r); \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn); + +/** Define the min_index reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement the + * reducer functionality for the min_index reducer type for a specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer type name, + * function names, etc. + */ +#define CILK_C_REDUCER_MIN_INDEX_DEFINITION(t,tn,id) \ + CILK_C_REDUCER_MIN_INDEX_VIEW(t,tn); \ + typedef CILK_C_DECLARE_REDUCER( \ + __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn)) \ + CILK_C_REDUCER_MIN_INDEX_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_min_index,tn,l,r) \ + { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \ + if (((view_t*)l)->value > ((view_t*)r)->value) \ + *(view_t*)l = *(view_t*)r; } \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_min_index,tn) \ + { typedef __CILKRTS_MKIDENT(cilk_c_reducer_min_index_view_,tn) view_t; \ + ((view_t*)v)->index = 0; ((view_t*)v)->value = id; } + +//@{ +/** @def CILK_C_REDUCER_MIN_INDEX_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` will be defined, and + * this macro will generate reducer implementation functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` + * will be undefined, and this macro will expand into external declarations for the functions. 
+ */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MIN_INDEX_DEFINITION(t,tn,id) +#else +# define CILK_C_REDUCER_MIN_INDEX_INSTANCE(t,tn,id) \ + CILK_C_REDUCER_MIN_INDEX_DECLARATION(t,tn,id) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. + */ +__CILKRTS_BEGIN_EXTERN_C +CILK_C_REDUCER_MIN_INDEX_INSTANCE(char, char, CHAR_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned char, uchar, CHAR_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(signed char, schar, SCHAR_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(wchar_t, wchar_t, WCHAR_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(short, short, SHRT_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned short, ushort, USHRT_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(int, int, INT_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, uint, UINT_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned int, unsigned, UINT_MAX) // alternate name +CILK_C_REDUCER_MIN_INDEX_INSTANCE(long, long, LONG_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long, ulong, ULONG_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(long long, longlong, LLONG_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(unsigned long long, ulonglong, ULLONG_MAX) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(float, float, HUGE_VALF) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(double, double, HUGE_VAL) +CILK_C_REDUCER_MIN_INDEX_INSTANCE(long double, longdouble, HUGE_VALL) +__CILKRTS_END_EXTERN_C + +/// @endcond + +//@} + +#endif // defined REDUCER_MIN_H_INCLUDED diff --git a/libcilkrts/include/cilk/reducer_opadd.h b/libcilkrts/include/cilk/reducer_opadd.h index 9380a0ac122..9c2a97c17e9 100644 --- a/libcilkrts/include/cilk/reducer_opadd.h +++ b/libcilkrts/include/cilk/reducer_opadd.h @@ -1,33 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer_opadd.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_opadd.h +/** @file reducer_opadd.h * - * Purpose: Reducer hyperobject to sum values + * @brief Defines classes for doing parallel addition reductions. + * + * @ingroup ReducersAdd + * + * @see ReducersAdd */ #ifndef REDUCER_OPADD_H_INCLUDED @@ -35,408 +43,558 @@ #include <cilk/reducer.h> -#ifdef __cplusplus - -/* C++ Interface - * - * Classes: reducer_opadd<Type> - * - * Description: - * ============ - * This component provides a reducer-type hyperobject representation - * that allows adding values to a non-local variable using the +=, -=, - * ++, --, +, and - operators. A common operation when traversing a data - * structure is to sum values into a non-local numeric variable. When - * Cilk parallelism is introduced, however, a data race will occur on - * the variable holding the sum. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. - * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing an operation on - * each object and accumulating the result of the operation into an integer - * variable. - *.. - * int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * int result = 0; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result += compute(myArray[i]); +/** @defgroup ReducersAdd Addition Reducers + * + * Addition reducers allow the computation of the sum of a set of values in + * parallel. 
+ * + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redopadd_usage Usage Example + * + * cilk::reducer< cilk::op_add<int> > r; + * cilk_for (int i = 0; i != N; ++i) { + * *r += a[i]; * } + * return r.get_value(); + * + * @section redopadd_monoid The Monoid + * + * @subsection redopadd_monoid_values Value Set + * + * The value set of an addition reducer is the set of values of `Type`, which + * is expected to be a builtin numeric type (or something like it, such as + * `std::complex`). + * + * @subsection redopadd_monoid_operator Operator + * + * The operator of an addition reducer is the addition operator, defined by + * the “`+`” binary operator on `Type`. + * + * @subsection redopadd_monoid_identity Identity + * + * The identity value of the reducer is the numeric value “`0`”. This is + * expected to be the value of the default constructor `Type()`. + * + * @section redopadd_operations Operations + * + * @subsection redopadd_constructors Constructors + * + * reducer() // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * + * @subsection redopadd_get_set Set and Get + * + * r.set_value(const Type& value) + * const Type& = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) * - * std::cout << "The result is: " << result << std::endl; - * - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' variable. - * The race is solved by changing 'result' to a 'reducer_opadd' hyperobject: - *.. - * int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... 
- * - * cilk::reducer_opadd<int> result; - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * *result += compute(myArray[i]); + * @subsection redopadd_initial Initial Values + * + * If an addition reducer is constructed without an explicit initial value, + * then its initial value will be its identity value, as long as `Type` + * satisfies the requirements of @ref redopadd_types. + * + * @subsection redopadd_view_ops View Operations + * + * *r += a + * *r -= a + * ++*r + * --*r + * (*r)++ + * (*r)-- + * *r = *r + a + * *r = *r - a + * *r = *r ± a1 ± a2 … ± an + * + * The post-increment and post-decrement operations do not return a value. (If + * they did, they would expose the value contained in the view, which is + * non-deterministic in the middle of a reduction.) + * + * Note that subtraction operations are allowed on an addition reducer because + * subtraction is equivalent to addition with a negated operand. It is true + * that `(x - y) - z` is not equivalent to `x - (y - z)`, but + * `(x + (-y)) + (-z)` _is_ equivalent to `x + ((-y) + (-z))`. + * + * @section redopadd_floating_point Issues with Floating-Point Types + * + * Because of precision and round-off issues, floating-point addition is not + * really associative. For example, `(1e30 + -1e30) + 1 == 1`, but + * `1e30 + (-1e30 + 1) == 0`. + * + * In many cases, this won’t matter, but computations which have been + * carefully ordered to control round-off errors may not deal well with + * being reassociated. In general, you should be sure to understand the + * floating-point behavior of your program before doing any transformation + * that will reassociate its computations. + * + * @section redopadd_types Type and Operator Requirements + * + * `Type` must be `Copy Constructible`, `Default Constructible`, and + * `Assignable`. + * + * The operator “`+=`” must be defined on `Type`, with `x += a` having the + * same meaning as `x = x + a`. 
In addition, if the code uses the “`-=`”, + * pre-increment, post-increment, pre-decrement, or post-decrement operators, + * then the corresponding operators must be defined on `Type`. + * + * The expression `Type()` must be a valid expression which yields the + * identity value (the value of `Type` whose numeric value is zero). + * + * @section redopadd_in_c Addition Reducers in C + * + * The @ref CILK_C_REDUCER_OPADD and @ref CILK_C_REDUCER_OPADD_TYPE macros can + * be used to do addition reductions in C. For example: + * + * CILK_C_REDUCER_OPADD(r, double, 0); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(r) += a[i]; * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The sum of the elements of a is %f\n", REDUCER_VIEW(r)); * - * std::cout << "The result is: " << result.get_value() << std::endl; - * - * return 0; - * } - *.. - * - * Operations provided: - * ==================== - * Given 'reducer_opadd' objects, x and y, the following are - * valid statements: - *.. - * *x += 5; - * *x = *x + 5; - * *x -= 5; - * *y = *y - 5; - * ++*x; - * --*x; - * (*x)++; - * (*x)--; - *.. - * The following are not valid expressions and will result in a run-time error - * in a debug build: - *.. - * x = y; // Cannot assign one reducer to another - * *x = *y + 5; // Mixed reducers - * *x = 5 + *x; // operator+ is not necessarily commutative - * *x = 5 - *x; // Violates associativity - *.. - * The the current value of the reducer can be get and set using the - * 'get_value' and 'set_value' methods, respectively. As with most reducers, - * 'set_value' and 'get_value' methods produce deterministic results only if - * called before the first spawn after creating a 'hyperobject' or when all - * strands spawned since creating the 'hyperobject' have been synced. 
However, - * the difference two values of the same reducer read twice in the same Cilk - * strand *is* typically deterministic (assuming the usual relationship between - * operator '+' and operator '-' for the specified 'Type'): - *.. - * cilk::reducer_opadd<int> x; - * cilk_spawn func(); - * int a = x.get_value(); - * *x += 5; - * int b = x.get_value(); - * assert(b - a == 5); - *.. - * - * Requirements on the 'Type' parameter - * ==================================== - * The 'Type' parameter used to instantiate the 'reducer_opadd' class must - * provide a += operator that meets the requirements for an - * *associative* *mutating* *operator* as defined in the Cilk++ user manual. - * The default constructor for 'Type' must yield an additive identity, i.e., - * a value (such as integer zero) that, when added to any other value, yields - * the other value. If 'Type' also provides a -= operator, then subtraction - * is also supported by this reducer. C++ integral types satisfy these - * requirements. - * - * Note that C++ floating-point types do not support truly - * associative addition in that (a + b) + c will exhibit different - * round-off error than a + (b + c). However, for numbers of similar - * magnitude, a floating-point 'reducer_opadd' may still be useful. + * See @ref reducers_c_predefined. */ -namespace cilk -{ +#ifdef __cplusplus + +namespace cilk { -/** - * @brief A reducer-type hyperobject representation that allows adding values - * to a non-local variable using the +=, -=, ++, --, +, and - operators. - * - * A common operation when traversing a data structure is to sum values into a - * non-local numeric variable. When Cilk parallelism is introduced, however, - * a data race will occur on the variable holding the sum. By replacing the - * variable with the hyperobject defined in this component, the data race is - * eliminated. +/** The addition reducer view class. 
+ * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_add<Type> >`. It holds the accumulator variable + * for the reduction, and allows only addition and subtraction operations to + * be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `+=` operation would be used in an expression like `*r += a`, where + * `r` is an op_add reducer variable. + * + * @tparam Type The type of the contained accumulator variable. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. + * + * @see ReducersAdd + * @see op_add + * + * @ingroup ReducersAdd */ template <typename Type> -class reducer_opadd +class op_add_view : public scalar_view<Type> { + typedef scalar_view<Type> base; + public: - /// Definition of data view, operation, and identity for reducer_opadd - class Monoid : public monoid_base<Type> - { - public: - static void reduce(Type* left, Type* right); + /** Class to represent the right-hand side of + * `*reducer = *reducer ± value`. + * + * The only assignment operator for the op_add_view class takes an + * rhs_proxy as its operand. This results in the syntactic restriction + * that the only expressions that can be assigned to an op_add_view are + * ones which generate an rhs_proxy — that is, expressions of the form + * `op_add_view ± value ... ± value`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; + * otherwise, the behavior will be undefined. (I.e., `v1 = v1 + x` is + * legal; `v1 = v2 + x` is illegal.) This condition will be checked with a + * runtime assertion when compiled in debug mode. + * + * @see op_add_view + */ + class rhs_proxy { + friend class op_add_view; + + const op_add_view* m_view; + Type m_value; + + // Constructor is invoked only from op_add_view::operator+() and + // op_add_view::operator-(). 
+ // + rhs_proxy(const op_add_view* view, const Type& value) : + m_view(view), m_value(value) {} + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + + public: + //@{ + /** Add or subtract an additional rhs value. If `v` is an op_add_view + * and `a1` is a value, then the expression `v + a1` invokes the view’s + * `operator+()` to create an rhs_proxy for `(v, a1)`; then + * `v + a1 + a2` invokes the rhs_proxy’s `operator+()` to update that + * rhs_proxy to `(v, a1+a2)`. This allows the right-hand side of an + * assignment to be not just `view ± value`, but + * `view ± value ± value ... ± value`. The effect is that + * + * v = v ± a1 ± a2 ... ± an; + * + * is evaluated as + * + * v = v ± (±a1 ± a2 ... ± an); + */ + rhs_proxy& operator+(const Type& x) { m_value += x; return *this; } + rhs_proxy& operator-(const Type& x) { m_value -= x; return *this; } + //@} }; - /// "PRIVATE" HELPER CLASS - class temp_sum { - friend class reducer_opadd; - - Type* valuePtr_; + + /** Default/identity constructor. This constructor initializes the + * contained value to `Type()`, which is expected to be the identity value + * for addition on `Type`. + */ + op_add_view() : base() {} + + /** Construct with a specified initial value. + */ + explicit op_add_view(const Type& v) : base(v) {} + + /** Reduction operation. + * + * This function is invoked by the @ref op_add monoid to combine the views + * of two strands when the right strand merges with the left one. It adds + * the value contained in the right-strand view to the value contained in + * the left-strand view, and leaves the value in the right-strand view + * undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_add monoid to implement the monoid + * reduce operation. 
+ */ + void reduce(op_add_view* right) { this->m_value += right->m_value; } + + /** @name Accumulator variable updates. + * + * These functions support the various syntaxes for incrementing or + * decrementing the accumulator variable contained in the view. + */ + //@{ + + /** Increment the accumulator variable by @a x. + */ + op_add_view& operator+=(const Type& x) { this->m_value += x; return *this; } + + /** Decrement the accumulator variable by @a x. + */ + op_add_view& operator-=(const Type& x) { this->m_value -= x; return *this; } + + /** Pre-increment. + */ + op_add_view& operator++() { ++this->m_value; return *this; } + + /** Post-increment. + * + * @note Conventionally, post-increment operators return the old value + * of the incremented variable. However, reducer views do not + * expose their contained values, so `view++` does not have a + * return value. + */ + void operator++(int) { this->m_value++; } + + /** Pre-decrement. + */ + op_add_view& operator--() { --this->m_value; return *this; } + + /** Post-decrement. + * + * @note Conventionally, post-decrement operators return the old value + * of the decremented variable. However, reducer views do not + * expose their contained values, so `view--` does not have a + * return value. + */ + void operator--(int) { this->m_value--; } + + /** Create an object representing `*this + x`. + * + * @see rhs_proxy + */ + rhs_proxy operator+(const Type& x) const { return rhs_proxy(this, x); } + + /** Create an object representing `*this - x`. + * + * @see rhs_proxy + */ + rhs_proxy operator-(const Type& x) const { return rhs_proxy(this, -x); } + + /** Assign the result of a `view ± value` expression to the view. Note that + * this is the only assignment operator for this class. 
+ * + * @see rhs_proxy + */ + op_add_view& operator=(const rhs_proxy& rhs) { + __CILKRTS_ASSERT(this == rhs.m_view); + this->m_value += rhs.m_value; + return *this; + } + + //@} +}; - // Default copy constructor, no assignment operator - temp_sum& operator=(const temp_sum&); - explicit temp_sum(Type* valuePtr); +/** Monoid class for addition reductions. Instantiate the cilk::reducer + * template class with an op_add monoid to create an addition reducer class. + * For example, to compute + * the sum of a set of `int` values: + * + * cilk::reducer< cilk::op_add<int> > r; + * + * @tparam Type The reducer value type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersAdd + * @see op_add_view + * + * @ingroup ReducersAdd + */ +template <typename Type, bool Align = false> +struct op_add : public monoid_with_view<op_add_view<Type>, Align> {}; - public: - temp_sum& operator+(const Type& x); - temp_sum& operator-(const Type& x); - }; +/** **Deprecated** addition reducer wrapper class. + * + * reducer_opadd is the same as @ref reducer<@ref op_add>, except that + * reducer_opadd is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. For + * example, a value is added to a `reducer<%op_add>` with `*r += a`, but a + * value can be added to a `%reducer_opadd` with `r += a`. + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_opadd. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. 
+ * + * @note Implicit conversions are provided between `%reducer_opadd` + * and `reducer<%op_add>`. This allows incremental code + * conversion: old code that used `%reducer_opadd` can pass a + * `%reducer_opadd` to a converted function that now expects a + * pointer or reference to a `reducer<%op_add>`, and vice + * versa. + * + * @tparam Type The value type of the reducer. + * + * @see op_add + * @see reducer + * @see ReducersAdd + * + * @ingroup ReducersAdd + */ +template <typename Type> +class reducer_opadd : public reducer< op_add<Type, true> > +{ + typedef reducer< op_add<Type, true> > base; + using base::view; public: - - /// Construct an 'reducer_opadd' object with a value of 'Type()'. - reducer_opadd(); - - /// Construct an 'reducer_opadd' object with the specified initial value. - explicit reducer_opadd(const Type& initial_value); - - /// Return a const reference to the current value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Set the value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// Add 'x' to the value of this reducer and produce a temporary sum object. - /// The temporary sum can be used for additional arithmetic or assigned back - /// to this reducer. - temp_sum operator+(const Type& x) const; - - /// Subtract 'x' from the value of this reducer and produce a temporary sum - /// object. The temporary sum can be used for additional arithmetic or - /// assigned back to this reducer. - temp_sum operator-(const Type& x) const; - - /// Add 'x' to the value of this object. 
- reducer_opadd& operator+=(const Type& x); - - /// Subtract 'x' from the value of this object. - reducer_opadd& operator-=(const Type& x); - - /// Increment the value of this object using pre-increment syntax. - reducer_opadd& operator++(); - - /// Increment the value of this object using post-increment syntax. - /// Because the reducer is not copy-constructible, it is not possible to - /// return the previous value. - void operator++(int); - - /// Decrement the value of this object using pre-decrement syntax. - reducer_opadd& operator--(); - - /// Decrement the value of this object using post-decrement syntax. - /// Because the reducer is not copy-constructible, it is not possible to - /// return the previous value. - void operator--(int); - - /// Merge the result of an addition into this object. The addition - /// must involve this reducer, i.e., x = x + 5; not x = y + 5; - reducer_opadd& operator=(const temp_sum& temp); - + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view’s rhs proxy type. + typedef typename view_type::rhs_proxy rhs_proxy; + + /// The view type for the reducer. + typedef view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Default (identity) constructor. + * + * Constructs the wrapper with the default initial value of `Type()`. + */ + reducer_opadd() {} + + /** Value constructor. + * + * Constructs the wrapper with a specified initial value. + */ + explicit reducer_opadd(const Type& initial_value) : base(initial_value) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_add_view. 
*/ + //@{ + + /// @copydoc op_add_view::operator+=(const Type&) + reducer_opadd& operator+=(const Type& x) { view() += x; return *this; } + + /// @copydoc op_add_view::operator-=(const Type&) + reducer_opadd& operator-=(const Type& x) { view() -= x; return *this; } + + /// @copydoc op_add_view::operator++() + reducer_opadd& operator++() { ++view(); return *this; } + + /// @copydoc op_add_view::operator++(int) + void operator++(int) { view()++; } + + /// @copydoc op_add_view::operator-\-() + reducer_opadd& operator--() { --view(); return *this; } + + /// @copydoc op_add_view::operator-\-(int) + void operator--(int) { view()--; } + + // The legacy definitions of reducer_opadd::operator+() and + // reducer_opadd::operator-() have different behavior and a different + // return type than this definition. The legacy version is defined as a + // member function, so this new version is defined as a free function to + // give it a different signature, so that they won’t end up sharing a + // single object file entry. + + /// @copydoc op_add_view::operator+(const Type&) const + friend rhs_proxy operator+(const reducer_opadd& r, const Type& x) + { + return r.view() + x; + } + /// @copydoc op_add_view::operator-(const Type&) const + friend rhs_proxy operator-(const reducer_opadd& r, const Type& x) + { + return r.view() - x; + } + /// @copydoc op_add_view::operator=(const rhs_proxy&) + reducer_opadd& operator=(const rhs_proxy& temp) + { + view() = temp; + return *this; + } + //@} + + /** @name Dereference + * @details Dereferencing a wrapper is a no-op. It simply returns the + * wrapper. Combined with the rule that the wrapper forwards view + * operations to its contained view, this means that view operations can + * be written the same way on reducers and wrappers, which is convenient + * for incrementally converting old code using wrappers to use reducers + * instead. 
That is: + * + * reducer< op_add<int> > r; + * *r += a; // *r returns the view + * // operator += is a view member function + * + * reducer_opadd<int> w; + * *w += a; // *w returns the wrapper + * // operator += is a wrapper member function that + * // calls the corresponding view function + */ + //@{ reducer_opadd& operator*() { return *this; } reducer_opadd const& operator*() const { return *this; } reducer_opadd* operator->() { return this; } reducer_opadd const* operator->() const { return this; } - - private: - friend class temp_sum; - - // Hyperobject to serve up views - reducer<Monoid> imp_; - - // Not copyable - reducer_opadd(const reducer_opadd&); - reducer_opadd& operator=(const reducer_opadd&); + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. + */ + //@{ + operator reducer< op_add<Type, false> >& () + { + return *reinterpret_cast< reducer< op_add<Type, false> >* >(this); + } + operator const reducer< op_add<Type, false> >& () const + { + return *reinterpret_cast< const reducer< op_add<Type, false> >* >(this); + } + //@} }; -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// ------------------------------------ -// template class reducer_opadd::Monoid -// ------------------------------------ - -/** - * Combines two views of the data. +/// @cond internal +/** Metafunction specialization for reducer conversion. 
+ * + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_add<Type> >` class to have an + * `operator reducer_opadd<Type>& ()` conversion operator that statically + * downcasts the `reducer<op_add>` to the corresponding `reducer_opadd` type. + * (The reverse conversion, from `reducer_opadd` to `reducer<op_add>`, is just + * an upcast, which is provided for free by the language.) + * + * @ingroup ReducersAdd */ -template <typename Type> -void -reducer_opadd<Type>::Monoid::reduce(Type* left, Type* right) -{ - *left += *right; -} - -// ---------------------------- -// template class reducer_opadd -// ---------------------------- - -template <typename Type> -inline -reducer_opadd<Type>::reducer_opadd() - : imp_(Type()) -{ -} - -template <typename Type> -inline -reducer_opadd<Type>::reducer_opadd(const Type& initial_value) - : imp_(initial_value) -{ -} - -template <typename Type> -inline -const Type& reducer_opadd<Type>::get_value() const -{ - return imp_.view(); -} - -template <typename Type> -inline -void reducer_opadd<Type>::set_value(const Type& value) -{ - imp_.view() = value; -} - -template <typename Type> -inline -typename reducer_opadd<Type>::temp_sum -reducer_opadd<Type>::operator+(const Type& x) const -{ - Type* valuePtr = const_cast<Type*>(&imp_.view()); - *valuePtr = *valuePtr + x; - return temp_sum(valuePtr); -} - -template <typename Type> -inline -typename reducer_opadd<Type>::temp_sum -reducer_opadd<Type>::operator-(const Type& x) const -{ - Type* valuePtr = const_cast<Type*>(&imp_.view()); - *valuePtr = *valuePtr - x; - return temp_sum(valuePtr); -} - -template <typename Type> -inline -reducer_opadd<Type>& reducer_opadd<Type>::operator+=(const Type& x) -{ - imp_.view() += x; - return *this; -} - -template <typename Type> -inline -reducer_opadd<Type>& reducer_opadd<Type>::operator-=(const Type& x) -{ - imp_.view() -= x; - return *this; -} - -template <typename Type> -inline 
-reducer_opadd<Type>& reducer_opadd<Type>::operator++() -{ - imp_.view() += 1; - return *this; -} - -template <typename Type> -inline -void reducer_opadd<Type>::operator++(int) -{ - imp_.view() += 1; -} - -template <typename Type> -inline -reducer_opadd<Type>& reducer_opadd<Type>::operator--() -{ - imp_.view() -= 1; - return *this; -} - -template <typename Type> -inline -void reducer_opadd<Type>::operator--(int) +template <typename Type, bool Align> +struct legacy_reducer_downcast<reducer<op_add<Type, Align> > > { - imp_.view() -= 1; -} - -template <typename Type> -inline -reducer_opadd<Type>& -reducer_opadd<Type>::operator=( - const typename reducer_opadd<Type>::temp_sum& temp) -{ - // No-op. Just test that temp was constructed from this. - __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_); - return *this; -} - -// -------------------------------------- -// template class reducer_opadd::temp_sum -// -------------------------------------- - -template <typename Type> -inline -reducer_opadd<Type>::temp_sum::temp_sum(Type *valuePtr) - : valuePtr_(valuePtr) -{ -} - -template <typename Type> -inline -typename reducer_opadd<Type>::temp_sum& -reducer_opadd<Type>::temp_sum::operator+(const Type& x) -{ - *valuePtr_ = *valuePtr_ + x; - return *this; -} - -template <typename Type> -inline -typename reducer_opadd<Type>::temp_sum& -reducer_opadd<Type>::temp_sum::operator-(const Type& x) -{ - *valuePtr_ = *valuePtr_ - x; - return *this; -} + typedef reducer_opadd<Type> type; +}; +/// @endcond } // namespace cilk #endif // __cplusplus -/* C Interface + +/** @ingroup ReducersAdd */ +//@{ +/** @name C Language Reducer Macros + * + * These macros are used to declare and work with numeric op_add reducers in + * C code. + * + * @see @ref page_reducers_in_c + */ + //@{ + __CILKRTS_BEGIN_EXTERN_C +/** Opadd reducer type name. + * + * This macro expands into the identifier which is the name of the op_add + * reducer type for a specified numeric type. 
+ * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * + * @see @ref reducers_c_predefined + * @see ReducersAdd + */ #define CILK_C_REDUCER_OPADD_TYPE(tn) \ __CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn) + +/** Declare an op_add reducer object. + * + * This macro expands into a declaration of an op_add reducer object for a + * specified numeric type. For example: + * + * CILK_C_REDUCER_OPADD(my_reducer, double, 0.0); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * @param v The initial value for the reducer. (A value which can be + * assigned to the numeric type represented by @a tn.) + * + * @see @ref reducers_c_predefined + * @see ReducersAdd + */ #define CILK_C_REDUCER_OPADD(obj,tn,v) \ CILK_C_REDUCER_OPADD_TYPE(tn) obj = \ CILK_C_INIT_REDUCER(_Typeof(obj.value), \ @@ -444,60 +602,84 @@ __CILKRTS_BEGIN_EXTERN_C __CILKRTS_MKIDENT(cilk_c_reducer_opadd_identity_,tn), \ __cilkrts_hyperobject_noop_destroy, v) -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opadd_,tn); \ +/// @cond internal + +/** Declare the op_add reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which + * implement the reducer functionality for the op_add reducer type for a + * specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPADD_DECLARATION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn); - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_OPADD_INSTANCE(char,char); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_OPADD_INSTANCE(signed char,schar); -CILK_C_REDUCER_OPADD_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_OPADD_INSTANCE(short,short); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_OPADD_INSTANCE(int,int); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_OPADD_INSTANCE(long,long); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_OPADD_INSTANCE(long long,longlong); -CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_OPADD_INSTANCE(float,float); -CILK_C_REDUCER_OPADD_INSTANCE(double,double); -CILK_C_REDUCER_OPADD_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPADD_IMP(t,tn) \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn); + +/** Define the op_add reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement + * the reducer functionality for the op_add reducer type for a specified + * numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPADD_DEFINITION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPADD_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opadd,tn,l,r) \ { *(t*)l += *(t*)r; } \ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opadd,tn) \ { *(t*)v = 0; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. The contents of reducer_opadd.c are as follows: - -CILK_C_REDUCER_OPADD_IMP(char,char) -CILK_C_REDUCER_OPADD_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPADD_IMP(signed char,schar) -CILK_C_REDUCER_OPADD_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPADD_IMP(short,short) -CILK_C_REDUCER_OPADD_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPADD_IMP(int,int) -CILK_C_REDUCER_OPADD_IMP(unsigned int,uint) -CILK_C_REDUCER_OPADD_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPADD_IMP(long,long) -CILK_C_REDUCER_OPADD_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPADD_IMP(long long,longlong) -CILK_C_REDUCER_OPADD_IMP(unsigned long long,ulonglong) -CILK_C_REDUCER_OPADD_IMP(float,float) -CILK_C_REDUCER_OPADD_IMP(double,double) -CILK_C_REDUCER_OPADD_IMP(long double,longdouble) - -*/ + +//@{ +/** @def CILK_C_REDUCER_OPADD_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` + * will be defined, and this macro will generate reducer implementation + * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, + * and this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPADD_DEFINITION(t,tn) +#else +# define CILK_C_REDUCER_OPADD_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPADD_DECLARATION(t,tn) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. 
+ */ +CILK_C_REDUCER_OPADD_INSTANCE(char, char) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned char, uchar) +CILK_C_REDUCER_OPADD_INSTANCE(signed char, schar) +CILK_C_REDUCER_OPADD_INSTANCE(wchar_t, wchar_t) +CILK_C_REDUCER_OPADD_INSTANCE(short, short) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned short, ushort) +CILK_C_REDUCER_OPADD_INSTANCE(int, int) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, uint) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned int, unsigned) /* alternate name */ +CILK_C_REDUCER_OPADD_INSTANCE(long, long) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned long, ulong) +CILK_C_REDUCER_OPADD_INSTANCE(long long, longlong) +CILK_C_REDUCER_OPADD_INSTANCE(unsigned long long, ulonglong) +CILK_C_REDUCER_OPADD_INSTANCE(float, float) +CILK_C_REDUCER_OPADD_INSTANCE(double, double) +CILK_C_REDUCER_OPADD_INSTANCE(long double, longdouble) + +//@endcond __CILKRTS_END_EXTERN_C +//@} + +//@} + #endif /* REDUCER_OPADD_H_INCLUDED */ diff --git a/libcilkrts/include/cilk/reducer_opand.h b/libcilkrts/include/cilk/reducer_opand.h index 0ee7b9faf57..3b27246d0ea 100644 --- a/libcilkrts/include/cilk/reducer_opand.h +++ b/libcilkrts/include/cilk/reducer_opand.h @@ -1,33 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer_opand.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_opand.h +/** @file reducer_opand.h + * + * @brief Defines classes for doing parallel bitwise and reductions. * - * Purpose: Reducer hyperobject to compute bitwise AND of values + * @ingroup ReducersAnd + * + * @see ReducersAnd */ #ifndef REDUCER_OPAND_H_INCLUDED @@ -35,317 +43,475 @@ #include <cilk/reducer.h> -#ifdef __cplusplus - -/* C++ Interface - * - * Purpose: Reducer hyperobject to compute bitwise AND values - * When bool is passed as 'Type', it computes logical AND - * operation. 
- * - * Classes: reducer_opand<Type> - * - * Description: - * ============ - * This component provides a reducer-type hyperobject representation - * that allows conducting bitwise AND operation to a non-local variable - * using the &=, & operators. A common operation - * when traversing a data structure is to bit-wise AND values - * into a non-local numeric variable. When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise AND result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. - * - * When bool is passed as the 'Type', this reducer conducts logic AND - * operation. - * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing a bit-wise AND - * operation on each object and accumulating the result of the operation - * into an integer variable. - *.. - * unsigned int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * unsigned int result = 1; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result &= compute(myArray[i]); - * } +/** @defgroup ReducersAnd Bitwise And Reducers * - * std::cout << "The result is: " << result << std::endl; + * Bitwise and reducers allow the computation of the bitwise and of a set of + * values in parallel. * - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' variable. - * The race is solved by changing 'result' to a 'reducer_opand' hyperobject: - *.. - * unsigned int compute(const X& v); - * - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... 
- * - * cilk::reducer_opand<unsigned int> result(1); - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * *result &= compute(myArray[i]); + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redopand_usage Usage Example + * + * cilk::reducer< cilk::op_and<unsigned> > r; + * cilk_for (int i = 0; i != N; ++i) { + * *r &= a[i]; * } + * unsigned result; + * r.move_out(result); * - * std::cout << "The result is: " - * << result.get_value() << std::endl; + * @section redopand_monoid The Monoid * - * return 0; - * } - * + * @subsection redopand_monoid_values Value Set + * + * The value set of a bitwise and reducer is the set of values of `Type`, + * which is expected to be a builtin integer type which has a representation + * as a sequence of bits (or something like it, such as `bool` or + * `std::bitset`). + * + * @subsection redopand_monoid_operator Operator + * + * The operator of a bitwise and reducer is the bitwise and operator, defined + * by the “`&`” binary operator on `Type`. + * + * @subsection redopand_monoid_identity Identity * - * Operations provided: - * ==================== - * Given 'reducer_opand' objects, x and y, the following are - * valid statements: - *.. - * *x &= 5; - * *x = *x & 5; - *.. - * The following are not valid expressions and will result in a run-time error - * in a debug build: - *.. - * x = y; // Cannot assign one reducer to another - * *x = *y & 5; // Mixed reducers - * *x = 5 & *x; // operator& is not necessarily commutative - *.. - *.. - * - * Requirements on the 'Type' parameter - * ==================================== - * The 'Type' parameter used to instantiate the 'reducer_opand' class must - * provide a &= operator that meets the requirements for an - * *associative* *mutating* *operator* as defined in the Cilk++ user manual. 
- * The identity function of 'Type' in class Monoid must yield a bit-wise - * AND identity, i.e., - * a value (such as true in bool) that, when AND with any other value, yields - * the other value. - * - * When unsigned int or bool is passed as 'Type', the identity function of - * Monoid returns AND identity. + * The identity value of the reducer is the value whose representation + * contains all 1-bits. This is expected to be the value of the expression + * `~Type()` (i.e., the bitwise negation operator applied to the default value + * of the value type). + * + * @section redopand_operations Operations + * + * @subsection redopand_constructors Constructors + * + * reducer() // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * + * @subsection redopand_get_set Set and Get + * + * r.set_value(const Type& value) + * const Type& = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) + * + * @subsection redopand_initial Initial Values + * + * If a bitwise and reducer is constructed without an explicit initial value, + * then its initial value will be its identity value, as long as `Type` + * satisfies the requirements of @ref redopand_types. + * + * @subsection redopand_view_ops View Operations + * + * *r &= a + * *r = *r & a + * *r = *r & a1 & a2 … & an + * + * @section redopand_types Type and Operator Requirements + * + * `Type` must be `Copy Constructible`, `Default Constructible`, and + * `Assignable`. + * + * The operator “`&=`” must be defined on `Type`, with `x &= a` having the + * same meaning as `x = x & a`. + * + * The expression `~ Type()` must be a valid expression which yields the + * identity value (the value of `Type` whose representation consists of all + * 1-bits). + * + * @section redopand_in_c Bitwise And Reducers in C + * + * The @ref CILK_C_REDUCER_OPAND and @ref CILK_C_REDUCER_OPAND_TYPE macros can + * be used to do bitwise and reductions in C. 
For example: + * + * CILK_C_REDUCER_OPAND(r, uint, ~0); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(r) &= a[i]; + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The bitwise AND of the elements of a is %x\n", REDUCER_VIEW(r)); + * + * See @ref reducers_c_predefined. */ -#include <new> +#ifdef __cplusplus namespace cilk { -/** - * @brief A reducer-type hyperobject representation that allows conducting - * bitwise AND operation to a non-local variable using the &=, & operators. +/** The bitwise and reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_and<Type> >`. It holds the accumulator variable + * for the reduction, and allows only `and` operations to be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `&=` operation would be used in an expression like `*r &= a`, where + * `r` is an op_and reducer variable. + * + * @tparam Type The type of the contained accumulator variable. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. * - * A common operation when traversing a data structure is to bit-wise AND - * values into a non-local numeric variable. When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise AND result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. 
+ * @see ReducersAnd + * @see op_and + * + * @ingroup ReducersAnd */ - template <typename Type> -class reducer_opand +class op_and_view : public scalar_view<Type> { - public: - /// Definition of data view, operation, and identity for reducer_opand - class Monoid : public monoid_base<Type> - { - public: - static void reduce(Type* left, Type* right); - - /// identity function must provide a value that, - /// when AND with any other values, yields the other value - void identity(Type* p) const { new ((void*) p) Type(~0); } + typedef scalar_view<Type> base; + +public: + /** Class to represent the right-hand side of `*reducer = *reducer & value`. + * + * The only assignment operator for the op_and_view class takes an + * rhs_proxy as its operand. This results in the syntactic restriction + * that the only expressions that can be assigned to an op_and_view are + * ones which generate an rhs_proxy — that is, expressions of the form + * `op_and_view & value ... & value`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; + * otherwise, the behavior will be undefined. (I.e., `v1 = v1 & x` is + * legal; `v1 = v2 & x` is illegal.) This condition will be checked with + * a runtime assertion when compiled in debug mode. + * + * @see op_and_view + */ + class rhs_proxy { + private: + friend class op_and_view; + + const op_and_view* m_view; + Type m_value; + + // Constructor is invoked only from op_and_view::operator&(). + // + rhs_proxy(const op_and_view* view, const Type& value) : m_view(view), m_value(value) {} + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + + public: + /** Bitwise and with an additional rhs value. If `v` is an op_and_view + * and `a1` is a value, then the expression `v & a1` invokes the + * view’s `operator&()` to create an rhs_proxy for `(v, a1)`; then + * `v & a1 & a2` invokes the rhs_proxy’s `operator&()` to create a new + * rhs_proxy for `(v, a1&a2)`. 
This allows the right-hand side of an + * assignment to be not just `view & value`, but + * `view & value & value ... & value`. The effect is that + * + * v = v & a1 & a2 ... & an; + * + * is evaluated as + * + * v = v & (a1 & a2 ... & an); + */ + rhs_proxy& operator&(const Type& x) { m_value &= x; return *this; } }; - /// "PRIVATE" HELPER CLASS - class temp_and { - friend class reducer_opand; - - Type* valuePtr_; - - // Default copy constructor, no assignment operator - temp_and& operator=(const temp_and&); - - explicit temp_and(Type* valuePtr); - - public: - temp_and& operator&(const Type& x); - }; - - public: - - /// Construct an 'reducer_opand' object with a value of 'Type()'. - reducer_opand(); - - /// Construct an 'reducer_opand' object with the specified initial value. - explicit reducer_opand(const Type& initial_value); - /// Return a const reference to the current value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Set the value of this object. - /// - /// @warning Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// AND 'x' to the value of this reducer and produce a temporary and object. - /// The temporary and can be used for additional bit-wise operations - /// or assigned back to this reducer. - temp_and operator&(const Type& x) const; - - /// AND 'x' to the value of this object. - reducer_opand& operator&=(const Type& x); + /** Default/identity constructor. This constructor initializes the + * contained value to `~ Type()`. + */ + op_and_view() : base(~Type()) {} + + /** Construct with a specified initial value. + */ + explicit op_and_view(const Type& v) : base(v) {} + + + /** Reduction operation. 
+ * + * This function is invoked by the @ref op_and monoid to combine the views + * of two strands when the right strand merges with the left one. It + * “ands” the value contained in the left-strand view with the value + * contained in the right-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_and monoid to implement the monoid + * reduce operation. + */ + void reduce(op_and_view* right) { this->m_value &= right->m_value; } + + /** @name Accumulator variable updates. + * + * These functions support the various syntaxes for “anding” the + * accumulator variable contained in the view with some value. + */ + //@{ + + /** And the accumulator variable with @a x. + */ + op_and_view& operator&=(const Type& x) { this->m_value &= x; return *this; } + + /** Create an object representing `*this & x`. + * + * @see rhs_proxy + */ + rhs_proxy operator&(const Type& x) const { return rhs_proxy(this, x); } + + /** Assign the result of a `view & value` expression to the view. Note that + * this is the only assignment operator for this class. + * + * @see rhs_proxy + */ + op_and_view& operator=(const rhs_proxy& rhs) { + __CILKRTS_ASSERT(this == rhs.m_view); + this->m_value &= rhs.m_value; + return *this; + } + + //@} +}; - /// Merge the result of AND operation into this object. The AND operation - /// must involve this reducer, i.e., x = x + 5; not x = y + 5; - reducer_opand& operator=(const temp_and& temp); +/** Monoid class for bitwise and reductions. Instantiate the cilk::reducer + * template class with an op_and monoid to create a bitwise and reducer + * class. For example, to compute the bitwise and of a set of `unsigned long` + * values: + * + * cilk::reducer< cilk::op_and<unsigned long> > r; + * + * @tparam Type The reducer value type. 
+ * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersAnd + * @see op_and_view + * + * @ingroup ReducersAnd + */ +template <typename Type, bool Align = false> +struct op_and : public monoid_with_view<op_and_view<Type>, Align> {}; +/** Deprecated bitwise and reducer class. + * + * reducer_opand is the same as @ref reducer<@ref op_and>, except that + * reducer_opand is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. For + * example, a value is anded with a `reducer<%op_and>` with `*r &= a`, but a + * value can be anded with a `%reducer_opand` with `r &= a`. + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_opand. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_opand` + * and `reducer<%op_and>`. This allows incremental code + * conversion: old code that used `%reducer_opand` can pass a + * `%reducer_opand` to a converted function that now expects a + * pointer or reference to a `reducer<%op_and>`, and vice + * versa. + * + * @tparam Type The value type of the reducer. + * + * @see op_and + * @see reducer + * @see ReducersAnd + * + * @ingroup ReducersAnd + */ +template <typename Type> +class reducer_opand : public reducer< op_and<Type, true> > +{ + typedef reducer< op_and<Type, true> > base; + using base::view; + +public: + /// The view type for the reducer. 
+ typedef typename base::view_type view_type; + + /// The view’s rhs proxy type. + typedef typename view_type::rhs_proxy rhs_proxy; + + /// The view type for the reducer. + typedef view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Default constructor. + * + * Constructs the wrapper with the default initial value of `Type()` + * (not the identity value). + */ + reducer_opand() : base(Type()) {} + + /** Value constructor. + * + * Constructs the wrapper with a specified initial value. + */ + explicit reducer_opand(const Type& initial_value) : base(initial_value) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_and_view. */ + //@{ + + /// @copydoc op_and_view::operator&=(const Type&) + reducer_opand& operator&=(const Type& x) + { + view() &= x; + return *this; + } + + // The legacy definition of reducer_opand::operator&() has different + // behavior and a different return type than this definition. The legacy + // version is defined as a member function, so this new version is defined + // as a free function to give it a different signature, so that they won’t + // end up sharing a single object file entry. + + /// @copydoc op_and_view::operator&(const Type&) const + friend rhs_proxy operator&(const reducer_opand& r, const Type& x) + { + return r.view() & x; + } + + /// @copydoc op_and_view::operator=(const rhs_proxy&) + reducer_opand& operator=(const rhs_proxy& temp) + { + view() = temp; + return *this; + } + //@} + + /** @name Dereference + * @details Dereferencing a wrapper is a no-op. It simply returns the + * wrapper. 
Combined with the rule that the wrapper forwards view + * operations to its contained view, this means that view operations can + * be written the same way on reducers and wrappers, which is convenient + * for incrementally converting old code using wrappers to use reducers + * instead. That is: + * + * reducer< op_and<int> > r; + * *r &= a; // *r returns the view + * // operator &= is a view member function + * + * reducer_opand<int> w; + * *w &= a; // *w returns the wrapper + * // operator &= is a wrapper member function that + * // calls the corresponding view function + */ + //@{ reducer_opand& operator*() { return *this; } reducer_opand const& operator*() const { return *this; } reducer_opand* operator->() { return this; } reducer_opand const* operator->() const { return this; } - - private: - friend class temp_and; - - // Hyperobject to serve up views - reducer<Monoid> imp_; - - // Not copyable - reducer_opand(const reducer_opand&); - reducer_opand& operator=(const reducer_opand&); + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. 
+ */ + //@{ + operator reducer< op_and<Type, false> >& () + { + return *reinterpret_cast< reducer< op_and<Type, false> >* >(this); + } + operator const reducer< op_and<Type, false> >& () const + { + return *reinterpret_cast< const reducer< op_and<Type, false> >* >(this); + } + //@} }; -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// ------------------------------------ -// template class reducer_opand::Monoid -// ------------------------------------ - -/** - * Combines two views of the data. +/// @cond internal +/** Metafunction specialization for reducer conversion. + * + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_and<Type> >` class to have an + * `operator reducer_opand<Type>& ()` conversion operator that statically + * downcasts the `reducer<op_and>` to the corresponding `reducer_opand` type. + * (The reverse conversion, from `reducer_opand` to `reducer<op_and>`, is just + * an upcast, which is provided for free by the language.) 
+ * + * @ingroup ReducersAnd */ -template <typename Type> -void -reducer_opand<Type>::Monoid::reduce(Type* left, Type* right) -{ - *left &= *right; -} - -// ---------------------------- -// template class reducer_opand -// ---------------------------- - -template <typename Type> -inline -reducer_opand<Type>::reducer_opand() - : imp_(Type()) -{ -} - -template <typename Type> -inline -reducer_opand<Type>::reducer_opand(const Type& initial_value) - : imp_(initial_value) -{ -} - -template <typename Type> -inline -const Type& reducer_opand<Type>::get_value() const -{ - return imp_.view(); -} - -template <typename Type> -inline -void reducer_opand<Type>::set_value(const Type& value) -{ - imp_.view() = value; -} - -template <typename Type> -inline -typename reducer_opand<Type>::temp_and -reducer_opand<Type>::operator&(const Type& x) const -{ - Type* valuePtr = const_cast<Type*>(&imp_.view()); - *valuePtr = *valuePtr & x; - return temp_and(valuePtr); -} - -template <typename Type> -inline -reducer_opand<Type>& reducer_opand<Type>::operator&=(const Type& x) -{ - imp_.view() &= x; - return *this; -} - -template <typename Type> -inline -reducer_opand<Type>& -reducer_opand<Type>::operator=( - const typename reducer_opand<Type>::temp_and& temp) +template <typename Type, bool Align> +struct legacy_reducer_downcast<reducer<op_and<Type, Align> > > { - // No-op. Just test that temp was constructed from this. 
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_); - return *this; -} - -// -------------------------------------- -// template class reducer_opand::temp_and -// -------------------------------------- - -template <typename Type> -inline -reducer_opand<Type>::temp_and::temp_and(Type *valuePtr) - : valuePtr_(valuePtr) -{ -} - -template <typename Type> -inline -typename reducer_opand<Type>::temp_and& -reducer_opand<Type>::temp_and::operator&(const Type& x) -{ - *valuePtr_ = *valuePtr_ & x; - return *this; -} + typedef reducer_opand<Type> type; +}; +/// @endcond } // namespace cilk -#endif /* __cplusplus */ +#endif // __cplusplus + -/* C Interface +/** @ingroup ReducersAnd */ +//@{ +/** @name C language reducer macros + * + * These macros are used to declare and work with op_and reducers in C code. + * + * @see @ref page_reducers_in_c + */ + //@{ + __CILKRTS_BEGIN_EXTERN_C +/** Opand reducer type name. + * + * This macro expands into the identifier which is the name of the op_and + * reducer type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * + * @see @ref reducers_c_predefined + * @see ReducersAnd + */ #define CILK_C_REDUCER_OPAND_TYPE(tn) \ __CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn) + +/** Declare an op_and reducer object. + * + * This macro expands into a declaration of an op_and reducer object for a + * specified numeric type. For example: + * + * CILK_C_REDUCER_OPAND(my_reducer, ulong, ~0UL); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * @param v The initial value for the reducer. (A value which can be + * assigned to the numeric type represented by @a tn.)
+ * + * @see @ref reducers_c_predefined + * @see ReducersAnd + */ #define CILK_C_REDUCER_OPAND(obj,tn,v) \ CILK_C_REDUCER_OPAND_TYPE(tn) obj = \ CILK_C_INIT_REDUCER(_Typeof(obj.value), \ @@ -353,57 +519,81 @@ __CILKRTS_BEGIN_EXTERN_C __CILKRTS_MKIDENT(cilk_c_reducer_opand_identity_,tn), \ __cilkrts_hyperobject_noop_destroy, v) -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opand_,tn); \ +/// @cond internal + +/** Declare the op_and reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which + * implement the reducer functionality for the op_and reducer type for a + * specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. + */ +#define CILK_C_REDUCER_OPAND_DECLARATION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn); - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_OPAND_INSTANCE(char,char); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_OPAND_INSTANCE(signed char,schar); -CILK_C_REDUCER_OPAND_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_OPAND_INSTANCE(short,short); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_OPAND_INSTANCE(int,int); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_OPAND_INSTANCE(long,long); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_OPAND_INSTANCE(long long,longlong); -CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long,ulonglong); 
-CILK_C_REDUCER_OPAND_INSTANCE(float,float); -CILK_C_REDUCER_OPAND_INSTANCE(double,double); -CILK_C_REDUCER_OPAND_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPAND_IMP(t,tn) \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn); + +/** Define the op_and reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement + * the reducer functionality for the op_and reducer type for a specified + * numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. + */ +#define CILK_C_REDUCER_OPAND_DEFINITION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPAND_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opand,tn,l,r) \ { *(t*)l &= *(t*)r; } \ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opand,tn) \ - { *(t*)v = (t)-1; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. The contents of reducer_opand.c are as follows: - -CILK_C_REDUCER_OPAND_IMP(char,char) -CILK_C_REDUCER_OPAND_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPAND_IMP(signed char,schar) -CILK_C_REDUCER_OPAND_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPAND_IMP(short,short) -CILK_C_REDUCER_OPAND_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPAND_IMP(int,int) -CILK_C_REDUCER_OPAND_IMP(unsigned int,uint) -CILK_C_REDUCER_OPAND_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPAND_IMP(long,long) -CILK_C_REDUCER_OPAND_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPAND_IMP(long long,longlong) -CILK_C_REDUCER_OPAND_IMP(unsigned long long,ulonglong) - -*/ + { *(t*)v = ~((t)0); } + +//@{ +/** @def CILK_C_REDUCER_OPAND_INSTANCE + * @brief Declare or define implementation functions for a reducer type. 
+ * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` + * will be defined, and this macro will generate reducer implementation + * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and + * this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPAND_DEFINITION(t,tn) +#else +# define CILK_C_REDUCER_OPAND_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPAND_DECLARATION(t,tn) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for + * each numeric type. + */ +CILK_C_REDUCER_OPAND_INSTANCE(char, char) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned char, uchar) +CILK_C_REDUCER_OPAND_INSTANCE(signed char, schar) +CILK_C_REDUCER_OPAND_INSTANCE(wchar_t, wchar_t) +CILK_C_REDUCER_OPAND_INSTANCE(short, short) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned short, ushort) +CILK_C_REDUCER_OPAND_INSTANCE(int, int) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, uint) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned int, unsigned) /* alternate name */ +CILK_C_REDUCER_OPAND_INSTANCE(long, long) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned long, ulong) +CILK_C_REDUCER_OPAND_INSTANCE(long long, longlong) +CILK_C_REDUCER_OPAND_INSTANCE(unsigned long long, ulonglong) + +//@endcond __CILKRTS_END_EXTERN_C -#endif /* REDUCER_OPAND_H_INCLUDED */ +//@} + +//@} + +#endif /* REDUCER_OPAND_H_INCLUDED */ diff --git a/libcilkrts/include/cilk/reducer_opmul.h b/libcilkrts/include/cilk/reducer_opmul.h new file mode 100644 index 00000000000..47dce121ad9 --- /dev/null +++ b/libcilkrts/include/cilk/reducer_opmul.h @@ -0,0 +1,437 @@ +/* reducer_opmul.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2012-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + */ + +/** @file reducer_opmul.h + * + * @brief Defines classes for doing parallel multiplication reductions. + * + * @ingroup ReducersMul + * + * @see ReducersMul + */ + +#ifndef REDUCER_OPMUL_H_INCLUDED +#define REDUCER_OPMUL_H_INCLUDED + +#include <cilk/reducer.h> + +/** @defgroup ReducersMul Multiplication Reducers + * + * Multiplication reducers allow the computation of the product of a set of + * values in parallel. + * + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. 
+ * + * @section redopmul_usage Usage Example + * + * cilk::reducer< cilk::op_mul<double> > r; + * cilk_for (int i = 0; i != N; ++i) { + * *r *= a[i]; + * } + * double product; + * r.move_out(product); + * + * @section redopmul_monoid The Monoid + * + * @subsection redopmul_monoid_values Value Set + * + * The value set of a multiplication reducer is the set of values of `Type`, + * which is expected to be a builtin numeric type (or something like it, such + * as `std::complex`). + * + * @subsection redopmul_monoid_operator Operator + * + * The operator of a multiplication reducer is the multiplication operation, + * defined by the “`*`” binary operator on `Type`. + * + * @subsection redopmul_monoid_identity Identity + * + * The identity value of the reducer is the numeric value “`1`”. This is + * expected to be the value of the expression `Type(1)`. + * + * @section redopmul_operations Operations + * + * @subsection redopmul_constructors Constructors + * + * reducer() // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * + * @subsection redopmul_get_set Set and Get + * + * r.set_value(const Type& value) + * const Type& = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) + * + * @subsection redopmul_initial Initial Values + * + * If a multiplication reducer is constructed without an explicit initial + * value, then its initial value will be its identity value, as long as `Type` + * satisfies the requirements of @ref redopmul_types. + * + * @subsection redopmul_view_ops View Operations + * + * *r *= a + * *r = *r * a + * *r = *r * a1 * a2 … * an + * + * @section redopmul_floating_point Issues with Floating-Point Types + * + * Because of overflow and underflow issues, floating-point multiplication is + * not really associative. For example, `(1e200 * 1e-200) * 1e-200 == 1e-200`, + * but `1e200 * (1e-200 * 1e-200) == 0`.
+ * + * In many cases, this won’t matter, but computations which have been + * carefully ordered to control overflow and underflow may not deal well with + * being reassociated. In general, you should be sure to understand the + * floating-point behavior of your program before doing any transformation + * that will reassociate its computations. + * + * @section redopmul_types Type and Operator Requirements + * + * `Type` must be `Copy Constructible`, `Default Constructible`, and + * `Assignable`. + * + * The operator “`*=`” must be defined on `Type`, with `x *= a` having the same + * meaning as `x = x * a`. + * + * The expression `Type(1)` must be a valid expression which yields the + * identity value (the value of `Type` whose numeric value is `1`). + * + * @section redopmul_in_c Multiplication Reducers in C + * + * The @ref CILK_C_REDUCER_OPMUL and @ref CILK_C_REDUCER_OPMUL_TYPE macros can + * be used to do multiplication reductions in C. For example: + * + * CILK_C_REDUCER_OPMUL(r, double, 1); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(r) *= a[i]; + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The product of the elements of a is %f\n", REDUCER_VIEW(r)); + * + * See @ref reducers_c_predefined. + */ + +#ifdef __cplusplus + +namespace cilk { + +/** The multiplication reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_mul<Type> >`. It holds the accumulator variable + * for the reduction, and allows only multiplication operations to be + * performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `*=` operation would be used in an expression like `*r *= a`, where + * `r` is an op_mul reducer variable. + * + * @tparam Type The type of the contained accumulator variable. 
This will + * be the value type of a monoid_with_view that is + * instantiated with this view. + * + * @see ReducersMul + * @see op_mul + * + * @ingroup ReducersMul + */ +template <typename Type> +class op_mul_view : public scalar_view<Type> +{ + typedef scalar_view<Type> base; + +public: + /** Class to represent the right-hand side of `*reducer = *reducer * value`. + * + * The only assignment operator for the op_mul_view class takes an + * rhs_proxy as its operand. This results in the syntactic restriction + * that the only expressions that can be assigned to an op_mul_view are + * ones which generate an rhs_proxy — that is, expressions of the form + * `op_mul_view * value ... * value`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; + * otherwise, the behavior will be undefined. (I.e., `v1 = v1 * x` is + * legal; `v1 = v2 * x` is illegal.) This condition will be checked with a + * runtime assertion when compiled in debug mode. + * + * @see op_mul_view + */ + class rhs_proxy { + friend class op_mul_view; + + const op_mul_view* m_view; + Type m_value; + + // Constructor is invoked only from op_mul_view::operator*(). + // + rhs_proxy(const op_mul_view* view, const Type& value) : m_view(view), m_value(value) {} + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + + public: + /** Multiply by an additional rhs value. If `v` is an op_mul_view and + * `a1` is a value, then the expression `v * a1` invokes the view’s + * `operator*()` to create an rhs_proxy for `(v, a1)`; then + * `v * a1 * a2` invokes the rhs_proxy’s `operator*()` to create a + * new rhs_proxy for `(v, a1*a2)`. This allows the right-hand side of + * an assignment to be not just `view * value`, but + * `view * value * value ... * value`. The effect is that + * + * v = v * a1 * a2 ... * an; + * + * is evaluated as + * + * v = v * (a1 * a2 ... 
* an); + */ + rhs_proxy& operator*(const Type& x) { m_value *= x; return *this; } + }; + + + /** Default/identity constructor. This constructor initializes the + * contained value to `Type(1)`, which is expected to be the identity + * value for multiplication on `Type`. + */ + op_mul_view() : base(Type(1)) {} + + /** Construct with a specified initial value. + */ + explicit op_mul_view(const Type& v) : base(v) {} + + /** Reduction operation. + * + * This function is invoked by the @ref op_mul monoid to combine the views + * of two strands when the right strand merges with the left one. It + * multiplies the value contained in the left-strand view by the value + * contained in the right-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_mul monoid to implement the monoid + * reduce operation. + */ + void reduce(op_mul_view* right) { this->m_value *= right->m_value; } + + /** @name Accumulator variable updates. + * + * These functions support the various syntaxes for multiplying the + * accumulator variable contained in the view by some value. + */ + //@{ + + /** Multiply the accumulator variable by @a x. + */ + op_mul_view& operator*=(const Type& x) { this->m_value *= x; return *this; } + + /** Create an object representing `*this * x`. + * + * @see rhs_proxy + */ + rhs_proxy operator*(const Type& x) const { return rhs_proxy(this, x); } + + /** Assign the result of a `view * value` expression to the view. Note that + * this is the only assignment operator for this class. + * + * @see rhs_proxy + */ + op_mul_view& operator=(const rhs_proxy& rhs) { + __CILKRTS_ASSERT(this == rhs.m_view); + this->m_value *= rhs.m_value; + return *this; + } + + //@} +}; + +/** Monoid class for multiplication reductions. 
Instantiate the cilk::reducer + * template class with an op_mul monoid to create a multiplication reducer + * class. For example, to compute the product of a set of `double` values: + * + * cilk::reducer< cilk::op_mul<double> > r; + * + * @see ReducersMul + * @see op_mul_view + * + * @ingroup ReducersMul + */ +template <typename Type> +struct op_mul : public monoid_with_view< op_mul_view<Type> > {}; + +} // namespace cilk + +#endif // __cplusplus + + +/** @ingroup ReducersMul + */ +//@{ + +/** @name C language reducer macros + * + * These macros are used to declare and work with numeric op_mul reducers in + * C code. + * + * @see @ref page_reducers_in_c + */ + //@{ + +__CILKRTS_BEGIN_EXTERN_C + +/** Opmul reducer type name. + * + * This macro expands into the identifier which is the name of the op_mul + * reducer type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * + * @see @ref reducers_c_predefined + * @see ReducersMul + */ +#define CILK_C_REDUCER_OPMUL_TYPE(tn) \ + __CILKRTS_MKIDENT(cilk_c_reducer_opmul_,tn) + +/** Declare an op_mul reducer object. + * + * This macro expands into a declaration of an op_mul reducer object for a + * specified numeric type. For example: + * + * CILK_C_REDUCER_OPMUL(my_reducer, double, 1.0); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * @param v The initial value for the reducer. (A value which can be + * assigned to the numeric type represented by @a tn.)
+ * + * @see @ref reducers_c_predefined + * @see ReducersMul + */ +#define CILK_C_REDUCER_OPMUL(obj,tn,v) \ + CILK_C_REDUCER_OPMUL_TYPE(tn) obj = \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + __CILKRTS_MKIDENT(cilk_c_reducer_opmul_reduce_,tn), \ + __CILKRTS_MKIDENT(cilk_c_reducer_opmul_identity_,tn), \ + __cilkrts_hyperobject_noop_destroy, v) + +/// @cond internal + +/** Declare the op_mul reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which + * implement the reducer functionality for the op_mul reducer type for a + * specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. + */ +#define CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r); \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn); + +/** Define the op_mul reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement + * the reducer functionality for the op_mul reducer type for a specified + * numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. + */ +#define CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPMUL_TYPE(tn); \ + __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opmul,tn,l,r) \ + { *(t*)l *= *(t*)r; } \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opmul,tn) \ + { *(t*)v = 1; } + +//@{ +/** @def CILK_C_REDUCER_OPMUL_INSTANCE + * @brief Declare or define implementation functions for a reducer type. 
+ * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` + * will be defined, and this macro will generate reducer implementation + * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and + * this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPMUL_DEFINITION(t,tn) +#else +# define CILK_C_REDUCER_OPMUL_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPMUL_DECLARATION(t,tn) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. + */ +CILK_C_REDUCER_OPMUL_INSTANCE(char, char) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned char, uchar) +CILK_C_REDUCER_OPMUL_INSTANCE(signed char, schar) +CILK_C_REDUCER_OPMUL_INSTANCE(wchar_t, wchar_t) +CILK_C_REDUCER_OPMUL_INSTANCE(short, short) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned short, ushort) +CILK_C_REDUCER_OPMUL_INSTANCE(int, int) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, uint) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned int, unsigned) /* alternate name */ +CILK_C_REDUCER_OPMUL_INSTANCE(long, long) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long, ulong) +CILK_C_REDUCER_OPMUL_INSTANCE(long long, longlong) +CILK_C_REDUCER_OPMUL_INSTANCE(unsigned long long, ulonglong) +CILK_C_REDUCER_OPMUL_INSTANCE(float, float) +CILK_C_REDUCER_OPMUL_INSTANCE(double, double) +CILK_C_REDUCER_OPMUL_INSTANCE(long double, longdouble) + +//@endcond + +__CILKRTS_END_EXTERN_C + +//@} + +//@} + +#endif /* REDUCER_OPMUL_H_INCLUDED */ diff --git a/libcilkrts/include/cilk/reducer_opor.h b/libcilkrts/include/cilk/reducer_opor.h index 2b2de9bc163..5472e603555 100644 --- a/libcilkrts/include/cilk/reducer_opor.h +++ b/libcilkrts/include/cilk/reducer_opor.h @@ -1,33 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer_opor.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_opor.h +/** @file reducer_opor.h + * + * @brief Defines classes for doing parallel bitwise or reductions. * - * Purpose: Reducer hyperobject to compute bitwise OR values + * @ingroup ReducersOr + * + * @see ReducersOr */ #ifndef REDUCER_OPOR_H_INCLUDED @@ -35,367 +43,551 @@ #include <cilk/reducer.h> -#ifdef __cplusplus - -/* C++ Interface - * - * Purpose: Reducer hyperobject to compute bitwise OR values - * When bool is passed as 'Type', it computes logical OR - * operation. - * - * Classes: reducer_opor<Type> - * - * Description: - * ============ - * This component provides a reducer-type hyperobject representation - * that allows conducting bitwise OR operation to a non-local variable - * using the |=, | operators. A common operation - * when traversing a data structure is to bit-wise OR values - * into a non-local numeric variable. When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise OR result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. - * - * When bool is passed as the 'Type', this reducer conducts logic OR - * operation. - * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing a bit-wise OR - * operation on each object and accumulating the result of the operation - * into an integer variable. - *.. - * unsigned int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... 
- * - * unsigned int result = 0; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result |= compute(myArray[i]); - * } +/** @defgroup ReducersOr Bitwise Or Reducers * - * std::cout << "The result is: " << result << std::endl; + * Bitwise or reducers allow the computation of the bitwise or of a set of + * values in parallel. * - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' variable. - * The race is solved by changing 'result' to a 'reducer_opor' hyperobject: - *.. - * unsigned int compute(const X& v); - * - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * cilk::reducer_opor<unsigned int> result; - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * *result |= compute(myArray[i]); + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redopor_usage Usage Example + * + * cilk::reducer< cilk::op_or<unsigned> > r; + * cilk_for (int i = 0; i != N; ++i) { + * *r |= a[i]; * } + * unsigned result; + * r.move_out(result); * - * std::cout << "The result is: " - * << result.get_value() << std::endl; + * @section redopor_monoid The Monoid * - * return 0; - * } - * + * @subsection redopor_monoid_values Value Set + * + * The value set of a bitwise or reducer is the set of values of `Type`, which + * is expected to be a builtin integer type which has a representation as a + * sequence of bits (or something like it, such as `bool` or `std::bitset`). + * + * @subsection redopor_monoid_operator Operator + * + * The operator of a bitwise or reducer is the bitwise or operator, defined by + * the “`|`” binary operator on `Type`.
+ * + * @subsection redopor_monoid_identity Identity * - * Operations provided: - * ==================== - * Given 'reducer_opor' objects, x and y, the following are - * valid statements: - *.. - * *x |= 5; - * *x = *x | 5; - *.. - * The following are not valid expressions and will result in a run-time error - * in a debug build: - *.. - * x = y; // Cannot assign one reducer to another - * *x = *y | 5; // Mixed reducers - * *x = 5 | *x; // operator| is not necessarily commutative - *.. - * - * Requirements on the 'Type' parameter - * ==================================== - * The 'Type' parameter used to instantiate the 'reducer_opor' class must - * provide a |= operator that meets the requirements for an - * *associative* *mutating* *operator* as defined in the Cilk++ user manual. - * The default constructor for 'Type' must yield an OR identity, i.e., - * a value (such as unsigned int 0, bool false) that, when performed - * OR operation to any other value, yields the other value. + * The identity value of the reducer is the value whose representation + * contains all 0-bits. This is expected to be the value of the default + * constructor `Type()`. + * + * @section redopor_operations Operations + * + * @subsection redopor_constructors Constructors + * + * reducer() // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * + * @subsection redopor_get_set Set and Get + * + * r.set_value(const Type& value) + * const Type& = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) + * + * @subsection redopor_initial Initial Values + * + * If a bitwise or reducer is constructed without an explicit initial value, + * then its initial value will be its identity value, as long as `Type` + * satisfies the requirements of @ref redopor_types. 
+ * + * @subsection redopor_view_ops View Operations + * + * *r |= a + * *r = *r | a + * *r = *r | a1 | a2 … | an + * + * @section redopor_types Type and Operator Requirements + * + * `Type` must be `Copy Constructible`, `Default Constructible`, and + * `Assignable`. + * + * The operator “`|=`” must be defined on `Type`, with `x |= a` having the + * same meaning as `x = x | a`. + * + * The expression `Type()` must be a valid expression which yields the + * identity value (the value of `Type` whose representation consists of all + * 0-bits). + * + * @section redopor_in_c Bitwise Or Reducers in C + * + * The @ref CILK_C_REDUCER_OPOR and @ref CILK_C_REDUCER_OPOR_TYPE macros can + * be used to do bitwise or reductions in C. For example: + * + * CILK_C_REDUCER_OPOR(r, uint, 0); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(r) |= a[i]; + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The bitwise OR of the elements of a is %x\n", REDUCER_VIEW(r)); + * + * See @ref reducers_c_predefined. */ -#include <new> +#ifdef __cplusplus -namespace cilk -{ +namespace cilk { -/** - * @brief A reducer-type hyperobject representation that supports bitwise OR - * operations on a non-local variable using the |=, | operators. +/** The bitwise or reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_or<Type> >`. It holds the accumulator variable for + * the reduction, and allows only `or` operations to be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `|=` operation would be used in an expression like `*r |= a`, where + * `r` is an op_or reducer variable. * - * A common operation when traversing a data structure is to bit-wise OR - * values into a non-local numeric variable.
When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise OR result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. + * @tparam Type The type of the contained accumulator variable. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. * - * When bool is passed as the 'Type', this reducer conducts logic OR - * operation. + * @see ReducersOr + * @see op_or + * + * @ingroup ReducersOr */ template <typename Type> -class reducer_opor +class op_or_view : public scalar_view<Type> { - public: - /// Definition of data view, operation, and identity for reducer_opor - class Monoid : public monoid_base<Type> - { - public: - /// Combines two views of the data - static void reduce(Type* left, Type* right); + typedef scalar_view<Type> base; + +public: + /** Class to represent the right-hand side of `*reducer = *reducer | value`. + * + * The only assignment operator for the op_or_view class takes an + * rhs_proxy as its operand. This results in the syntactic restriction + * that the only expressions that can be assigned to an op_or_view are + * ones which generate an rhs_proxy — that is, expressions of the form + * `op_or_view | value ... | value`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; + * otherwise, the behavior will be undefined. (I.e., `v1 = v1 | x` is + * legal; `v1 = v2 | x` is illegal.) This condition will be checked with + * a runtime assertion when compiled in debug mode. + * + * @see op_or_view + */ + class rhs_proxy { + friend class op_or_view; + + const op_or_view* m_view; + Type m_value; + + // Constructor is invoked only from op_or_view::operator|(). 
+ // + rhs_proxy(const op_or_view* view, const Type& value) : m_view(view), m_value(value) {} + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + + public: + /** Bitwise or with an additional rhs value. If `v` is an op_or_view + * and `a1` is a value, then the expression `v | a1` invokes the + * view’s `operator|()` to create an rhs_proxy for `(v, a1)`; then + * `v | a1 | a2` invokes the rhs_proxy’s `operator|()` to create a new + * rhs_proxy for `(v, a1|a2)`. This allows the right-hand side of an + * assignment to be not just `view | value`, but + * `view | value | value ... | value`. The effect is that + * + * v = v | a1 | a2 ... | an; + * + * is evaluated as + * + * v = v | (a1 | a2 ... | an); + */ + rhs_proxy& operator|(const Type& x) { m_value |= x; return *this; } }; - /// "PRIVATE" HELPER CLASS - class temp_or { - friend class reducer_opor; - - Type* valuePtr_; - // Default copy constructor, no assignment operator - temp_or& operator=(const temp_or&); + /** Default/identity constructor. This constructor initializes the + * contained value to `Type()`. + */ + op_or_view() : base() {} + + /** Construct with a specified initial value. + */ + explicit op_or_view(const Type& v) : base(v) {} + + /** Reduction operation. + * + * This function is invoked by the @ref op_or monoid to combine the views + * of two strands when the right strand merges with the left one. It + * “ors” the value contained in the left-strand view with the value + * contained in the right-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_or monoid to implement the monoid + * reduce operation. + */ + void reduce(op_or_view* right) { this->m_value |= right->m_value; } + + /** @name Accumulator variable updates.
+ * + * These functions support the various syntaxes for “oring” the + * accumulator variable contained in the view with some value. + */ + //@{ + + /** Or the accumulator variable with @a x. + */ + op_or_view& operator|=(const Type& x) { this->m_value |= x; return *this; } + + /** Create an object representing `*this | x`. + * + * @see rhs_proxy + */ + rhs_proxy operator|(const Type& x) const { return rhs_proxy(this, x); } + + /** Assign the result of a `view | value` expression to the view. Note that + * this is the only assignment operator for this class. + * + * @see rhs_proxy + */ + op_or_view& operator=(const rhs_proxy& rhs) { + __CILKRTS_ASSERT(this == rhs.m_view); + this->m_value |= rhs.m_value; + return *this; + } + + //@} +}; - explicit temp_or(Type* valuePtr); +/** Monoid class for bitwise or reductions. Instantiate the cilk::reducer + * template class with an op_or monoid to create a bitwise or reducer + * class. For example, to compute the bitwise or of a set of `unsigned long` + * values: + * + * cilk::reducer< cilk::op_or<unsigned long> > r; + * + * @tparam Type The reducer value type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersOr + * @see op_or_view + * + * @ingroup ReducersOr + */ +template <typename Type, bool Align = false> +struct op_or : public monoid_with_view<op_or_view<Type>, Align> {}; - public: - temp_or& operator|(const Type& x); - }; +/** Deprecated bitwise or reducer class. + * + * reducer_opor is the same as @ref reducer<@ref op_or>, except that + * reducer_opor is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. 
For + * example, a value is ored with a `reducer<%op_or>` with `*r |= a`, but a + * value can be ored with a `%reducer_opor` with `r |= a`. + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_opor. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_opor` + * and `reducer<%op_or>`. This allows incremental code + * conversion: old code that used `%reducer_opor` can pass a + * `%reducer_opor` to a converted function that now expects a + * pointer or reference to a `reducer<%op_or>`, and vice + * versa. + * + * @tparam Type The value type of the reducer. + * + * @see op_or + * @see reducer + * @see ReducersOr + * + * @ingroup ReducersOr + */ +template <typename Type> +class reducer_opor : public reducer< op_or<Type, true> > +{ + typedef reducer< op_or<Type, true> > base; + using base::view; public: - - /// Construct an 'reducer_opor' object with a value of 'Type()'. - reducer_opor(); - - /// Construct an 'reducer_opor' object with the specified initial value. - explicit reducer_opor(const Type& initial_value); - - /// Return a const reference to the current value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Set the value of this object. - /// - /// @warning: Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// OR 'x' to the value of this reducer and produce a temporary and object. 
- /// The temporary and can be used for additional bit-wise operations - /// or assigned back to this reducer. - temp_or operator|(const Type& x) const; - - /// OR 'x' to the value of this object. - reducer_opor& operator|=(const Type& x); - - /// Merge the result of OR operation into this object. The OR operation - /// must involve this reducer, i.e., x = x + 5; not x = y + 5; - reducer_opor& operator=(const temp_or& temp); - + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view’s rhs proxy type. + typedef typename view_type::rhs_proxy rhs_proxy; + + /// The view type for the reducer. + typedef view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Default (identity) constructor. + * + * Constructs the wrapper with the default initial value of `Type()`. + */ + reducer_opor() {} + + /** Value constructor. + * + * Constructs the wrapper with a specified initial value. + */ + explicit reducer_opor(const Type& initial_value) : base(initial_value) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_or_view. */ + //@{ + + /// @copydoc op_or_view::operator|=(const Type&) + reducer_opor& operator|=(const Type& x) + { + view() |= x; return *this; + } + + // The legacy definition of reducer_opor::operator|() has different + // behavior and a different return type than this definition. The legacy + // version is defined as a member function, so this new version is defined + // as a free function to give it a different signature, so that they won’t + // end up sharing a single object file entry.
+ + /// @copydoc op_or_view::operator|(const Type&) const + friend rhs_proxy operator|(const reducer_opor& r, const Type& x) + { + return r.view() | x; + } + + /// @copydoc op_or_view::operator=(const rhs_proxy&) + reducer_opor& operator=(const rhs_proxy& temp) + { + view() = temp; return *this; + } + //@} + + /** @name Dereference + * @details Dereferencing a wrapper is a no-op. It simply returns the + * wrapper. Combined with the rule that the wrapper forwards view + * operations to its contained view, this means that view operations can + * be written the same way on reducers and wrappers, which is convenient + * for incrementally converting old code using wrappers to use reducers + * instead. That is: + * + * reducer< op_or<int> > r; + * *r |= a; // *r returns the view + * // operator |= is a view member function + * + * reducer_opor<int> w; + * *w |= a; // *w returns the wrapper + * // operator |= is a wrapper member function that + * // calls the corresponding view function + */ + //@{ reducer_opor& operator*() { return *this; } reducer_opor const& operator*() const { return *this; } reducer_opor* operator->() { return this; } reducer_opor const* operator->() const { return this; } - - private: - friend class temp_or; - - // Hyperobject to serve up views - reducer<Monoid> imp_; - - // Not copyable - reducer_opor(const reducer_opor&); - reducer_opor& operator=(const reducer_opor&); + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class.
+ */ + //@{ + operator reducer< op_or<Type, false> >& () + { + return *reinterpret_cast< reducer< op_or<Type, false> >* >(this); + } + operator const reducer< op_or<Type, false> >& () const + { + return *reinterpret_cast< const reducer< op_or<Type, false> >* >(this); + } + //@} + }; -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// ------------------------------------ -// template class reducer_opor::Monoid -// ------------------------------------ - -template <typename Type> -void -reducer_opor<Type>::Monoid::reduce(Type* left, Type* right) -{ - *left |= *right; -} - -// ---------------------------- -// template class reducer_opor -// ---------------------------- - -template <typename Type> -inline -reducer_opor<Type>::reducer_opor() - : imp_(Type()) -{ -} - -template <typename Type> -inline -reducer_opor<Type>::reducer_opor(const Type& initial_value) - : imp_(initial_value) -{ -} - -template <typename Type> -inline -const Type& reducer_opor<Type>::get_value() const -{ - return imp_.view(); -} - -template <typename Type> -inline -void reducer_opor<Type>::set_value(const Type& value) -{ - imp_.view() = value; -} - -template <typename Type> -inline -typename reducer_opor<Type>::temp_or -reducer_opor<Type>::operator|(const Type& x) const -{ - Type* valuePtr = const_cast<Type*>(&imp_.view()); - *valuePtr = *valuePtr | x; - return temp_or(valuePtr); -} - -template <typename Type> -inline -reducer_opor<Type>& reducer_opor<Type>::operator|=(const Type& x) -{ - imp_.view() |= x; - return *this; -} - -template <typename Type> -inline -reducer_opor<Type>& -reducer_opor<Type>::operator=( - const typename reducer_opor<Type>::temp_or& temp) -{ - // No-op. Just test that temp was constructed from this. 
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_); - return *this; -} - -// -------------------------------------- -// template class reducer_opor::temp_or -// -------------------------------------- - -template <typename Type> -inline -reducer_opor<Type>::temp_or::temp_or(Type *valuePtr) - : valuePtr_(valuePtr) -{ -} - -template <typename Type> -inline -typename reducer_opor<Type>::temp_or& -reducer_opor<Type>::temp_or::operator|(const Type& x) +/// @cond internal +/** Metafunction specialization for reducer conversion. + * + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_or<Type> >` class to have an + * `operator reducer_opor<Type>& ()` conversion operator that statically + * downcasts the `reducer<op_or>` to the corresponding `reducer_opor` type. + * (The reverse conversion, from `reducer_opor` to `reducer<op_or>`, is just + * an upcast, which is provided for free by the language.) + * + * @ingroup ReducersOr + */ +template <typename Type, bool Align> +struct legacy_reducer_downcast<reducer<op_or<Type, Align> > > { - *valuePtr_ = *valuePtr_ | x; - return *this; -} + typedef reducer_opor<Type> type; +}; +/// @endcond } // namespace cilk #endif /* __cplusplus */ -/* C Interface + +/** @ingroup ReducersOr */ +//@{ +/** @name C language reducer macros + * + * These macros are used to declare and work with op_or reducers in C code. + * + * @see @ref page_reducers_in_c + */ + //@{ + __CILKRTS_BEGIN_EXTERN_C +/** Opor reducer type name. + * + * This macro expands into the identifier which is the name of the op_or + * reducer type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * + * @see @ref reducers_c_predefined + * @see ReducersOr + */ #define CILK_C_REDUCER_OPOR_TYPE(tn) \ __CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn) + +/** Declare an op_or reducer object. 
+ * + * This macro expands into a declaration of an op_or reducer object for a + * specified numeric type. For example: + * + * CILK_C_REDUCER_OPOR(my_reducer, ulong, 0); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * @param v The initial value for the reducer. (A value which can be + * assigned to the numeric type represented by @a tn.) + * + * @see @ref reducers_c_predefined + * @see ReducersOr + */ #define CILK_C_REDUCER_OPOR(obj,tn,v) \ CILK_C_REDUCER_OPOR_TYPE(tn) obj = \ - CILK_C_INIT_REDUCER(_Typeof(obj.value), \ + CILK_C_INIT_REDUCER(_Typeof(obj.value), \ __CILKRTS_MKIDENT(cilk_c_reducer_opor_reduce_,tn), \ __CILKRTS_MKIDENT(cilk_c_reducer_opor_identity_,tn), \ __cilkrts_hyperobject_noop_destroy, v) -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opor_,tn); \ +/// @cond internal + +/** Declare the op_or reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which + * implement the reducer functionality for the op_or reducer type for a + * specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPOR_DECLARATION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn); - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_OPOR_INSTANCE(char,char); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_OPOR_INSTANCE(signed char,schar); -CILK_C_REDUCER_OPOR_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_OPOR_INSTANCE(short,short); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_OPOR_INSTANCE(int,int); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_OPOR_INSTANCE(long,long); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_OPOR_INSTANCE(long long,longlong); -CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_OPOR_INSTANCE(float,float); -CILK_C_REDUCER_OPOR_INSTANCE(double,double); -CILK_C_REDUCER_OPOR_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPOR_IMP(t,tn) \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn); + +/** Define the op_or reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement + * the reducer functionality for the op_or reducer type for a specified + * numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPOR_DEFINITION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPOR_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opor,tn,l,r) \ { *(t*)l |= *(t*)r; } \ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opor,tn) \ - { *(t*)v = (t)0; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. The contents of reducer_opor.c are as follows: - -CILK_C_REDUCER_OPOR_IMP(char,char) -CILK_C_REDUCER_OPOR_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPOR_IMP(signed char,schar) -CILK_C_REDUCER_OPOR_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPOR_IMP(short,short) -CILK_C_REDUCER_OPOR_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPOR_IMP(int,int) -CILK_C_REDUCER_OPOR_IMP(unsigned int,uint) -CILK_C_REDUCER_OPOR_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPOR_IMP(long,long) -CILK_C_REDUCER_OPOR_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPOR_IMP(long long,longlong) -CILK_C_REDUCER_OPOR_IMP(unsigned long long,ulonglong) - -*/ + { *(t*)v = 0; } + +//@{ +/** @def CILK_C_REDUCER_OPOR_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` + * will be defined, and this macro will generate reducer implementation + * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and + * this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPOR_DEFINITION(t,tn) +#else +# define CILK_C_REDUCER_OPOR_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPOR_DECLARATION(t,tn) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. 
+ */ +CILK_C_REDUCER_OPOR_INSTANCE(char, char) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned char, uchar) +CILK_C_REDUCER_OPOR_INSTANCE(signed char, schar) +CILK_C_REDUCER_OPOR_INSTANCE(wchar_t, wchar_t) +CILK_C_REDUCER_OPOR_INSTANCE(short, short) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned short, ushort) +CILK_C_REDUCER_OPOR_INSTANCE(int, int) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, uint) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned int, unsigned) /* alternate name */ +CILK_C_REDUCER_OPOR_INSTANCE(long, long) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned long, ulong) +CILK_C_REDUCER_OPOR_INSTANCE(long long, longlong) +CILK_C_REDUCER_OPOR_INSTANCE(unsigned long long, ulonglong) + +//@endcond __CILKRTS_END_EXTERN_C +//@} + +//@} + #endif /* REDUCER_OPOR_H_INCLUDED */ diff --git a/libcilkrts/include/cilk/reducer_opxor.h b/libcilkrts/include/cilk/reducer_opxor.h index 8aad7c052fa..5e128e7f2c7 100644 --- a/libcilkrts/include/cilk/reducer_opxor.h +++ b/libcilkrts/include/cilk/reducer_opxor.h @@ -1,33 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. +/* reducer_opxor.h -*- C++ -*- * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_opxor.h +/** @file reducer_opxor.h + * + * @brief Defines classes for doing parallel bitwise or reductions. * - * Purpose: Reducer hyperobject to compute bitwise XOR values + * @ingroup ReducersXor + * + * @see ReducersXor */ #ifndef REDUCER_OPXOR_H_INCLUDED @@ -35,308 +43,469 @@ #include <cilk/reducer.h> -#ifdef __cplusplus - -/* C++ interface - * - * Purpose: Reducer hyperobject to compute bitwise XOR values - * When bool is passed as 'Type', it computes logical XOR - * operation. 
- * - * Classes: reducer_opxxor<Type> - * - * Description: - * ============ - * This component provides a reducer-type hyperobject representation - * that allows conducting bitwise XOR operation to a non-local variable - * using the ^=, ^ operators. A common operation - * when traversing a data structure is to bit-wise XOR values - * into a non-local numeric variable. When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise XOR result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. - * - * When bool is passed as the 'Type', this reducer conducts logic XOR - * operation. - * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing a bit-wise XOR - * operation on each object and accumulating the result of the operation - * into an integer variable. - *.. - * unsigned int compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * unsigned int result = 0; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result ^= compute(myArray[i]); - * } +/** @defgroup ReducersXor Bitwise Xor Reducers * - * std::cout << "The result is: " << result << std::endl; + * Bitwise xor reducers allow the computation of the bitwise xor of a set of + * values in parallel. * - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' variable. - * The race is solved by changing 'result' to a 'reducer_opxor' hyperobject: - *.. - * unsigned int compute(const X& v); - * - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ...
- * - * cilk::reducer_opxor<unsigned int> result; - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * *result ^= compute(myArray[i]); + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file `reducers.md`, and particularly with @ref reducers_using, before trying + * to use the information in this file. + * + * @section redopxor_usage Usage Example + * + * cilk::reducer< cilk::op_xor<unsigned> > r; + * cilk_for (int i = 0; i != N; ++i) { + * *r ^= a[i]; * } + * unsigned result; + * r.move_out(result); * - * std::cout << "The result is: " - * << result.get_value() << std::endl; + * @section redopxor_monoid The Monoid * - * return 0; - * } - * + * @subsection redopxor_monoid_values Value Set + * + * The value set of a bitwise xor reducer is the set of values of `Type`, which + * is expected to be a builtin integer type which has a representation as a + * sequence of bits (or something like it, such as `bool` or `std::bitset`). + * + * @subsection redopxor_monoid_operator Operator + * + * The operator of a bitwise xor reducer is the bitwise xor operator, defined + * by the “`^`” binary operator on `Type`. + * + * @subsection redopxor_monoid_identity Identity * - * Operations provided: - * ==================== - * Given 'reducer_opxor' objects, x and y, the following are - * valid statements: - *.. - * x ^= 5; - * x = x ^ 5; - *.. - * The following are not valid expressions and will result in a run-time error - * in a debug build: - *.. - * x = y; // Cannot assign one reducer to another - * x = y ^ 5; // Mixed reducers - * x = 5 ^ x; // operator^ is not necessarily commutative - *.. - * - * Requirements on the 'Type' parameter - * ==================================== - * The 'Type' parameter used to instantiate the 'reducer_opxor' class must - * provide a ^= operator that meets the requirements for an - * *associative* *mutating* *operator* as defined in the Cilk++ user manual. 
- * The default constructor for 'Type' must yield an XOR identity, i.e., - * a value (such as unsigned int 0, bool false) that, when performed - * XOR operation to any other value, yields the other value. + * The identity value of the reducer is the value whose representation + * contains all 0-bits. This is expected to be the value of the default + * constructor `Type()`. + * + * @section redopxor_operations Operations + * + * @subsection redopxor_constructors Constructors + * + * reducer() // identity + * reducer(const Type& value) + * reducer(move_in(Type& variable)) + * + * @subsection redopxor_get_set Set and Get + * + * r.set_value(const Type& value) + * const Type& = r.get_value() const + * r.move_in(Type& variable) + * r.move_out(Type& variable) + * + * @subsection redopxor_initial Initial Values + * + * If a bitwise xor reducer is constructed without an explicit initial value, + * then its initial value will be its identity value, as long as `Type` + * satisfies the requirements of @ref redopxor_types. + * + * @subsection redopxor_view_ops View Operations + * + * *r ^= a + * *r = *r ^ a + * *r = *r ^ a1 ^ a2 … ^ an + * + * @section redopxor_types Type and Operator Requirements + * + * `Type` must be `Copy Constructible`, `Default Constructible`, and + * `Assignable`. + * + * The operator “`^=`” must be defined on `Type`, with `x ^= a` having the + * same meaning as `x = x ^ a`. + * + * The expression `Type()` must be a valid expression which yields the + * identity value (the value of `Type` whose representation consists of all + * 0-bits). + * + * @section redopxor_in_c Bitwise Xor Reducers in C + * + * The @ref CILK_C_REDUCER_OPXOR and @ref CILK_C_REDUCER_OPXOR_TYPE macros can + * be used to do bitwise xor reductions in C. 
For example: + * + * CILK_C_REDUCER_OPXOR(r, uint, 0); + * CILK_C_REGISTER_REDUCER(r); + * cilk_for(int i = 0; i != n; ++i) { + * REDUCER_VIEW(r) ^= a[i]; + * } + * CILK_C_UNREGISTER_REDUCER(r); + * printf("The bitwise XOR of the elements of a is %x\n", REDUCER_VIEW(r)); + * + * See @ref reducers_c_predefined. */ -#include <new> +#ifdef __cplusplus namespace cilk { -/** - * @brief A reducer-type hyperobject representation that supports bitwise XOR - * operations to a non-local variable using the ^=, ^ operators. +/** The bitwise xor reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_xor<Type> >`. It holds the accumulator variable + * for the reduction, and allows only `xor` operations to be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `^=` operation would be used in an expression like `*r ^= a`, where + * `r` is an op_xor reducer variable. + * + * @tparam Type The type of the contained accumulator variable. This will + * be the value type of a monoid_with_view that is + * instantiated with this view. * - * A common operation when traversing a data structure is to bit-wise XOR - * values into a non-local numeric variable. When Cilk parallelism is - * introduced, however, a data race will occur on the variable holding - * the bit-wise XOR result. By replacing the variable with the - * hyperobject defined in this component, the data race is eliminated. + * @see ReducersXor + * @see op_xor * - * When bool is passed as the 'Type', this reducer conducts logic XOR - * operation.
+ * @ingroup ReducersXor */ template <typename Type> -class reducer_opxor +class op_xor_view : public scalar_view<Type> { - public: - /// Definition of data view, operation, and identity for reducer_opxor - class Monoid : public monoid_base<Type> - { + typedef scalar_view<Type> base; + +public: + /** Class to represent the right-hand side of `*reducer = *reducer ^ value`. + * + * The only assignment operator for the op_xor_view class takes an + * rhs_proxy as its operand. This results in the syntactic restriction + * that the only expressions that can be assigned to an op_xor_view are + * ones which generate an rhs_proxy — that is, expressions of the form + * `op_xor_view ^ value ... ^ value`. + * + * @warning + * The lhs and rhs views in such an assignment must be the same; + * otherwise, the behavior will be undefined. (I.e., `v1 = v1 ^ x` is + * legal; `v1 = v2 ^ x` is illegal.) This condition will be checked with + * a runtime assertion when compiled in debug mode. + * + * @see op_xor_view + */ + class rhs_proxy { + friend class op_xor_view; + + const op_xor_view* m_view; + Type m_value; + + // Constructor is invoked only from op_xor_view::operator^(). + // + rhs_proxy(const op_xor_view* view, const Type& value) : m_view(view), m_value(value) {} + + rhs_proxy& operator=(const rhs_proxy&); // Disable assignment operator + rhs_proxy(); // Disable default constructor + public: - /// Combines two views of the data - static void reduce(Type* left, Type* right); + /** Bitwise xor with an additional rhs value. If `v` is an op_xor_view + * and `a1` is a value, then the expression `v ^ a1` invokes the + * view’s `operator^()` to create an rhs_proxy for `(v, a1)`; then + * `v ^ a1 ^ a2` invokes the rhs_proxy’s `operator^()` to create a new + * rhs_proxy for `(v, a1^a2)`. This allows the right-hand side of an + * assignment to be not just `view ^ value`, but + * `view ^ value ^ value ... ^ value`. The effect is that + * + * v = v ^ a1 ^ a2 ...
^ an; + * + * is evaluated as + * + * v = v ^ (a1 ^ a2 ... ^ an); + */ + rhs_proxy& operator^(const Type& x) { m_value ^= x; return *this; } }; - /// "PRIVATE" HELPER CLASS - class temp_xor { - friend class reducer_opxor; - - Type* valuePtr_; - // Default copy constructor, no assignment operator - temp_xor& operator=(const temp_xor&); + /** Default/identity constructor. This constructor initializes the + * contained value to `Type()`. + */ + op_xor_view() : base() {} + + /** Construct with a specified initial value. + */ + explicit op_xor_view(const Type& v) : base(v) {} + + /** Reduction operation. + * + * This function is invoked by the @ref op_xor monoid to combine the views + * of two strands when the right strand merges with the left one. It + * “xors” the value contained in the left-strand view by the value + * contained in the right-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_xor monoid to implement the monoid + * reduce operation. + */ + void reduce(op_xor_view* right) { this->m_value ^= right->m_value; } + + /** @name Accumulator variable updates. + * + * These functions support the various syntaxes for “xoring” the + * accumulator variable contained in the view with some value. + */ + //@{ + + /** Xor the accumulator variable with @a x. + */ + op_xor_view& operator^=(const Type& x) { this->m_value ^= x; return *this; } + + /** Create an object representing `*this ^ x`. + * + * @see rhs_proxy + */ + rhs_proxy operator^(const Type& x) const { return rhs_proxy(this, x); } + + /** Assign the result of a `view ^ value` expression to the view. Note that + * this is the only assignment operator for this class. 
+ * + * @see rhs_proxy + */ + op_xor_view& operator=(const rhs_proxy& rhs) { + __CILKRTS_ASSERT(this == rhs.m_view); + this->m_value ^= rhs.m_value; + return *this; + } + + //@} +}; - explicit temp_xor(Type* valuePtr); +/** Monoid class for bitwise xor reductions. Instantiate the cilk::reducer + * template class with an op_xor monoid to create a bitwise xor reducer + * class. For example, to compute the bitwise xor of a set of `unsigned long` + * values: + * + * cilk::reducer< cilk::op_xor<unsigned long> > r; + * + * @tparam Type The reducer value type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. + * + * @see ReducersXor + * @see op_xor_view + * + * @ingroup ReducersXor + */ +template <typename Type, bool Align = false> +struct op_xor : public monoid_with_view<op_xor_view<Type>, Align> {}; - public: - temp_xor& operator^(const Type& x); - }; +/** Deprecated bitwise xor reducer class. + * + * reducer_opxor is the same as @ref reducer<@ref op_xor>, except that + * reducer_opxor is a proxy for the contained view, so that accumulator + * variable update operations can be applied directly to the reducer. For + * example, a value is xored with a `reducer<%op_xor>` with `*r ^= a`, but a + * value can be xored with a `%reducer_opxor` with `r ^= a`. + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_opand. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. + * + * @note Implicit conversions are provided between `%reducer_opxor` + * and `reducer<%op_xor>`. 
This allows incremental code + * conversion: old code that used `%reducer_opxor` can pass a + * `%reducer_opxor` to a converted function that now expects a + * pointer or reference to a `reducer<%op_xor>`, and vice + * versa. + * + * @tparam Type The value type of the reducer. + * + * @see op_xor + * @see reducer + * @see ReducersXor + * + * @ingroup ReducersXor + */ +template <typename Type> +class reducer_opxor : public reducer< op_xor<Type, true> > +{ + typedef reducer< op_xor<Type, true> > base; + using base::view; public: - - /// Construct an 'reducer_opxor' object with a value of 'Type()'. - reducer_opxor(); - - /// Construct an 'reducer_opxor' object with the specified initial value. - explicit reducer_opxor(const Type& initial_value); - - /// Return a const reference to the current value of this object. - /// - /// @warning If this method is called before the parallel calculation is - /// complete, the value returned by this method will be a partial result. - const Type& get_value() const; - - /// Set the value of this object. - /// - /// @warning: Setting the value of a reducer such that it violates the - /// associative operation algebra will yield results that are likely to - /// differ from serial execution and may differ from run to run. - void set_value(const Type& value); - - /// XOR 'x' to the value of this reducer and produce a temporary and object. - /// The temporary and can be used for additional bit-wise operations - /// or assigned back to this reducer. - temp_xor operator^(const Type& x) const; - - /// XOR 'x' to the value of this object. - reducer_opxor& operator^=(const Type& x); - - /// Merge the result of XOR operation into this object. The XOR operation - /// must involve this reducer, i.e., x = x + 5; not x = y + 5; - reducer_opxor& operator=(const temp_xor& temp); - + /// The view type for the reducer. + typedef typename base::view_type view_type; + + /// The view’s rhs proxy type. 
+ typedef typename view_type::rhs_proxy rhs_proxy; + + /// The view type for the reducer. + typedef view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + /** @name Constructors + */ + //@{ + + /** Default (identity) constructor. + * + * Constructs the wrapper with the default initial value of `Type()`. + */ + reducer_opxor() {} + + /** Value constructor. + * + * Constructs the wrapper with a specified initial value. + */ + explicit reducer_opxor(const Type& initial_value) : base(initial_value) {} + + //@} + + /** @name Forwarded functions + * @details Functions that update the contained accumulator variable are + * simply forwarded to the contained @ref op_xor_view. */ + //@{ + + /// @copydoc op_xor_view::operator^=(const Type&) + reducer_opxor& operator^=(const Type& x) + { + view() ^= x; return *this; + } + + // The legacy definition of reducer_opxor::operator^() has different + // behavior and a different return type than this definition. The legacy + // version is defined as a member function, so this new version is defined + // as a free function to give it a different signature, so that they won’t + // end up sharing a single object file entry. + + /// @copydoc op_xor_view::operator^(const Type&) const + friend rhs_proxy operator^(const reducer_opxor& r, const Type& x) + { + return r.view() ^ x; + } + + /// @copydoc op_xor_view::operator=(const rhs_proxy&) + reducer_opxor& operator=(const rhs_proxy& temp) + { + view() = temp; return *this; + } + //@} + + /** @name Dereference + * @details Dereferencing a wrapper is a no-op. It simply returns the + * wrapper. Combined with the rule that the wrapper forwards view + * operations to its contained view, this means that view operations can + * be written the same way on reducers and wrappers, which is convenient + * for incrementally converting old code using wrappers to use reducers + * instead. 
That is: + * + * reducer< op_and<int> > r; + * *r &= a; // *r returns the view + * // operator &= is a view member function + * + * reducer_opand<int> w; + * *w &= a; // *w returns the wrapper + * // operator &= is a wrapper member function that + * // calls the corresponding view function + */ + //@{ reducer_opxor& operator*() { return *this; } reducer_opxor const& operator*() const { return *this; } reducer_opxor* operator->() { return this; } reducer_opxor const* operator->() const { return this; } - - private: - friend class temp_or; - - // Hyperobject to serve up views - reducer<Monoid> imp_; - - // Not copyable - reducer_opxor(const reducer_opxor&); - reducer_opxor& operator=(const reducer_opxor&); + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. 
+ */ + //@{ + operator reducer< op_xor<Type, false> >& () + { + return *reinterpret_cast< reducer< op_xor<Type, false> >* >(this); + } + operator const reducer< op_xor<Type, false> >& () const + { + return *reinterpret_cast< const reducer< op_xor<Type, false> >* >(this); + } + //@} + }; -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions -///////////////////////////////////////////////////////////////////////////// - -// ------------------------------------ -// template class reducer_opxor::Monoid -// ------------------------------------ - -template <typename Type> -void -reducer_opxor<Type>::Monoid::reduce(Type* left, Type* right) -{ - *left ^= *right; -} - -// ---------------------------- -// template class reducer_opxor -// ---------------------------- - -template <typename Type> -inline -reducer_opxor<Type>::reducer_opxor() - : imp_(Type()) -{ -} - -template <typename Type> -inline -reducer_opxor<Type>::reducer_opxor(const Type& initial_value) - : imp_(initial_value) -{ -} - -template <typename Type> -inline -const Type& reducer_opxor<Type>::get_value() const -{ - return imp_.view(); -} - -template <typename Type> -inline -void reducer_opxor<Type>::set_value(const Type& value) -{ - imp_.view() = value; -} - -template <typename Type> -inline -typename reducer_opxor<Type>::temp_xor -reducer_opxor<Type>::operator^(const Type& x) const -{ - Type* valuePtr = const_cast<Type*>(&imp_.view()); - *valuePtr = *valuePtr ^ x; - return temp_xor(valuePtr); -} - -template <typename Type> -inline -reducer_opxor<Type>& reducer_opxor<Type>::operator^=(const Type& x) -{ - imp_.view() ^= x; - return *this; -} - -template <typename Type> -inline -reducer_opxor<Type>& -reducer_opxor<Type>::operator=( - const typename reducer_opxor<Type>::temp_xor& temp) -{ - // No-op. Just test that temp was constructed from this. 
- __CILKRTS_ASSERT(&imp_.view() == temp.valuePtr_); - return *this; -} - -// -------------------------------------- -// template class reducer_opxor::temp_xor -// -------------------------------------- - -template <typename Type> -inline -reducer_opxor<Type>::temp_xor::temp_xor(Type *valuePtr) - : valuePtr_(valuePtr) -{ -} - -template <typename Type> -inline -typename reducer_opxor<Type>::temp_xor& -reducer_opxor<Type>::temp_xor::operator^(const Type& x) +/// @cond internal +/** Metafunction specialization for reducer conversion. + * + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_xor<Type> >` class to have an + * `operator reducer_opxor<Type>& ()` conversion operator that statically + * downcasts the `reducer<op_xor>` to the corresponding `reducer_opxor` type. + * (The reverse conversion, from `reducer_opxor` to `reducer<op_xor>`, is just + * an upcast, which is provided for free by the language.) + * + * @ingroup ReducersXor + */ +template <typename Type, bool Align> +struct legacy_reducer_downcast<reducer<op_xor<Type, Align> > > { - *valuePtr_ = *valuePtr_ ^ x; - return *this; -} + typedef reducer_opxor<Type> type; +}; +/// @endcond } // namespace cilk #endif /* __cplusplus */ -/* C Interface + +/** @ingroup ReducersXor */ +//@{ +/** @name C language reducer macros + * + * These macros are used to declare and work with op_xor reducers in C code. + * + * @see @ref page_reducers_in_c + */ + //@{ + __CILKRTS_BEGIN_EXTERN_C +/** Opxor reducer type name. + * + * This macro expands into the identifier which is the name of the op_xor + * reducer type for a specified numeric type. + * + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * + * @see @ref reducers_c_predefined + * @see ReducersXor + */ #define CILK_C_REDUCER_OPXOR_TYPE(tn) \ __CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn) + +/** Declare an op_xor reducer object. 
+ * + * This macro expands into a declaration of an op_xor reducer object for a + * specified numeric type. For example: + * + * CILK_C_REDUCER_OPXOR(my_reducer, ulong, 0); + * + * @param obj The variable name to be used for the declared reducer object. + * @param tn The @ref reducers_c_type_names "numeric type name" specifying + * the type of the reducer. + * @param v The initial value for the reducer. (A value which can be + * assigned to the numeric type represented by @a tn.) + * + * @see @ref reducers_c_predefined + * @see ReducersXor + */ #define CILK_C_REDUCER_OPXOR(obj,tn,v) \ CILK_C_REDUCER_OPXOR_TYPE(tn) obj = \ CILK_C_INIT_REDUCER(_Typeof(obj.value), \ @@ -344,57 +513,81 @@ __CILKRTS_BEGIN_EXTERN_C __CILKRTS_MKIDENT(cilk_c_reducer_opxor_identity_,tn), \ __cilkrts_hyperobject_noop_destroy, v) -/* Declare an instance of the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ - typedef CILK_C_DECLARE_REDUCER(t) \ - __CILKRTS_MKIDENT(cilk_c_reducer_opxor_,tn); \ +/// @cond internal + +/** Declare the op_xor reducer functions for a numeric type. + * + * This macro expands into external function declarations for functions which + * implement the reducer functionality for the op_xor reducer type for a + * specified numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r); \ - __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn); - -/* Declare an instance of the reducer type for each numeric type */ -CILK_C_REDUCER_OPXOR_INSTANCE(char,char); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char,uchar); -CILK_C_REDUCER_OPXOR_INSTANCE(signed char,schar); -CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t,wchar_t); -CILK_C_REDUCER_OPXOR_INSTANCE(short,short); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short,ushort); -CILK_C_REDUCER_OPXOR_INSTANCE(int,int); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int,uint); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int,unsigned); /* alternate name */ -CILK_C_REDUCER_OPXOR_INSTANCE(long,long); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long,ulong); -CILK_C_REDUCER_OPXOR_INSTANCE(long long,longlong); -CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long,ulonglong); -CILK_C_REDUCER_OPXOR_INSTANCE(float,float); -CILK_C_REDUCER_OPXOR_INSTANCE(double,double); -CILK_C_REDUCER_OPXOR_INSTANCE(long double,longdouble); - -/* Declare function bodies for the reducer for a specific numeric type */ -#define CILK_C_REDUCER_OPXOR_IMP(t,tn) \ + __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn); + +/** Define the op_xor reducer functions for a numeric type. + * + * This macro expands into function definitions for functions which implement + * the reducer functionality for the op_xor reducer type for a specified + * numeric type. + * + * @param t The value type of the reducer. + * @param tn The value “type name” identifier, used to construct the reducer + * type name, function names, etc. 
+ */ +#define CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) \ + typedef CILK_C_DECLARE_REDUCER(t) CILK_C_REDUCER_OPXOR_TYPE(tn); \ __CILKRTS_DECLARE_REDUCER_REDUCE(cilk_c_reducer_opxor,tn,l,r) \ { *(t*)l ^= *(t*)r; } \ __CILKRTS_DECLARE_REDUCER_IDENTITY(cilk_c_reducer_opxor,tn) \ - { *(t*)v = (t)0; } - -/* c_reducers.c contains definitions for all of the monoid functions - for the C numeric tyeps. The contents of reducer_opxor.c are as follows: - -CILK_C_REDUCER_OPXOR_IMP(char,char) -CILK_C_REDUCER_OPXOR_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPXOR_IMP(signed char,schar) -CILK_C_REDUCER_OPXOR_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPXOR_IMP(short,short) -CILK_C_REDUCER_OPXOR_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPXOR_IMP(int,int) -CILK_C_REDUCER_OPXOR_IMP(unsigned int,uint) -CILK_C_REDUCER_OPXOR_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPXOR_IMP(long,long) -CILK_C_REDUCER_OPXOR_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPXOR_IMP(long long,longlong) -CILK_C_REDUCER_OPXOR_IMP(unsigned long long,ulonglong) - -*/ + { *(t*)v = 0; } + +//@{ +/** @def CILK_C_REDUCER_OPXOR_INSTANCE + * @brief Declare or define implementation functions for a reducer type. + * + * In the runtime source file c_reducers.c, the macro `CILK_C_DEFINE_REDUCERS` + * will be defined, and this macro will generate reducer implementation + * functions. Everywhere else, `CILK_C_DEFINE_REDUCERS` will be undefined, and + * this macro will expand into external declarations for the functions. + */ +#ifdef CILK_C_DEFINE_REDUCERS +# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPXOR_DEFINITION(t,tn) +#else +# define CILK_C_REDUCER_OPXOR_INSTANCE(t,tn) \ + CILK_C_REDUCER_OPXOR_DECLARATION(t,tn) +#endif +//@} + +/* Declare or define an instance of the reducer type and its functions for each + * numeric type. 
+ */ +CILK_C_REDUCER_OPXOR_INSTANCE(char, char) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned char, uchar) +CILK_C_REDUCER_OPXOR_INSTANCE(signed char, schar) +CILK_C_REDUCER_OPXOR_INSTANCE(wchar_t, wchar_t) +CILK_C_REDUCER_OPXOR_INSTANCE(short, short) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned short, ushort) +CILK_C_REDUCER_OPXOR_INSTANCE(int, int) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, uint) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned int, unsigned) /* alternate name */ +CILK_C_REDUCER_OPXOR_INSTANCE(long, long) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long, ulong) +CILK_C_REDUCER_OPXOR_INSTANCE(long long, longlong) +CILK_C_REDUCER_OPXOR_INSTANCE(unsigned long long, ulonglong) + +//@endcond __CILKRTS_END_EXTERN_C -#endif // REDUCER_OPXOR_H_INCLUDED +//@} + +//@} + +#endif /* REDUCER_OPXOR_H_INCLUDED */ diff --git a/libcilkrts/include/cilk/reducer_ostream.h b/libcilkrts/include/cilk/reducer_ostream.h index 9d57824bce4..d64c740c081 100644 --- a/libcilkrts/include/cilk/reducer_ostream.h +++ b/libcilkrts/include/cilk/reducer_ostream.h @@ -1,26 +1,31 @@ /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * */ diff --git a/libcilkrts/include/cilk/reducer_string.h b/libcilkrts/include/cilk/reducer_string.h index 9d323271ae2..676b16d0e6e 100644 --- a/libcilkrts/include/cilk/reducer_string.h +++ b/libcilkrts/include/cilk/reducer_string.h @@ -1,108 +1,41 @@ -/* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - * +/* reducer_string.h -*- C++ -*- + * + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. */ -/* - * reducer_string.h - * - * Purpose: Reducer hyperobject to accumulate a string. 
- * - * Classes: reducer_basic_string<Elem, Traits, Alloc> - * reducer_string - convenience name for a string-of-char reducer - * reducer_wstring - convenience name for a string-of-wchar_t reducer - * - * Description: - * ============ - * This component provides a reducer-type hyperobject representation that - * allows appending characters to an STL string. By replacing the variable - * with the hyperobject defined in this component, the data race is eliminated. - * - * reducer_basic_string is actually implemented using a list to avoid memory - * fragmentation issues as text is appended to the string. The string - * components are assembled into a single string before being returned by - * get_value(). - * - * Usage Example: - * ============== - * Assume we wish to traverse an array of objects, performing an operation on - * each object and accumulating the result of the operation into an STL string - * variable. - *.. - * char *compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * std::string result; - * for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * result += compute(myArray[i]); - * } - * - * std::cout << "The result is: " << result.c_str() << std::endl; - - * return 0; - * } - *.. - * Changing the 'for' to a 'cilk_for' will cause the loop to run in parallel, - * but doing so will create a data race on the 'result' variable. - * The race is solved by changing 'result' to a 'reducer_string' hyperobject: - *.. - * char *compute(const X& v); - * - * int test() - * { - * const std::size_t ARRAY_SIZE = 1000000; - * extern X myArray[ARRAY_SIZE]; - * // ... - * - * cilk::reducer_string result; - * cilk_for (std::size_t i = 0; i < ARRAY_SIZE; ++i) - * { - * *result += compute(myArray[i]); - * } - * - * std::cout << "The result is: " << result.get_value() << std::endl; +/** @file reducer_string.h * - * return 0; - * } - *.. 
+ * @brief Defines classes for doing parallel string creation by appending. * - * Operations provided: - * ==================== + * @ingroup ReducersString * - * 'reducer_string' supports operator+= and append. - * - * The the current value of the reducer can be retrieved using the 'get_value' - * method. As with most reducers, the 'get_value' method produces deterministic - * results only if called before the first spawn after creating a 'hyperobject' - * or when all strands spawned since creating the 'hyperobject' have been - * synced. + * @see ReducersString */ #ifndef REDUCER_STRING_H_INCLUDED @@ -112,565 +45,680 @@ #include <string> #include <list> -namespace cilk -{ - -/** - * @brief Reducer hyperobject representation of a string. +/** @defgroup ReducersString String Reducers * - * Typedefs for 8-bit character strings (reducer_string) and 16-bit character - * strings (reducer_wstring) are provided at the end of the file. - */ -template<class _Elem, - class _Traits = std::char_traits<_Elem>, - class _Alloc = std::allocator<_Elem> > -class reducer_basic_string -{ -public: - /// Type of the basic_string reducer_basic_string is based on - typedef std::basic_string<_Elem, _Traits, _Alloc> string_type; + * String reducers allow the creation of a string by concatenating a set of + * strings or characters in parallel. + * + * @ingroup Reducers + * + * You should be familiar with @ref pagereducers "Cilk reducers", described in + * file reducers.md, and particularly with @ref reducers_using, before trying + * to use the information in this file. 
+ * + * @section redstring_usage Usage Example + * + * vector<Data> data; + * void expensive_string_computation(const Data& x, string& s); + * cilk::reducer<cilk::op_string> r; + * cilk_for (int i = 0; i != data.size(); ++i) { + * string temp; + * expensive_string_computation(data[i], temp); + * *r += temp; + * } + * string result; + * r.move_out(result); + * + * @section redstring_monoid The Monoid + * + * @subsection redstring_monoid_values Value Set + * + * The value set of a string reducer is the set of values of the class + * `std::basic_string<Char, Traits, Alloc>`, which we refer to as “the + * reducer’s string type”. + * + * @subsection redstring_monoid_operator Operator + * + * The operator of a string reducer is the string concatenation operator, + * defined by the “`+`” binary operator on the reducer’s string type. + * + * @subsection redstring_monoid_identity Identity + * + * The identity value of a string reducer is the empty string, which is the + * value of the expression + * `std::basic_string<Char, Traits, Alloc>([allocator])`. + * + * @section redstring_operations Operations + * + * In the operation descriptions below, the type name `String` refers to the + * reducer’s string type, `std::basic_string<Char, Traits, Alloc>`. + * + * @subsection redstring_constructors Constructors + * + * Any argument list which is valid for a `std::basic_string` constructor is + * valid for a string reducer constructor. The usual move-in constructor is + * also provided: + * + * reducer(move_in(String& variable)) + * + * @subsection redstring_get_set Set and Get + * + * r.set_value(const String& value) + * const String& = r.get_value() const + * r.move_in(String& variable) + * r.move_out(String& variable) + * + * @subsection redstring_initial Initial Values + * + * A string reducer with no constructor arguments, or with only an allocator + * argument, will initially contain the identity value, an empty string. 
+ * + * @subsection redstring_view_ops View Operations + * + * *r += a + * r->append(a) + * r->append(a, b) + * r->push_back(a) + * + * These operations on string reducer views are the same as the corresponding + * operations on strings. + * + * @section redstring_performance Performance Considerations + * + * String reducers work by creating a string for each view, collecting those + * strings in a list, and then concatenating them into a single result string + * at the end of the computation. This last step takes place in serial code, + * and necessarily takes time proportional to the length of the result string. + * Thus, a parallel string reducer cannot actually speed up the time spent + * directly creating the string. This trivial example would probably be slower + * (because of reducer overhead) than the corresponding serial code: + * + * vector<string> a; + * reducer<op_string> r; + * cilk_for (int i = 0; i != a.size(); ++i) { + * *r += a[i]; + * } + * string result; + * r.move_out(result); + * + * What a string reducer _can_ do is to allow the _remainder_ of the + * computation to be done in parallel, without having to worry about managing + * the string computation. + * + * The strings for new views are created (by the view identity constructor) + * using the same allocator as the string that was created when the reducer + * was constructed. Note that this allocator is determined when the reducer is + * constructed. The following two examples may have very different behavior: + * + * basic_string<Char, Traits, Allocator> a_string; + * + * reducer< op_basic_string<Char, Traits, Allocator> > reducer1(move_in(a_string)); + * ... parallel computation ... + * reducer1.move_out(a_string); + * + * reducer< op_basic_string<Char, Traits, Allocator> > reducer2; + * reducer2.move_in(a_string); + * ... parallel computation ... 
+ * reducer2.move_out(a_string); + * + * * `reducer1` will be constructed with the same allocator as `a_string`, + * because the string was specified in the constructor. The `move_in` + * and `move_out` can therefore be done with a `swap` in constant time. + * * `reducer2` will be constructed with a _default_ allocator of type + * `Allocator`, which may not be the same as the allocator of `a_string`. + * Therefore, the `move_in` and `move_out` may have to be done with a copy + * in _O(N)_ time. + * + * (All instances of an allocator type with no internal state (like + * `std::allocator`) are “the same”. You only need to worry about the “same + * allocator” issue when you create string reducers with custom allocator + * types.) + * + * @section redstring_types Type and Operator Requirements + * + * `std::basic_string<Char, Traits, Alloc>` must be a valid type. +*/ - /// Type of sizes - typedef typename string_type::size_type size_type; +namespace cilk { - /// Character type for reducer_basic_string - typedef _Elem basic_value_type; +/** @ingroup ReducersString */ +//@{ - /// Internal representation of the per-strand view of the data for reducer_basic_string - struct View +/** The string append reducer view class. + * + * This is the view class for reducers created with + * `cilk::reducer< cilk::op_basic_string<Type, Traits, Allocator> >`. It holds + * the accumulator variable for the reduction, and allows only append + * operations to be performed on it. + * + * @note The reducer “dereference” operation (`reducer::operator *()`) + * yields a reference to the view. Thus, for example, the view class’s + * `append` operation would be used in an expression like + * `r->append(a)`, where `r` is a string append reducer variable. + * + * @tparam Char The string element type (not the string type). + * @tparam Traits The character traits type. + * @tparam Alloc The string allocator type. 
+ * + * @see ReducersString + * @see op_basic_string + */ +template<typename Char, typename Traits, typename Alloc> +class op_basic_string_view +{ + typedef std::basic_string<Char, Traits, Alloc> string_type; + typedef std::list<string_type> list_type; + typedef typename string_type::size_type size_type; + + // The view's value is represented by a list of strings and a single + // string. The value is the concatenation of the strings in the list with + // the single string at the end. All string operations apply to the single + // string; reduce operations cause lists of partial strings from multiple + // strands to be combined. + // + mutable string_type m_string; + mutable list_type m_list; + + // Before returning the value of the reducer, concatenate all the strings + // in the list with the single string. + // + void flatten() const { - friend class reducer_basic_string<_Elem, _Traits, _Alloc>; - - /// Type of the basic_string the View is based on - typedef std::basic_string<_Elem, _Traits, _Alloc> string_type; + if (m_list.empty()) return; - /// Type of sizes - typedef typename string_type::size_type size_type; + typename list_type::iterator i; - std::basic_string<_Elem, _Traits, _Alloc> &get_value(); + size_type len = m_string.size(); + for (i = m_list.begin(); i != m_list.end(); ++i) + len += i->size(); - /// Add a character to the View - void add_char(_Elem ch) { m_value += ch; } + string_type result(get_allocator()); + result.reserve(len); - private: - string_type m_value; // Holds current string - std::list<string_type> m_list; // List used to accumulate string fragments - }; - -public: - /// Definition of data view, operation, and identity for reducer_basic_string - struct Monoid: monoid_base< View > - { - static void reduce (View *left, View *right); - }; + for (i = m_list.begin(); i != m_list.end(); ++i) + result += *i; + m_list.clear(); -private: - // Hyperobject to serve up views - reducer<Monoid> imp_; + result += m_string; + result.swap(m_string); 
+ } public: - // Default constructor - Construct an empty reducer_basic_string - reducer_basic_string(); + /** @name Monoid support. + */ + //@{ - // Construct a reducer_basic_string with an initial value - reducer_basic_string(const _Elem *ptr); - reducer_basic_string(const _Elem *ptr, const _Alloc &al); - reducer_basic_string(const _Elem *ptr, size_type count); - reducer_basic_string(const _Elem *ptr, size_type count, const _Alloc &al); - reducer_basic_string(const string_type &right, size_type offset, size_type count); - reducer_basic_string(const string_type &right, size_type offset, size_type count, const _Alloc &al); - reducer_basic_string(size_type count, _Elem ch); - reducer_basic_string(size_type count, _Elem ch, const _Alloc &al); + /// Required by @ref monoid_with_view + typedef string_type value_type; - // Return an immutable reference to the current string - const string_type &get_value() const; + /// Required by @ref op_string + Alloc get_allocator() const + { + return m_string.get_allocator(); + } - // Return a reference to the current string - string_type& get_reference(); - string_type const& get_reference() const; + /** Reduction operation. + * + * This function is invoked by the @ref op_basic_string monoid to combine + * the views of two strands when the right strand merges with the left + * one. It appends the value contained in the right-strand view to the + * value contained in the left-strand view, and leaves the value in the + * right-strand view undefined. + * + * @param right A pointer to the right-strand view. (`this` points to + * the left-strand view.) + * + * @note Used only by the @ref op_basic_string monoid to implement the + * monoid reduce operation. 
+ */ + void reduce(op_basic_string_view* right) + { + if (!right->m_string.empty() || !right->m_list.empty()) { + // (list, string) + (right_list, right_string) => + // (list + {string} + right_list, right_string) + if (!m_string.empty()) { + // simulate m_list.push_back(std::move(m_string)) + m_list.push_back(string_type(get_allocator())); + m_list.back().swap(m_string); + } + m_list.splice(m_list.end(), right->m_list); + m_string.swap(right->m_string); + } + } - // Set the string to a specified value - void set_value(const string_type &value); + //@} - // Append to the string - void append(const _Elem *ptr); - void append(const _Elem *ptr, size_type count); - void append(const string_type &str, size_type offset, size_type count); - void append(const string_type &str); - void append(size_type count, _Elem ch); + /** @name Pass constructor arguments through to the string constructor. + */ + //@{ - // Append to the string - reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(_Elem ch); - reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(const _Elem *ptr); - reducer_basic_string<_Elem, _Traits, _Alloc> &operator+=(const string_type &right); + op_basic_string_view() : m_string() {} - reducer_basic_string& operator*() { return *this; } - reducer_basic_string const& operator*() const { return *this; } + template <typename T1> + op_basic_string_view(const T1& x1) : m_string(x1) {} - reducer_basic_string* operator->() { return this; } - reducer_basic_string const* operator->() const { return this; } + template <typename T1, typename T2> + op_basic_string_view(const T1& x1, const T2& x2) : m_string(x1, x2) {} -}; // class reducer_basic_string + template <typename T1, typename T2, typename T3> + op_basic_string_view(const T1& x1, const T2& x2, const T3& x3) : m_string(x1, x2, x3) {} -///////////////////////////////////////////////////////////////////////////// -// Implementation of inline and template functions 
-///////////////////////////////////////////////////////////////////////////// + template <typename T1, typename T2, typename T3, typename T4> + op_basic_string_view(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : + m_string(x1, x2, x3, x4) {} -// ----------------------------------------- -// template class reducer_basic_string::View -// ----------------------------------------- + //@} -/** - * Assemble the string from the collected fragments - * - * @returns std::basic_string reference to the assembled string - */ -template<class _Elem, class _Traits, class _Alloc> -std::basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::View::get_value() -{ - // If the list is empty, just return our string - if (m_list.empty()) - return m_value; - - // First calculate the total length of all of the string fragments - size_type len = m_value.size(); - typename std::list<string_type>::iterator i; - for (i = m_list.begin(); i != m_list.end(); ++i) - len += i->size(); - - // Hold onto the string, since it needs to go at the end - string_type tmp; - tmp.swap(m_value); - - // Expand the string that to hold all of the string fragments. - // Allocating it up-front prevents heap fragmentation. - m_value.reserve(len); - - // Concatenate all of the fragments into the string, then clear out the - // list - for (i = m_list.begin(); i != m_list.end(); ++i) - m_value += *i; - m_list.clear(); - - // Finally, add the string value we saved - m_value += tmp; - return m_value; -} - -// ------------------------------------------- -// template class reducer_basic_string::Monoid -// ------------------------------------------- - -/** - * Appends string from "right" reducer_basic_string onto the end of - * the "left". When done, the "right" reducer_basic_string is empty. 
- */ -template<class _Elem, class _Traits, class _Alloc> -void -reducer_basic_string<_Elem, _Traits, _Alloc>::Monoid::reduce(View *left, - View *right) -{ - // Check if there's anything to do - if (right->m_list.empty() && right->m_value.empty()) - return; - - // If the only thing is the right string, just take it - if (left->m_list.empty() && right->m_list.empty() & left->m_value.empty()) + /** Move-in constructor. + */ + explicit op_basic_string_view(move_in_wrapper<value_type> w) + : m_string(w.value().get_allocator()) { - left->m_value.swap(right->m_value); - return; + m_string.swap(w.value()); } - // Debugging aid - should be removed before ship! -#ifdef DEBUG_STRING_REDUCER - std::cout << "Complex merge" << std::endl; - dump ("Left"); - right->dump("Right"); -#endif + /** @name @ref reducer support. + */ + //@{ - // OK, merge everything together. If there's anything in our string, it's - // got to be added to the list first - if (! left->m_value.empty()) + void view_move_in(string_type& s) { - left->m_list.push_back(left->m_value); - left->m_value.clear(); + m_list.clear(); + if (m_string.get_allocator() == s.get_allocator()) + // Equal allocators. Do a (fast) swap. + m_string.swap(s); + else + // Unequal allocators. Do a (slow) copy. + m_string = s; + s.clear(); } - // Now splice the two lists together, then take the right string - left->m_list.splice(left->m_list.end(), right->m_list); - left->m_value.swap(right->m_value); - - // Debugging aid - should be removed before ship! -#ifdef DEBUG_STRING_REDUCER - dump ("Result"); -#endif -} + void view_move_out(string_type& s) + { + flatten(); + if (m_string.get_allocator() == s.get_allocator()) + // Equal allocators. Do a (fast) swap. + m_string.swap(s); + else + // Unequal allocators. Do a (slow) copy. 
+ s = m_string; + m_string.clear(); + } -// ----------------------------------- -// template class reducer_basic_string -// ----------------------------------- + void view_set_value(const string_type& s) + { m_list.clear(); m_string = s; } -/** - * Default constructor - doesn't do much - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(): - imp_() -{ -} + string_type const& view_get_value() const + { flatten(); return m_string; } -/** - * Construct a reducer_basic_string initializing it from a null-terminated - * string using the default allocator. - * - * @param ptr Null-terminated string to initialize from - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr) : - imp_() -{ - string_type str(ptr); + string_type & view_get_reference() + { flatten(); return m_string; } - View &v = imp_.view(); - v.m_value = str; -} + string_type const& view_get_reference() const + { flatten(); return m_string; } -/** - * Construct a reducer_basic_string initializing it from a null-terminated - * string specifying an allocator. - * - * @param ptr Null-terminated string to initialize from - * @param al Allocator to be used - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr, - const _Alloc &al) : - imp_() -{ - string_type str(ptr, al); + //@} - View &v = imp_.view(); - v.m_value = str; -} + /** @name View modifier operations. + * + * @details These simply wrap the corresponding operations on the underlying string. + */ + //@{ -/** - * Construct a reducer_basic_string initializing it from a null-terminated - * string, copying N characters, using the default allocator. 
- * - * @param ptr Null-terminated string to initialize from - * @param count Number of characters to copy - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr, - size_type count) : - imp_() -{ - string_type str(ptr, count); + template <typename T> + op_basic_string_view& operator +=(const T& x) + { m_string += x; return *this; } - View &v = imp_.view(); - v.m_value = str; -} + template <typename T1> + op_basic_string_view& append(const T1& x1) + { m_string.append(x1); return *this; } -/** - * Construct a reducer_basic_string initializing it from a null-terminated - * string, copying N characters, specifying an allocator. - * - * @param ptr Null-terminated string to initialize from - * @param count Number of characters to copy - * @param al Allocator to be used - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const _Elem *ptr, - size_type count, - const _Alloc &al) : - imp_() -{ - string_type str(ptr, count, al); - - View &v = imp_.view(); - v.m_value = str; -} + template <typename T1, typename T2> + op_basic_string_view& append(const T1& x1, const T2& x2) + { m_string.append(x1, x2); return *this; } -/** - * Construct a reducer_basic_string initializing it from a string_type - * string starting from an offset, copying N characters, using the default - * allocator. 
- * - * @param right string_type string to initialize from - * @param offset Character withing right to start copying from - * @param count Number of characters to copy - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const string_type &right, - size_type offset, - size_type count) : - imp_() -{ - string_type str(right, offset, count); + template <typename T1, typename T2, typename T3> + op_basic_string_view& append(const T1& x1, const T2& x2, const T3& x3) + { m_string.append(x1, x2, x3); return *this; } - View &v = imp_.view(); - v.m_value = str; -} + void push_back(const Char x) { m_string.push_back(x); } -/** - * Construct a reducer_basic_string initializing it from a string_type - * string starting from an offset, copying N characters, uspecifying an - * allocator. - * - * @param right string_type string to initialize from - * @param offset Character withing right to start copying from - * @param count Number of characters to copy - * @param al Allocator to be used - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(const string_type &right, - size_type offset, - size_type count, - const _Alloc &al) : - imp_() -{ - string_type str(right, offset, count, al); + //@} +}; - View &v = imp_.view(); - v.m_value = str; -} -/** - * Construct a reducer_basic_string initializing it with a character repeated - * some number of times, using the default allocator. +/** String append monoid class. Instantiate the cilk::reducer template class + * with an op_basic_string monoid to create a string append reducer class. 
For + * example, to concatenate a collection of standard strings: * - * @param count Number of times to repeat the character - * @param ch Character to initialize reducer_basic_string with - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(size_type count, - _Elem ch) : - imp_() -{ - string_type str(count, ch); - - View &v = imp_.view(); - v.m_value = str; -} - -/** - * Construct a reducer_basic_string initializing it with a character repeated - * some number of times, specifying an allocator. + * cilk::reducer< cilk::op_basic_string<char> > r; * - * @param count Number of times to repeat the character - * @param ch Character to initialize reducer_basic_string with - * @param al Allocator to be used - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc>::reducer_basic_string(size_type count, - _Elem ch, - const _Alloc &al) : - imp_() -{ - string_type str(count, ch, al); - - View &v = imp_.view(); - v.m_value = str; -} - -/** - * Assemble the string from the collected fragments and return a mutable - * reference to it + * @tparam Char The string element type (not the string type). + * @tparam Traits The character traits type. + * @tparam Alloc The string allocator type. + * @tparam Align If `false` (the default), reducers instantiated on this + * monoid will be naturally aligned (the Cilk library 1.0 + * behavior). If `true`, reducers instantiated on this monoid + * will be cache-aligned for binary compatibility with + * reducers in Cilk library version 0.9. 
* - * @returns std::basic_string reference + * @see ReducersString + * @see op_basic_string_view + * @see reducer_basic_string + * @see op_string + * @see op_wstring */ -template<class _Elem, class _Traits, class _Alloc> -std::basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::get_reference() +template<typename Char, + typename Traits = std::char_traits<Char>, + typename Alloc = std::allocator<Char>, + bool Align = false> +class op_basic_string : + public monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align > { - View &v = imp_.view(); - - return v.get_value(); -} + typedef monoid_with_view< op_basic_string_view<Char, Traits, Alloc>, Align > + base; + Alloc m_allocator; -/** - * Assemble the string from the collected fragments and return an immutable - * reference to it - * - * @returns std::basic_string reference - */ -template<class _Elem, class _Traits, class _Alloc> -const std::basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::get_reference() const -{ - // Cast away the const-ness and call mutable get_reference to do the work - reducer_basic_string *pThis = const_cast<reducer_basic_string *>(this); - return pThis->get_reference(); -} +public: -/** - * Assemble the string from the collected fragments and return an immutable - * reference to it - * - * @returns string_type reference + /** View type of the monoid. + */ + typedef typename base::view_type view_type; + + /** Constructor. + * + * There is no default constructor for string monoids, because the + * allocator must always be specified. + * + * @param allocator The list allocator to be used when + * identity-constructing new views. + */ + op_basic_string(const Alloc& allocator = Alloc()) : m_allocator(allocator) + {} + + /** Create an identity view. + * + * String view identity constructors take the string allocator as an + * argument. 
+ * + * @param v The address of the uninitialized memory in which the view + * will be constructed. + */ + void identity(view_type *v) const { ::new((void*) v) view_type(m_allocator); } + + /** @name Construct functions + * + * A string append reduction monoid must have a copy of the allocator of + * the leftmost view’s string, so that it can use it in the `identity` + * operation. This, in turn, requires that string reduction monoids have a + * specialized `construct()` function. + * + * All string reducer monoid `construct()` functions first construct the + * leftmost view, using the arguments that were passed in from the reducer + * constructor. They then call the view’s `get_allocator()` function to + * get the string allocator from the string in the leftmost view, and pass + * that to the monoid constructor. + */ + //@{ + + static void construct(op_basic_string* monoid, view_type* view) + { provisional( new ((void*)view) view_type() ).confirm_if( + new ((void*)monoid) op_basic_string(view->get_allocator()) ); } + + template <typename T1> + static void construct(op_basic_string* monoid, view_type* view, const T1& x1) + { provisional( new ((void*)view) view_type(x1) ).confirm_if( + new ((void*)monoid) op_basic_string(view->get_allocator()) ); } + + template <typename T1, typename T2> + static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2) + { provisional( new ((void*)view) view_type(x1, x2) ).confirm_if( + new ((void*)monoid) op_basic_string(view->get_allocator()) ); } + + template <typename T1, typename T2, typename T3> + static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2, + const T3& x3) + { provisional( new ((void*)view) view_type(x1, x2, x3) ).confirm_if( + new ((void*)monoid) op_basic_string(view->get_allocator()) ); } + + template <typename T1, typename T2, typename T3, typename T4> + static void construct(op_basic_string* monoid, view_type* view, const T1& x1, const T2& x2, + 
const T3& x3, const T4& x4) + { provisional( new ((void*)view) view_type(x1, x2, x3, x4) ).confirm_if( + new ((void*)monoid) op_basic_string(view->get_allocator()) ); } + + //@} +}; + + +/** Convenience typedef for 8-bit strings */ -template<class _Elem, class _Traits, class _Alloc> -inline -const std::basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::get_value() const -{ - // Delegate to get_reference() - return this->get_reference(); -} - -/** - * Set the string to a specified value - * - * @param value string_type to set the reducer_basic_string to +typedef op_basic_string<char> op_string; + +/** Convenience typedef for 16-bit strings */ -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::set_value(const string_type &value) -{ - View &v = imp_.view(); - - v.m_list.clear(); - v.m_value.assign(value); -} - -/** - * Add a null-terminated string to the string - * - * @param ptr Null-terminated string to be appended +typedef op_basic_string<wchar_t> op_wstring; + + +/** Deprecated string append reducer class. + * + * reducer_basic_string is the same as @ref reducer<@ref op_basic_string>, + * except that reducer_basic_string is a proxy for the contained view, so that + * accumulator variable update operations can be applied directly to the + * reducer. For example, a value is appended to a `reducer<%op_basic_string>` + * with `r->push_back(a)`, but a value can be appended to a `%reducer_opand` + * with `r.push_back(a)`. + * + * @deprecated Users are strongly encouraged to use `reducer<monoid>` + * reducers rather than the old wrappers like reducer_basic_string. + * The `reducer<monoid>` reducers show the reducer/monoid/view + * architecture more clearly, are more consistent in their + * implementation, and present a simpler model for new + * user-implemented reducers. 
+ * + * @note Implicit conversions are provided between `%reducer_basic_string` + * and `reducer<%op_basic_string>`. This allows incremental code + * conversion: old code that used `%reducer_basic_string` can pass a + * `%reducer_basic_string` to a converted function that now expects a + * pointer or reference to a `reducer<%op_basic_string>`, and vice + * versa. + * + * @tparam Char The string element type (not the string type). + * @tparam Traits The character traits type. + * @tparam Alloc The string allocator type. + * + * @see op_basic_string + * @see reducer + * @see ReducersString */ -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const _Elem *ptr) +template<typename Char, + typename Traits = std::char_traits<Char>, + typename Alloc = std::allocator<Char> > +class reducer_basic_string : + public reducer< op_basic_string<Char, Traits, Alloc, true> > { - View &v = imp_.view(); + typedef reducer< op_basic_string<Char, Traits, Alloc, true> > base; + using base::view; +public: - v.m_value.append(ptr); -} + /// The reducer’s string type. + typedef typename base::value_type string_type; -/** - * Add a string_type string to the string - * - * @param str string_type to be appended - */ -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const string_type &str) -{ - View &v = imp_.view(); + /// The reducer’s primitive component type. + typedef Char basic_value_type; - v.m_value.append(str); -} + /// The string size type. 
+ typedef typename string_type::size_type size_type; -/** - * Add a null-terminated string to the string, specifying the maximum number - * of characters to copy - * - * @param ptr Null-terminated string to be appended - * @param count Maximum number of characters to copy - */ -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const _Elem *ptr, - size_type count) -{ - View &v = imp_.view(); + /// The view type for the reducer. + typedef typename base::view_type View; + + /// The monoid type for the reducer. + typedef typename base::monoid_type Monoid; + + + /** @name Constructors + */ + //@{ + + /** @name Forward constructor calls to the base class. + * + * All basic_string constructor forms are supported. + */ + //@{ + reducer_basic_string() {} + + template <typename T1> + reducer_basic_string(const T1& x1) : + base(x1) {} + + template <typename T1, typename T2> + reducer_basic_string(const T1& x1, const T2& x2) : + base(x1, x2) {} + + template <typename T1, typename T2, typename T3> + reducer_basic_string(const T1& x1, const T2& x2, const T3& x3) : + base(x1, x2, x3) {} + + template <typename T1, typename T2, typename T3, typename T4> + reducer_basic_string(const T1& x1, const T2& x2, const T3& x3, const T4& x4) : + base(x1, x2, x3, x4) {} + //@} + + /** Allow mutable access to the string within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the string returned by this method will be a + * partial result. + * + * @returns A mutable reference to the string within the current view. + */ + string_type &get_reference() + { return view().view_get_reference(); } + + /** Allow read-only access to the string within the current view. + * + * @warning If this method is called before the parallel calculation is + * complete, the string returned by this method will be a + * partial result. 
+ * + * @returns A const reference to the string within the current view. + */ + string_type const &get_reference() const + { return view().view_get_reference(); } + + /** @name Append to the string. + * + * These operations are simply forwarded to the view. + */ + //@{ + void append(const Char *ptr) + { view().append(ptr); } + void append(const Char *ptr, size_type count) + { view().append(ptr, count); } + void append(const string_type &str, size_type offset, size_type count) + { view().append(str, offset, count); } + void append(const string_type &str) + { view().append(str); } + void append(size_type count, Char ch) + { view().append(count, ch); } - v.m_value.append(ptr, count); -} + // Append to the string + reducer_basic_string<Char, Traits, Alloc> &operator+=(Char ch) + { view() += ch; return *this; } + reducer_basic_string<Char, Traits, Alloc> &operator+=(const Char *ptr) + { view() += ptr; return *this; } + reducer_basic_string<Char, Traits, Alloc> &operator+=(const string_type &right) + { view() += right; return *this; } + //@} + + /** @name Dereference + * @details Dereferencing a wrapper is a no-op. It simply returns the + * wrapper. Combined with the rule that the wrapper forwards view + * operations to its contained view, this means that view operations can + * be written the same way on reducers and wrappers, which is convenient + * for incrementally converting old code using wrappers to use reducers + * instead. 
That is: + * + * reducer<op_string> r; + * r->push_back(a); // r-> returns the view + * // push_back() is a view member function + * + * reducer_string w; + * w->push_back(a); // *w returns the wrapper + * // push_back() is a wrapper member function + * // that calls the corresponding view function + */ + //@{ + reducer_basic_string& operator*() { return *this; } + reducer_basic_string const& operator*() const { return *this; } -/** - * Add a string_type string to the string, specifying the starting offset and - * maximum number of characters to copy - * - * @param str Null-terminated string to be appended - * @param offset Offset in the string_type to start copy at - * @param count Maximum number of characters to copy - */ -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::append(const string_type &str, - size_type offset, - size_type count) -{ - View &v = imp_.view(); + reducer_basic_string* operator->() { return this; } + reducer_basic_string const* operator->() const { return this; } + //@} + + /** @name Upcast + * @details In Cilk library 0.9, reducers were always cache-aligned. In + * library 1.0, reducer cache alignment is optional. By default, reducers + * are unaligned (i.e., just naturally aligned), but legacy wrappers + * inherit from cache-aligned reducers for binary compatibility. + * + * This means that a wrapper will automatically be upcast to its aligned + * reducer base class. The following conversion operators provide + * pseudo-upcasts to the corresponding unaligned reducer class. 
+ */ + //@{ + operator reducer< op_basic_string<Char, Traits, Alloc, false> >& () + { + return *reinterpret_cast< reducer< + op_basic_string<Char, Traits, Alloc, false> >* + >(this); + } + operator const reducer< op_basic_string<Char, Traits, Alloc, false> >& () const + { + return *reinterpret_cast< const reducer< + op_basic_string<Char, Traits, Alloc, false> >* + >(this); + } + //@} +}; - v.m_value.append(str, offset, count); -} -/** - * Add one or more repeated characters to the string - * - * @param count Number of times to repeat the character - * @param ch Character to be added one or more times to the string +/** Convenience typedef for 8-bit strings */ -// append - add one or more repeated characters to the list -template<class _Elem, class _Traits, class _Alloc> -void reducer_basic_string<_Elem, _Traits, _Alloc>::append(size_type count, - _Elem ch) -{ - View &v = imp_.view(); - - v.m_value.append(count, ch); -} +typedef reducer_basic_string<char> reducer_string; -/** - * append a single character to the string - * - * @param ch Character to be appended +/** Convenience typedef for 16-bit strings */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(_Elem ch) -{ - View &v = imp_.view(); +typedef reducer_basic_string<wchar_t> reducer_wstring; - v.m_value.append(1, ch); - return *this; -} +/// @cond internal -/** - * append a null-terminated string to the string +/// @cond internal +/** Metafunction specialization for reducer conversion. 
* - * @param ptr Null-terminated string to be appended - */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(const _Elem *ptr) -{ - View &v = imp_.view(); - - v.m_value.append(ptr); - return *this; -} - -/** - * append a string-type to the string + * This specialization of the @ref legacy_reducer_downcast template class + * defined in reducer.h causes the `reducer< op_basic_string<Char> >` class to + * have an `operator reducer_basic_string<Char>& ()` conversion operator that + * statically downcasts the `reducer<op_basic_string>` to the corresponding + * `reducer_basic_string` type. (The reverse conversion, from + * `reducer_basic_string` to `reducer<op_basic_string>`, is just an upcast, + * which is provided for free by the language.) * - * @param right string-type to be appended + * @ingroup ReducersString */ -template<class _Elem, class _Traits, class _Alloc> -reducer_basic_string<_Elem, _Traits, _Alloc> & -reducer_basic_string<_Elem, _Traits, _Alloc>::operator+=(const string_type &right) +template<typename Char, typename Traits, typename Alloc, bool Align> +struct legacy_reducer_downcast< + reducer<op_basic_string<Char, Traits, Alloc, Align> > > { - View &v = imp_.view(); - - v.m_value.append(right); - return *this; -} + typedef reducer_basic_string<Char, Traits, Alloc> type; +}; -/** - * Convenience typedefs for 8-bit strings - */ -typedef reducer_basic_string<char, - std::char_traits<char>, - std::allocator<char> > - reducer_string; +/// @endcond -/** - * Convenience typedefs for 16-bit strings - */ -typedef reducer_basic_string<wchar_t, - std::char_traits<wchar_t>, - std::allocator<wchar_t> > - reducer_wstring; +//@} -} // namespace cilk +} // namespace cilk #endif // REDUCER_STRING_H_INCLUDED diff --git a/libcilkrts/include/cilktools/cilkscreen.h b/libcilkrts/include/cilktools/cilkscreen.h index 0975f8ca7db..47a363e9c98 100644 --- 
a/libcilkrts/include/cilktools/cilkscreen.h +++ b/libcilkrts/include/cilktools/cilkscreen.h @@ -2,24 +2,29 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 + * @copyright + * Copyright (C) 2010-2011 * Intel Corporation * + * @copyright * This file is part of the Intel Cilk Plus Library. This library is free * software; you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) * any later version. * + * @copyright * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * + * @copyright * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. * + * @copyright * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see @@ -41,7 +46,7 @@ # define __cilkscreen_metacall(annotation,expr) \ __notify_zc_intrinsic((char *)annotation, expr) #else -# define __cilkscreen_metacall(annotation,expr) (annotation, (void) (expr)) +# define __cilkscreen_metacall(annotation,expr) ((void)annotation, (void)(expr)) #endif /* Call once when a user thread enters a spawning function */ diff --git a/libcilkrts/include/cilktools/cilkview.h b/libcilkrts/include/cilktools/cilkview.h index cb1d235af95..e4656260049 100644 --- a/libcilkrts/include/cilktools/cilkview.h +++ b/libcilkrts/include/cilktools/cilkview.h @@ -2,24 +2,29 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 + * @copyright + * Copyright (C) 2010-2011 * Intel Corporation * + * @copyright * This file is part of the Intel Cilk Plus Library. This library is free * software; you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) * any later version. * + * @copyright * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * + * @copyright * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. * + * @copyright * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see @@ -40,7 +45,7 @@ __CILKRTS_END_EXTERN_C # endif #endif // _WIN32 -#if defined __unix__ || defined __APPLE__ +#if defined __unix__ || defined __APPLE__ || defined __VXWORKS__ # include <sys/time.h> #endif // defined __unix__ || defined __APPLE__ @@ -61,7 +66,7 @@ static inline unsigned long long __cilkview_getticks() #ifdef _WIN32 // Return milliseconds elapsed since the system started return GetTickCount(); -#elif defined(__unix__) || defined(__APPLE__) +#elif defined(__unix__) || defined(__APPLE__) || defined __VXWORKS__ // Return milliseconds elapsed since the Unix Epoch // (1-Jan-1970 00:00:00.000 UTC) struct timeval t; @@ -96,7 +101,7 @@ typedef struct { cilkview_data_t *start; // Values at start of interval cilkview_data_t *end; // Values at end of interval - char *label; // Name for this interval + const char *label; // Name for this interval unsigned int flags; // What to do - see flags below } cilkview_report_t; @@ -107,10 +112,13 @@ enum CV_REPORT_WRITE_TO_RESULTS = 2 // Write parallelism data to results file }; -void __cilkview_do_report(cilkview_data_t *start, +#ifndef CILKVIEW_NO_REPORT +static void __cilkview_do_report(cilkview_data_t *start, cilkview_data_t *end, - char *label, + const char *label, unsigned int flags); +#endif /* CILKVIEW_NO_REPORT */ + /* * Metacall data * @@ -198,7 +206,7 @@ enum static void __cilkview_do_report(cilkview_data_t *start, cilkview_data_t *end, - char *label, + const char *label, unsigned int flags) { int under_cilkview = 0; @@ -242,7 +250,7 @@ static void __cilkview_do_report(cilkview_data_t *start, // Open the output file and write the trial data to it outfile = getenv("CILKVIEW_OUTFILE"); if (NULL == outfile) - outfile = "cilkview.out"; + outfile = (char *)"cilkview.out"; f = fopen(outfile, "a"); if (NULL == f) diff --git a/libcilkrts/include/cilktools/fake_mutex.h b/libcilkrts/include/cilktools/fake_mutex.h index d11a10d700b..76276a63caf 100644 --- a/libcilkrts/include/cilktools/fake_mutex.h +++ 
b/libcilkrts/include/cilktools/fake_mutex.h @@ -2,24 +2,29 @@ * ************************************************************************* * - * Copyright (C) 2011 + * @copyright + * Copyright (C) 2013 * Intel Corporation * + * @copyright * This file is part of the Intel Cilk Plus Library. This library is free * software; you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) * any later version. * + * @copyright * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * + * @copyright * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. * + * @copyright * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see @@ -40,47 +45,43 @@ #include <cilktools/cilkscreen.h> -// If this is Windows, specify the linkage -#ifdef _WIN32 -#define CILKSCREEN_CDECL __cdecl -#else -#define CILKSCREEN_CDECL -#endif // _WIN32 - namespace cilkscreen { class fake_mutex { public: + fake_mutex() : locked(false) + { + } + + ~fake_mutex() + { + __CILKRTS_ASSERT(! locked); + } // Wait until mutex is available, then enter - virtual void lock() + void lock() { - __cilkscreen_acquire_lock(&lock_val); + __cilkscreen_acquire_lock(&locked); + __CILKRTS_ASSERT(! 
locked); + locked = true; } // A fake mutex is always available - virtual bool try_lock() { lock(); return true; } + bool try_lock() { lock(); return true; } // Releases the mutex - virtual void unlock() + void unlock() { - __cilkscreen_release_lock(&lock_val); + __CILKRTS_ASSERT(locked); + locked = false; + __cilkscreen_release_lock(&locked); } private: - int lock_val; + bool locked; }; - // Factory function for fake mutex - inline - fake_mutex *CILKSCREEN_CDECL create_fake_mutex() { return new fake_mutex(); } - - // Destructor function for fake mutex - The mutex cannot be used after - // calling this function - inline - void CILKSCREEN_CDECL destroy_fake_mutex(fake_mutex *m) { delete m; } - } // namespace cilk #endif // FAKE_MUTEX_H_INCLUDED diff --git a/libcilkrts/include/cilktools/lock_guard.h b/libcilkrts/include/cilktools/lock_guard.h index 02c8b401e31..bcb5eb5b2c3 100644 --- a/libcilkrts/include/cilktools/lock_guard.h +++ b/libcilkrts/include/cilktools/lock_guard.h @@ -2,24 +2,29 @@ * ************************************************************************* * - * Copyright (C) 2011 + * @copyright + * Copyright (C) 2011 * Intel Corporation * + * @copyright * This file is part of the Intel Cilk Plus Library. This library is free * software; you can redistribute it and/or modify it under the * terms of the GNU General Public License as published by the * Free Software Foundation; either version 3, or (at your option) * any later version. * + * @copyright * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * + * @copyright * Under Section 7 of GPL version 3, you are granted additional * permissions described in the GCC Runtime Library Exception, version * 3.1, as published by the Free Software Foundation. 
* + * @copyright * You should have received a copy of the GNU General Public License and * a copy of the GCC Runtime Library Exception along with this program; * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see diff --git a/libcilkrts/include/internal/abi.h b/libcilkrts/include/internal/abi.h index 0db800cc231..8f64b1bc5df 100644 --- a/libcilkrts/include/internal/abi.h +++ b/libcilkrts/include/internal/abi.h @@ -1,28 +1,33 @@ /* * abi.h * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * ******************************************************************************/ @@ -38,6 +43,7 @@ #include <cilk/common.h> +#include <stddef.h> // Needed to define size_t /** * Jump buffers are OS and architecture dependent @@ -85,14 +91,14 @@ typedef struct #endif /* defined(_MSC_VER) */ /* struct tags */ -typedef struct __cilkrts_worker __cilkrts_worker; -typedef struct __cilkrts_worker* __cilkrts_worker_ptr; -typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; +typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; ///< struct tag for stack frame // Forwarded declarations -typedef struct global_state_t global_state_t; -typedef struct local_state local_state; -typedef struct cilkred_map cilkred_map; +typedef struct global_state_t global_state_t; ///< Forwarded declaration for global state +typedef struct local_state local_state; ///< Forwarded declaration for local state +typedef struct cilkred_map cilkred_map; ///< Forward declaration for reducer map + +/// Forwarded declaration for system-dependent worker state typedef struct __cilkrts_worker_sysdep_state __cilkrts_worker_sysdep_state; @@ -559,6 +565,70 @@ CILK_ABI_THROWS(void) __cilkrts_cilk_for_64(__cilk_abi_f64_t body, cilk64_t count, int grain); 
-__CILKRTS_END_EXTERN_C +/** + * @brief Allocate memory for variable length arrays. If the frame is + * sync'd, the memory will be allocated on the stack, otherwise it will + * be allocated from the heap. + * + * @param sf The __cilkrts_stack_frame for the function allocating the + * memory. + * @param size The number of bytes requested. + * @param distance_from_sp_to_alloca_area ?. + * @param align Alignment required. Always >= minimum stack alignment, + * >= ptr_size, and always a power of 2. + * @param needs_tag Non-zero if the pointer being returned needs to be + * tagged + * + * @return The address of the memory block allocated. + */ + +CILK_ABI(__cilkrts_void_ptr) +__cilkrts_stack_alloc(__cilkrts_stack_frame *sf, + size_t size, + size_t distance_from_sp_to_alloca_area, + uint32_t align, + uint32_t needs_tag); +/** + * @brief Free memory allocated by __cilkrts_stack_alloc() for variable length + * arrays. + * + * @param sf The __cilkrts_stack_frame for the function allocating the + * memory. + * @param p Pointer to the memory block to be freed. + * @param size The number of bytes requested. + * @param distance_from_sp_to_alloca_area ?. + * @param align Alignment required. Always >= minimum stack alignment, + * >= ptr_size, and always a power of 2. + * @param known_from_stack Non-zero if the pointer is known to have been + * allocated on the stack and has no tag. + */ +CILK_ABI(void) +__cilkrts_stack_free(__cilkrts_stack_frame *sf, + void *p, + size_t size, + size_t distance_from_sp_to_alloca_area, + uint32_t align, + uint32_t known_from_stack); + +/** + * @brief System-dependent code to save floating point control information + * to an ABI 1 or higher @c __cilkrts_stack_frame. If possible (and necessary) + * the code to save the floating point control information should be inlined. + * + * Note that this function does *not* save the current floating point + * registers. 
It saves the floating point control words that control + * precision and rounding and stuff like that. + * + * This function will be a noop for architectures that don't have warts + * like the floating point control words, or where the information is + * already being saved by the setjmp. + * + * @param sf @c __cilkrts_stack_frame for the frame we're saving the + * floating point control information in. + */ +CILK_ABI(void) +__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf); + +__CILKRTS_END_EXTERN_C #endif /* include guard */ diff --git a/libcilkrts/include/internal/cilk_fake.h b/libcilkrts/include/internal/cilk_fake.h new file mode 100644 index 00000000000..2dc8efff765 --- /dev/null +++ b/libcilkrts/include/internal/cilk_fake.h @@ -0,0 +1,441 @@ +/* cilk_fake.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2011-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +/** + * @file cilk_fake.h + * + * @brief Macros to simulate a compiled Cilk program. + * + * Used carefully, these macros can be used to create a Cilk program with a + * non-Cilk compiler by manually inserting the code necessary for interacting + * with the Cilk runtime library. They are not intended to be pretty (you + * wouldn't want to write a whole program using these macros), but they are + * useful for experiments. They also work well as an illustration of what the + * compiler generates. + * + * Details of the mechanisms used in these macros are described in + * design-notes/CilkPlusABI.docx + * + * Example 1: fib in C++ + * --------------------- + * + * #include <internal/cilk_fake.h> + * + * int fib(int n) + * { + * CILK_FAKE_PROLOG(); + * + * if (n < 2) + * return n; + * + * int a, b; + * CILK_FAKE_SPAWN_R(a, fib(n - 1)); + * b = fib(n - 2); + * CILK_FAKE_SYNC(); + * + * return a + b; + * } + * + * + * Example 2: fib in C + * ------------------- + * + * #include <internal/cilk_fake.h> + * + * int fib(int n); + * + * void fib_spawn_helper(__cilkrts_stack_frame* parent_sf, int* a, int n) + * { + * CILK_FAKE_SPAWN_HELPER_PROLOG(*parent_sf); + * *a = fib(n - 1); + * CILK_FAKE_SPAWN_HELPER_EPILOG(); + * } + * + * int fib(int n) + * { + * CILK_FAKE_PROLOG(); + * + * if (n < 2) + * return n; + * + * int a, b; + * CILK_FAKE_CALL_SPAWN_HELPER(fib_spawn_helper(&__cilk_sf, &a, n)); + * b = fib(n - 2); + * CILK_FAKE_SYNC(); + * + * CILK_FAKE_EPILOG(); + * return a + b; + * } + */ + +#ifndef INCLUDED_CILK_FAKE_DOT_H +#define INCLUDED_CILK_FAKE_DOT_H + +// This header implements ABI version 1. If __CILKRTS_ABI_VERSION is already +// defined but is less than 1, then the data structures in <internal/abi.h> +// will not match the expectations of facilities in this header. 
Therefore, +// for successful compilation, __CILKRTS_ABI_VERSION must either be not +// defined, or defined to be 1 or greater. +#ifndef __CILKRTS_ABI_VERSION + // ABI version was not specified. Set it to 1. +# define __CILKRTS_ABI_VERSION 1 +#elif __CILKRTS_ABI_VERSION < 1 + // ABI version was specified but was too old. Fail compilation. +# error cilk_fake.h requirs an ABI version of 1 or greater +#endif + +#include <internal/abi.h> + +// alloca is defined in malloc.h on Windows, alloca.h on Linux +#ifndef _MSC_VER +#include <alloca.h> +#else +#include <malloc.h> +// Define offsetof +#include <stddef.h> +#endif + +#define CILK_FAKE_VERSION_FLAG (__CILKRTS_ABI_VERSION << 24) + +/* Initialize frame. To be called when worker is known */ +__CILKRTS_INLINE void __cilk_fake_enter_frame_fast(__cilkrts_stack_frame *sf, + __cilkrts_worker *w) +{ + sf->call_parent = w->current_stack_frame; + sf->worker = w; + sf->flags = CILK_FAKE_VERSION_FLAG; + w->current_stack_frame = sf; +} + +/* Initialize frame. To be called when worker is not known */ +__CILKRTS_INLINE void __cilk_fake_enter_frame(__cilkrts_stack_frame *sf) +{ + __cilkrts_worker* w = __cilkrts_get_tls_worker(); + uint32_t last_flag = 0; + if (! w) { + w = __cilkrts_bind_thread_1(); + last_flag = CILK_FRAME_LAST; + } + __cilk_fake_enter_frame_fast(sf, w); + sf->flags |= last_flag; +} + +/* Initialize frame. To be called within the spawn helper */ +__CILKRTS_INLINE void __cilk_fake_helper_enter_frame( + __cilkrts_stack_frame *sf, + __cilkrts_stack_frame *parent_sf) +{ + sf->worker = 0; + sf->call_parent = parent_sf; +} + +/* Called from the spawn helper to push the parent continuation on the task + * deque so that it can be stolen. + */ +__CILKRTS_INLINE void __cilk_fake_detach(__cilkrts_stack_frame *sf) +{ + /* Initialize spawn helper frame. 
+ * call_parent was saved in __cilk_fake_helper_enter_frame */ + __cilkrts_stack_frame *parent = sf->call_parent; + __cilkrts_worker *w = parent->worker; + __cilk_fake_enter_frame_fast(sf, w); + + /* Append a node to the pedigree */ + sf->spawn_helper_pedigree = w->pedigree; + parent->parent_pedigree = w->pedigree; + w->pedigree.rank = 0; + w->pedigree.parent = &sf->spawn_helper_pedigree; + + /* Push parent onto the task deque */ + __cilkrts_stack_frame *volatile *tail = w->tail; + *tail++ = sf->call_parent; + /* The stores must be separated by a store fence (noop on x86) + * or the second store is a release (st8.rel on Itanium) */ + w->tail = tail; + sf->flags |= CILK_FRAME_DETACHED; +} + +/* This variable is used in CILK_FAKE_FORCE_FRAME_PTR(), below */ +static int __cilk_fake_dummy = 8; + +/* The following macro is used to force the compiler into generating a frame + * pointer. We never change the value of __cilk_fake_dummy, so the alloca() + * is never called, but we need the 'if' statement and the __cilk_fake_dummy + * variable so that the compiler does not attempt to optimize it away. + */ +#define CILK_FAKE_FORCE_FRAME_PTR(sf) do { \ + if (__builtin_expect(1 & __cilk_fake_dummy, 0)) \ + (sf).worker = (__cilkrts_worker*) alloca(__cilk_fake_dummy); \ +} while (0) + +#ifndef CILK_FAKE_NO_SHRINKWRAP + /* "shrink-wrap" optimization enabled. Do not initialize frame on entry, + * except to clear worker pointer. Instead, defer initialization until + * the first spawn. + */ +# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) ((void) ((sf).worker = 0)) +# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) do { \ + if (! (sf).worker) __cilk_fake_enter_frame(&(sf)); \ + } while (0) +#else + /* "shrink-wrap" optimization disabled. Initialize frame immediately on + * entry. Do not initialize frame on spawn. 
+ */ +# define CILK_FAKE_INITIAL_ENTER_FRAME(sf) \ + __cilk_fake_enter_frame(&(sf)) +# define CILK_FAKE_DEFERRED_ENTER_FRAME(sf) ((void) &(sf)) +#endif + +/* Prologue of a spawning function. Declares and initializes the stack + * frame. + */ +#define CILK_FAKE_PROLOG() \ + __cilk_fake_stack_frame __cilk_sf; \ + CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \ + CILK_FAKE_INITIAL_ENTER_FRAME(__cilk_sf) + +/* Prologue of a spawning function where the current worker is already known. + * Declares and initializes the stack frame without looking up the worker from + * TLS. + */ +#define CILK_FAKE_PROLOG_FAST(w) \ + __cilk_fake_stack_frame __cilk_sf; \ + CILK_FAKE_FORCE_FRAME_PTR(__cilk_sf); \ + __cilk_fake_enter_frame_fast(&__cilk_sf, (w)) + +/* Simulate a cilk_sync */ +#define CILK_FAKE_SYNC() CILK_FAKE_SYNC_IMP(__cilk_sf) + +/* Epilog at the end of a spawning function. Does a sync and calls the + * runtime for leaving the frame. + */ +#ifdef __cplusplus + // Epilogue is run automatically by __cilk_fake_stack_frame destructor. +# define CILK_FAKE_EPILOG() ((void) __cilk_sf) +#else +# define CILK_FAKE_EPILOG() CILK_FAKE_CLEANUP_FRAME(__cilk_sf) +#endif // C + +/* Implementation of spawning function epilog. See CILK_FAKE_EPILOG macro and + * __cilk_fake_stack_frame destructor body. + */ +#define CILK_FAKE_CLEANUP_FRAME(sf) do { \ + if (! (sf).worker) break; \ + CILK_FAKE_SYNC_IMP(sf); \ + CILK_FAKE_POP_FRAME(sf); \ + if ((sf).flags != CILK_FAKE_VERSION_FLAG) \ + __cilkrts_leave_frame(&(sf)); \ +} while (0) + +/* Implementation of CILK_FAKE_SYNC with sf argument */ +#define CILK_FAKE_SYNC_IMP(sf) do { \ + if (__builtin_expect((sf).flags & CILK_FRAME_UNSYNCHED, 0)) { \ + (sf).parent_pedigree = (sf).worker->pedigree; \ + CILK_FAKE_SAVE_FP(sf); \ + if (! CILK_SETJMP((sf).ctx)) \ + __cilkrts_sync(&(sf)); \ + } \ + ++(sf).worker->pedigree.rank; \ +} while (0) + +/* Save the floating-point control registers. 
+ * The definition of CILK_FAKE_SAVE_FP is compiler specific (and + * architecture specific on Windows) + */ +#ifdef _MSC_VER +# define MXCSR_OFFSET offsetof(struct __cilkrts_stack_frame, mxcsr) +# define FPCSR_OFFSET offsetof(struct __cilkrts_stack_frame, fpcsr) +# if defined(_M_IX86) +/* Windows x86 */ +# define CILK_FAKE_SAVE_FP(sf) do { \ + __asm \ + { \ + mov eax, sf \ + stmxcsr [eax+MXCSR_OFFSET] \ + fnstcw [eax+FPCSR_OFFSET] \ + } \ + } while (0) +# elif defined(_M_X64) +/* Windows Intel64 - Not needed - saved by setjmp call */ +# define CILK_FAKE_SAVE_FP(sf) ((void) sf) +# else +# error "Unknown architecture" +# endif /* Microsoft architecture specifics */ +#else +/* Non-Windows */ +# define CILK_FAKE_SAVE_FP(sf) do { \ + __asm__ ( "stmxcsr %0\n\t" \ + "fnstcw %1" : : "m" ((sf).mxcsr), "m" ((sf).fpcsr)); \ + } while (0) +#endif + +/* Call the spawn helper as part of a fake spawn */ +#define CILK_FAKE_CALL_SPAWN_HELPER(helper) do { \ + CILK_FAKE_DEFERRED_ENTER_FRAME(__cilk_sf); \ + CILK_FAKE_SAVE_FP(__cilk_sf); \ + if (__builtin_expect(! CILK_SETJMP(__cilk_sf.ctx), 1)) { \ + helper; \ + } \ +} while (0) + +/* Body of a spawn helper function. In addition to the worker and the + * expression to spawn, pass it any number of statements to be executed before + * detaching. + */ +#define CILK_FAKE_SPAWN_HELPER_BODY(parent_sf, expr, ...) \ + CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf); \ + __VA_ARGS__; \ + __cilk_fake_detach(&__cilk_sf); \ + expr; \ + CILK_FAKE_SPAWN_HELPER_EPILOG() + +/* Prolog for a spawn helper function */ +#define CILK_FAKE_SPAWN_HELPER_PROLOG(parent_sf) \ + __cilk_fake_spawn_helper_stack_frame __cilk_sf; \ + __cilk_fake_helper_enter_frame(&__cilk_sf, &(parent_sf)) + +/* Implementation of spawn helper epilog. See CILK_FAKE_SPAWN_HELPER_EPILOG + * and the __cilk_fake_spawn_helper_frame destructor. + */ +#define CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(sf) do { \ + if (! 
(sf).worker) break; \ + CILK_FAKE_POP_FRAME(sf); \ + __cilkrts_leave_frame(&(sf)); \ +} while (0) + +/* Epilog to execute at the end of a spawn helper */ +#ifdef __cplusplus + // Epilog handled by __cilk_fake_spawn_helper_stack_frame destructor +# define CILK_FAKE_SPAWN_HELPER_EPILOG() ((void) __cilk_sf) +#else +# define CILK_FAKE_SPAWN_HELPER_EPILOG() \ + CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(__cilk_sf) +#endif + +/* Pop the current frame off of the call chain */ +#define CILK_FAKE_POP_FRAME(sf) do { \ + (sf).worker->current_stack_frame = (sf).call_parent; \ + (sf).call_parent = 0; \ +} while (0) + +#ifdef _WIN32 +/* define macros for synching functions before allowing them to propagate. */ +# define CILK_FAKE_EXCEPT_BEGIN \ + if (0 == CILK_SETJMP(__cilk_sf.except_ctx)) { + +# define CILK_FAKE_EXCEPT_END \ + } else { \ + assert((__cilk_sf.flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING))\ + == CILK_FRAME_EXCEPTING); \ + __cilkrts_rethrow(&__cilk_sf); \ + exit(0); \ + } +#else +# define CILK_EXCEPT_BEGIN { +# define CILK_EXCEPT_END } +#endif + +#ifdef __cplusplus +// The following definitions depend on C++ features. + +// Simulate "_Cilk_spawn expr", where expr must be a function call. +// +// Note: this macro does not correctly construct function arguments. +// According to the ABI specification, function arguments should be evaluated +// before the detach and destroyed after the detach. This macro both +// evaluates and destroys them after the detach. This means that if any part +// of the function argument expression depends on a value that is modified in +// the continuation of the spawn, race will occur between the continuation and +// the argument evaluation. +// +// To work around this problem, this macro accepts an arbitrary list of +// declarations and statements (separated by semicolons) that are evaluated +// before the detach. 
Thus, to simulate: +// +// _Cilk_spawn f(expr); +// +// one would write: +// +// CILK_FAKE_SPAWN(f(arg), auto arg = expr); +// +// Despite appearing in the reverse order, the 'arg' variable is created and +// initialized before the detach and the call to f(arg) occurs after the +// detach. +#define CILK_FAKE_SPAWN(expr, ...) \ + CILK_FAKE_CALL_SPAWN_HELPER( \ + CILK_FAKE_SPAWN_HELPER(expr, __VA_ARGS__)(&__cilk_sf)) + +// Simulate "ret = cilk_spawn expr". See CILK_FAKE_SPAWN for constraints. +#define CILK_FAKE_SPAWN_R(ret, expr, ...) \ + CILK_FAKE_SPAWN(((ret) = (expr)), __VA_ARGS__) + +// Create a spawn helper as a C++11 lambda function. In addition to the +// expression to spawn, this macro takes any number of statements to be +// executed before detaching. +#define CILK_FAKE_SPAWN_HELPER(expr, ...) \ + [&](__cilkrts_stack_frame *parent_sf) { \ + CILK_FAKE_SPAWN_HELPER_BODY(*parent_sf, expr, __VA_ARGS__); \ + } + +// C++ version of a __cilkrts_stack_frame for a spawning function. +// This struct is identical to __cilkrts_stack_frame except that the +// destructor automatically does frame cleanup. +struct __cilk_fake_stack_frame : __cilkrts_stack_frame +{ + // Extension of __cilkrts_stack_frame with constructor and destructor + __cilk_fake_stack_frame() { } + __forceinline ~__cilk_fake_stack_frame() { + CILK_FAKE_CLEANUP_FRAME(*this); + } +}; + +// C++ version of a __cilkrts_stack_frame for a spawn helper. +// This struct is identical to __cilkrts_stack_frame except that the +// destructor automatically does frame cleanup. +struct __cilk_fake_spawn_helper_stack_frame : __cilkrts_stack_frame +{ + // Extension of __cilkrts_stack_frame with constructor and destructor + __cilk_fake_spawn_helper_stack_frame() { worker = 0; } + __forceinline ~__cilk_fake_spawn_helper_stack_frame() { + CILK_FAKE_SPAWN_HELPER_CLEANUP_FRAME(*this); + } +}; +#else +// For C, __cilk_fake_stack_frame and __cilk_fake_spawn_helper_stack_frame are +// identical to __cilkrts_stack_frame. 
Frame cleanup must be performed +// explicitly (in CILK_FAKE_EPILOG and CILK_FAKE_SPAWN_HELPER_EPILOG) +typedef __cilkrts_stack_frame __cilk_fake_stack_frame; +typedef __cilkrts_stack_frame __cilk_fake_spawn_helper_stack_frame; +#endif + +#endif // ! defined(INCLUDED_CILK_FAKE_DOT_H) diff --git a/libcilkrts/include/internal/cilk_version.h b/libcilkrts/include/internal/cilk_version.h index d0d3bc051d4..30d40393d3b 100644 --- a/libcilkrts/include/internal/cilk_version.h +++ b/libcilkrts/include/internal/cilk_version.h @@ -1,37 +1,42 @@ // cilk_version.h // -// Copyright (C) 2009-2012 -// Intel Corporation -// -// This file is part of the Intel Cilk Plus Library. This library is free -// software; you can redistribute it and/or modify it under the -// terms of the GNU General Public License as published by the -// Free Software Foundation; either version 3, or (at your option) -// any later version. -// -// This library is distributed in the hope that it will be useful, -// but WITHOUT ANY WARRANTY; without even the implied warranty of -// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -// GNU General Public License for more details. -// -// Under Section 7 of GPL version 3, you are granted additional -// permissions described in the GCC Runtime Library Exception, version -// 3.1, as published by the Free Software Foundation. -// -// You should have received a copy of the GNU General Public License and -// a copy of the GCC Runtime Library Exception along with this program; -// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -// <http://www.gnu.org/licenses/>. +// @copyright +// Copyright (C) 2009-2013 +// Intel Corporation +// +// @copyright +// This file is part of the Intel Cilk Plus Library. This library is free +// software; you can redistribute it and/or modify it under the +// terms of the GNU General Public License as published by the +// Free Software Foundation; either version 3, or (at your option) +// any later version. 
+// +// @copyright +// This library is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// @copyright +// Under Section 7 of GPL version 3, you are granted additional +// permissions described in the GCC Runtime Library Exception, version +// 3.1, as published by the Free Software Foundation. +// +// @copyright +// You should have received a copy of the GNU General Public License and +// a copy of the GCC Runtime Library Exception along with this program; +// see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +// <http://www.gnu.org/licenses/>. // DO NOT EDIT THIS FILE! // // It was automatically generated by cilkrts/include/internal/Makefile #define VERSION_MAJOR 2 #define VERSION_MINOR 0 -#define VERSION_BUILD 2856 +#define VERSION_BUILD 3520 #define VERSION_REV 0 -#define VERSION_STRING "2,0,2856,0" -#define VERSION_HASH "71912a126cb8" -#define VERSION_BRANCH "v13.0" +#define VERSION_STRING "2,0,3520,0" +#define VERSION_HASH "d5d11f1fb4cf" +#define VERSION_BRANCH "eng" #define TBB_REV_NUMBER "" -#define VERSION_YEAR "2012" +#define VERSION_YEAR "2013" diff --git a/libcilkrts/include/internal/metacall.h b/libcilkrts/include/internal/metacall.h index a3450eae9be..9418ad57279 100644 --- a/libcilkrts/include/internal/metacall.h +++ b/libcilkrts/include/internal/metacall.h @@ -1,28 +1,33 @@ // -*- C++ -*- /* - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
* ****************************************************************************** * diff --git a/libcilkrts/include/internal/rev.mk b/libcilkrts/include/internal/rev.mk index 574df641ab7..5b86e6a863c 100644 --- a/libcilkrts/include/internal/rev.mk +++ b/libcilkrts/include/internal/rev.mk @@ -1,27 +1,36 @@ ######################################################################### # -# Copyright (C) 2011-2012 -# Intel Corporation -# -# This file is part of the Intel Cilk Plus Library. This library is free -# software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# Under Section 7 of GPL version 3, you are granted additional -# permissions described in the GCC Runtime Library Exception, version -# 3.1, as published by the Free Software Foundation. -# -# You should have received a copy of the GNU General Public License and -# a copy of the GCC Runtime Library Exception along with this program; -# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -# <http://www.gnu.org/licenses/>. +# @copyright +# Copyright (C) 2011-2013 +# Intel Corporation +# +# @copyright +# This file is part of the Intel Cilk Plus Library. This library is free +# software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# @copyright +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the +# GNU General Public License for more details. +# +# @copyright +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# @copyright +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. ########################################################################### -CILK_REVISION = 2856 +# DO NOT EDIT THIS FILE! +# +# It was automatically generated by cilkrts/include/internal/Makefile + +CILK_REVISION = 3520 diff --git a/libcilkrts/runtime/acknowledgements.dox b/libcilkrts/runtime/acknowledgements.dox new file mode 100644 index 00000000000..9715098ab7a --- /dev/null +++ b/libcilkrts/runtime/acknowledgements.dox @@ -0,0 +1,46 @@ +/* acknowledgements.dox
+ *
+ *************************************************************************
+ *
+ * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/
+
+/*
+ * This file contains acknowledgements of community contributions to the
+ * Cilk Plus runtime.
+ */
+
+/**
+ * @mainpage
+ *
+ * @section Acknowledgements Acknowledgements
+ *
+ * Modifications to build the Cilk Plus runtime for VxWorks provided by
+ * Brian Kuhl of Wind River.
+ */
diff --git a/libcilkrts/runtime/bug.cpp b/libcilkrts/runtime/bug.cpp index 4ed4d6e553a..1a626b7ed02 100644 --- a/libcilkrts/runtime/bug.cpp +++ b/libcilkrts/runtime/bug.cpp @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "bug.h" diff --git a/libcilkrts/runtime/bug.h b/libcilkrts/runtime/bug.h index 5117ba442a9..1732f049a65 100644 --- a/libcilkrts/runtime/bug.h +++ b/libcilkrts/runtime/bug.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. 
+ * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -62,12 +67,31 @@ COMMON_PORTABLE extern const char *const __cilkrts_assertion_failed; #define CILK_ASSERT(ex) \ (__builtin_expect((ex) != 0, 1) ? (void)0 : \ __cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, #ex)) + +#define CILK_ASSERT_MSG(ex, msg) \ + (__builtin_expect((ex) != 0, 1) ? (void)0 : \ + __cilkrts_bug(__cilkrts_assertion_failed, __FILE__, __LINE__, \ + #ex "\n " msg)) #endif // CILK_ASSERT /** * Assert that there is no uncaught exception. + * + * Not valid on Windows or Android. + * + * On Android, calling std::uncaught_exception with the stlport library causes + * a seg fault. Since we're not supporting exceptions there at this point, + * just don't do the check. It works with the GNU STL library, but that's + * GPL V3 licensed. 
*/ COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void); +#if defined(_WIN32) || defined(ANDROID) +# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION() +#else +# define CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION() \ + cilkbug_assert_no_uncaught_exception() +#endif + /** * Call __cilkrts_bug with a standard message that the runtime state is @@ -76,7 +100,9 @@ COMMON_PORTABLE void cilkbug_assert_no_uncaught_exception(void); COMMON_SYSDEP void abort_because_rts_is_corrupted(void); // Debugging aids -#ifdef _WIN32 +#ifndef _DEBUG +# define DBGPRINTF(_fmt, ...) +#elif defined(_WIN32) /** * Write debugging output. On windows this is written to the debugger. @@ -93,16 +119,17 @@ COMMON_SYSDEP void __cilkrts_dbgprintf(const char *fmt,...) cilk_nothrow; * @param _fmt printf-style format string. Any remaining parameters will be * interpreted based on the format string text. */ -# ifdef _DEBUG # define DBGPRINTF(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__) -# else -# define DBGPRINTF(_fmt, ...) -# endif // _DEBUG -#else - // Not yet implemented on the Unix side -# define DBGPRINTF(_fmt, ...) -#endif // _WIN32 +#else /* if _DEBUG && !_WIN32 */ + /* Non-Windows debug logging. Someday we should make GetCurrentFiber() + * and GetWorkerFiber() do something. + */ +# include <stdio.h> + __CILKRTS_INLINE void* GetCurrentFiber() { return 0; } + __CILKRTS_INLINE void* GetWorkerFiber(__cilkrts_worker* w) { return 0; } +# define DBGPRINTF(_fmt, ...) fprintf(stderr, _fmt, __VA_ARGS__) +#endif // _DEBUG __CILKRTS_END_EXTERN_C diff --git a/libcilkrts/runtime/c_reducers.c b/libcilkrts/runtime/c_reducers.c index 5ed23582154..0e775ec2990 100644 --- a/libcilkrts/runtime/c_reducers.c +++ b/libcilkrts/runtime/c_reducers.c @@ -2,186 +2,51 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ /* Implementation of C reducers */ -#include <cilk/reducer_opadd.h> -#include <cilk/reducer_opand.h> -#include <cilk/reducer_opor.h> -#include <cilk/reducer_opxor.h> -#include <cilk/reducer_max.h> -#include <cilk/reducer_min.h> -#include <limits.h> -#include <math.h> /* HUGE_VAL */ - -#ifndef _MSC_VER -# include <stdint.h> /* WCHAR_MIN */ -#else -# include <wchar.h> /* WCHAR_MIN */ -#endif - -/* Floating-point constants */ -#ifndef HUGE_VALF - static const unsigned int __huge_valf[] = {0x7f800000}; -# define HUGE_VALF (*((const float *)__huge_valf)) -#endif - -#ifndef HUGE_VALL - static const unsigned int __huge_vall[] = {0, 0, 0x00007f80, 0}; -# define HUGE_VALL (*((const long double *)__huge_vall)) -#endif - // Disable warning about integer conversions losing significant bits. // The code is correct as is. 
+#ifdef __INTEL_COMPILER #pragma warning(disable:2259) +#endif -CILK_C_REDUCER_OPADD_IMP(char,char) -CILK_C_REDUCER_OPADD_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPADD_IMP(signed char,schar) -CILK_C_REDUCER_OPADD_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPADD_IMP(short,short) -CILK_C_REDUCER_OPADD_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPADD_IMP(int,int) -CILK_C_REDUCER_OPADD_IMP(unsigned int,uint) -CILK_C_REDUCER_OPADD_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPADD_IMP(long,long) -CILK_C_REDUCER_OPADD_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPADD_IMP(long long,longlong) -CILK_C_REDUCER_OPADD_IMP(unsigned long long,ulonglong) -CILK_C_REDUCER_OPADD_IMP(float,float) -CILK_C_REDUCER_OPADD_IMP(double,double) -CILK_C_REDUCER_OPADD_IMP(long double,longdouble) - -CILK_C_REDUCER_OPAND_IMP(char,char) -CILK_C_REDUCER_OPAND_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPAND_IMP(signed char,schar) -CILK_C_REDUCER_OPAND_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPAND_IMP(short,short) -CILK_C_REDUCER_OPAND_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPAND_IMP(int,int) -CILK_C_REDUCER_OPAND_IMP(unsigned int,uint) -CILK_C_REDUCER_OPAND_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPAND_IMP(long,long) -CILK_C_REDUCER_OPAND_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPAND_IMP(long long,longlong) -CILK_C_REDUCER_OPAND_IMP(unsigned long long,ulonglong) - -CILK_C_REDUCER_OPOR_IMP(char,char) -CILK_C_REDUCER_OPOR_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPOR_IMP(signed char,schar) -CILK_C_REDUCER_OPOR_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPOR_IMP(short,short) -CILK_C_REDUCER_OPOR_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPOR_IMP(int,int) -CILK_C_REDUCER_OPOR_IMP(unsigned int,uint) -CILK_C_REDUCER_OPOR_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPOR_IMP(long,long) -CILK_C_REDUCER_OPOR_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPOR_IMP(long long,longlong) -CILK_C_REDUCER_OPOR_IMP(unsigned long long,ulonglong) - 
-CILK_C_REDUCER_OPXOR_IMP(char,char) -CILK_C_REDUCER_OPXOR_IMP(unsigned char,uchar) -CILK_C_REDUCER_OPXOR_IMP(signed char,schar) -CILK_C_REDUCER_OPXOR_IMP(wchar_t,wchar_t) -CILK_C_REDUCER_OPXOR_IMP(short,short) -CILK_C_REDUCER_OPXOR_IMP(unsigned short,ushort) -CILK_C_REDUCER_OPXOR_IMP(int,int) -CILK_C_REDUCER_OPXOR_IMP(unsigned int,uint) -CILK_C_REDUCER_OPXOR_IMP(unsigned int,unsigned) // alternate name -CILK_C_REDUCER_OPXOR_IMP(long,long) -CILK_C_REDUCER_OPXOR_IMP(unsigned long,ulong) -CILK_C_REDUCER_OPXOR_IMP(long long,longlong) -CILK_C_REDUCER_OPXOR_IMP(unsigned long long,ulonglong) - -CILK_C_REDUCER_MAX_IMP(char,char,CHAR_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned char,uchar,0) -CILK_C_REDUCER_MAX_IMP(signed char,schar,SCHAR_MIN) -CILK_C_REDUCER_MAX_IMP(wchar_t,wchar_t,WCHAR_MIN) -CILK_C_REDUCER_MAX_IMP(short,short,SHRT_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned short,ushort,0) -CILK_C_REDUCER_MAX_IMP(int,int,INT_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned int,uint,0) -CILK_C_REDUCER_MAX_IMP(unsigned int,unsigned,0) // alternate name -CILK_C_REDUCER_MAX_IMP(long,long,LONG_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned long,ulong,0) -CILK_C_REDUCER_MAX_IMP(long long,longlong,LLONG_MIN) -CILK_C_REDUCER_MAX_IMP(unsigned long long,ulonglong,0) -CILK_C_REDUCER_MAX_IMP(float,float,-HUGE_VALF) -CILK_C_REDUCER_MAX_IMP(double,double,-HUGE_VAL) -CILK_C_REDUCER_MAX_IMP(long double,longdouble,-HUGE_VALL) -CILK_C_REDUCER_MAX_INDEX_IMP(char,char,CHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned char,uchar,0) -CILK_C_REDUCER_MAX_INDEX_IMP(signed char,schar,SCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(wchar_t,wchar_t,WCHAR_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(short,short,SHRT_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned short,ushort,0) -CILK_C_REDUCER_MAX_INDEX_IMP(int,int,INT_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,uint,0) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned int,unsigned,0) // alternate name -CILK_C_REDUCER_MAX_INDEX_IMP(long,long,LONG_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long,ulong,0) 
-CILK_C_REDUCER_MAX_INDEX_IMP(long long,longlong,LLONG_MIN) -CILK_C_REDUCER_MAX_INDEX_IMP(unsigned long long,ulonglong,0) -CILK_C_REDUCER_MAX_INDEX_IMP(float,float,-HUGE_VALF) -CILK_C_REDUCER_MAX_INDEX_IMP(double,double,-HUGE_VAL) -CILK_C_REDUCER_MAX_INDEX_IMP(long double,longdouble,-HUGE_VALL) +#define CILK_C_DEFINE_REDUCERS -CILK_C_REDUCER_MIN_IMP(char,char,CHAR_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned char,uchar,CHAR_MIN) -CILK_C_REDUCER_MIN_IMP(signed char,schar,SCHAR_MAX) -CILK_C_REDUCER_MIN_IMP(wchar_t,wchar_t,WCHAR_MAX) -CILK_C_REDUCER_MIN_IMP(short,short,SHRT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned short,ushort,USHRT_MAX) -CILK_C_REDUCER_MIN_IMP(int,int,INT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned int,uint,UINT_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned int,unsigned,UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_IMP(long,long,LONG_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned long,ulong,ULONG_MAX) -CILK_C_REDUCER_MIN_IMP(long long,longlong,LLONG_MAX) -CILK_C_REDUCER_MIN_IMP(unsigned long long,ulonglong,ULLONG_MAX) -CILK_C_REDUCER_MIN_IMP(float,float,HUGE_VALF) -CILK_C_REDUCER_MIN_IMP(double,double,HUGE_VAL) -CILK_C_REDUCER_MIN_IMP(long double,longdouble,HUGE_VALL) -CILK_C_REDUCER_MIN_INDEX_IMP(char,char,CHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned char,uchar,CHAR_MIN) -CILK_C_REDUCER_MIN_INDEX_IMP(signed char,schar,SCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(wchar_t,wchar_t,WCHAR_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(short,short,SHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned short,ushort,USHRT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(int,int,INT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,uint,UINT_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned int,unsigned,UINT_MAX) // alternate name -CILK_C_REDUCER_MIN_INDEX_IMP(long,long,LONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long,ulong,ULONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(long long,longlong,LLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(unsigned long long,ulonglong,ULLONG_MAX) -CILK_C_REDUCER_MIN_INDEX_IMP(float,float,HUGE_VALF) 
-CILK_C_REDUCER_MIN_INDEX_IMP(double,double,HUGE_VAL) -CILK_C_REDUCER_MIN_INDEX_IMP(long double,longdouble,HUGE_VALL) +#include <cilk/reducer_opadd.h> +#include <cilk/reducer_opand.h> +#include <cilk/reducer_opmul.h> +#include <cilk/reducer_opor.h> +#include <cilk/reducer_opxor.h> +#include <cilk/reducer_min_max.h> /* End reducer_opadd.c */ diff --git a/libcilkrts/runtime/cilk-abi-cilk-for.cpp b/libcilkrts/runtime/cilk-abi-cilk-for.cpp index a584f86c2ca..89c3d5cd5b2 100644 --- a/libcilkrts/runtime/cilk-abi-cilk-for.cpp +++ b/libcilkrts/runtime/cilk-abi-cilk-for.cpp @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2011, 2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -114,6 +119,9 @@ void call_cilk_for_loop_body(count_t low, count_t high, __cilkrts_worker *w, __cilkrts_pedigree *loop_root_pedigree) { + // Cilkscreen should not report this call in a stack trace + __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0); + // The worker is only valid until the first spawn. Fetch the // __cilkrts_stack_frame out of the worker, since it will be stable across // steals. 
The sf pointer actually points to the *parent's* @@ -201,7 +209,6 @@ capture_spawn_arg_stack_frame(__cilkrts_stack_frame* &sf, __cilkrts_worker* w) return w; } - /* * cilk_for_recursive * @@ -225,6 +232,10 @@ void cilk_for_recursive(count_t low, count_t high, __cilkrts_pedigree *loop_root_pedigree) { tail_recurse: + // Cilkscreen should not report this call in a stack trace + // This needs to be done every time the worker resumes + __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0); + count_t count = high - low; // Invariant: count > 0, grain >= 1 if (count > grain) @@ -269,6 +280,9 @@ static void noop() { } template <typename count_t, typename F> static void cilk_for_root(F body, void *data, count_t count, int grain) { + // Cilkscreen should not report this call in a stack trace + __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0); + // Pedigree computation: // // If the last pedigree node on entry to the _Cilk_for has value X, @@ -353,6 +367,9 @@ extern "C" { CILK_ABI_THROWS_VOID __cilkrts_cilk_for_32(__cilk_abi_f32_t body, void *data, cilk32_t count, int grain) { + // Cilkscreen should not report this call in a stack trace + __notify_zc_intrinsic((char *)"cilkscreen_hide_call", 0); + // Check for an empty range here as an optimization - don't need to do any // __cilkrts_stack_frame initialization if (count > 0) diff --git a/libcilkrts/runtime/cilk-abi-vla-internal.c b/libcilkrts/runtime/cilk-abi-vla-internal.c new file mode 100644 index 00000000000..2669ed37d75 --- /dev/null +++ b/libcilkrts/runtime/cilk-abi-vla-internal.c @@ -0,0 +1,78 @@ +/* cilk-abi-vla-internal.c -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +/* + * These functions are provided in their own compilation unit so I can debug + * them. cilk-abi-vla.c must always be compiled with optimization on so that + * inlining occurs. 
+ */ + +#include "internal/abi.h" +#include "cilk-abi-vla-internal.h" +#include "bug.h" +#include "full_frame.h" +#include "local_state.h" + +#include <stdlib.h> +#include <stdint.h> + +#include "bug.h" + +void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf, + size_t full_size, + uint32_t align) +{ + return malloc(full_size); +} + +void vla_internal_heap_free(void *t, size_t size) +{ + free(t); +} + +void vla_free_from_original_stack(__cilkrts_stack_frame *sf, + size_t full_size) +{ + // The __cilkrts_stack_frame must be initialized + CILK_ASSERT(sf->worker); + +#if 1 + // Add full_size to ff->sync_sp so that when we return, the VLA will no + // longer be allocated on the stack + __cilkrts_adjust_stack(sf->worker->l->frame_ff, full_size); +#else + // Inline __cilkrts_adjust_stack for Kevin + full_frame *ff = sf->worker->l->frame_ff; + ff->sync_sp = ff->sync_sp + full_size; +#endif +} diff --git a/libcilkrts/runtime/cilk-abi-vla-internal.h b/libcilkrts/runtime/cilk-abi-vla-internal.h new file mode 100644 index 00000000000..f8d3c5aaa89 --- /dev/null +++ b/libcilkrts/runtime/cilk-abi-vla-internal.h @@ -0,0 +1,85 @@ +/* cilk-abi-vla-internal.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +/** + * @file cilk-abi-vla-internal.h + * + * @brief Allocation/deallocation function for use with Variable Length + * Arrays in spawning functions. + * + * These should be the only functions in the Cilk runtime allocating memory + * from the standard C runtime heap. This memory will be provided to user + * code for use in VLAs, when the memory cannot be allocated from the stack. + * + * While these functions are simply passthroughs to malloc and free at the + * moment, once we've got the basics of VLA allocations working we'll make + * them do fancier tricks. + */ + +/** + * @brief Allocate memory from the heap for use by a Variable Length Array in + * a spawning function. + * + * @param sf The __cilkrts_stack_frame for the spawning function containing + * the VLA. + * @param full_size The number of bytes to be allocated, including any tags + * needed to identify this as allocated from the heap. + * @param align Any alignment necessary for the allocation. + */ + +void *vla_internal_heap_alloc(__cilkrts_stack_frame *sf, + size_t full_size, + uint32_t align); + +/** + * @brief Deallocate memory from the heap used by a Variable Length Array in + * a spawning function. + * + * @param t The address of the memory block to be freed. + * @param size The size of the memory block to be freed. + */ + +void vla_internal_heap_free(void *t, + size_t size); + +/** + * @brief Deallocate memory from the original stack. 
We'll do this by adding + * full_size to ff->sync_sp. So after the sync, the Variable Length Array + * will no longer be allocated on the stack. + * + * @param sf The __cilkrts_stack_frame for the spawning function that is + * deallocating a VLA. + * @param full_size The size of the VLA, including any alignment and tags. + */ +void vla_free_from_original_stack(__cilkrts_stack_frame *sf, + size_t full_size); diff --git a/libcilkrts/runtime/cilk-abi-vla.c b/libcilkrts/runtime/cilk-abi-vla.c new file mode 100644 index 00000000000..9de1f9f3670 --- /dev/null +++ b/libcilkrts/runtime/cilk-abi-vla.c @@ -0,0 +1,417 @@ +/* cilk-abi-vla.cpp -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
+ * + **************************************************************************/ + +/* + * Implementation of Variable Length Array (VLA) ABI. + * + * __cilkrts_stack_alloc() and __cilkrts_stack_free must be compiled + * such that ebp/rbp is used for the stack frames. This is done by having + * each of them use alloca, which forces the special frame types needed on + * each of the ABIs. Additionally, for some forms of stack frame, special + * care must be taken because the alloca space may not be at the bottom of the + * stack frame of the caller. For Intel64 windows, and for some options + * with other ABIs, a preallocated parameter block may exist on the stack + * at a lower address than the alloca. If this is the case, the parameter + * distance_from_sp_to_alloca_area will be non-zero, and will indicate how + * much pre-allocated parameter space resides in the caller's stack frame + * between the alloca area, and the bottom of the stack when the call to + * the cilkrts is made. As such, when non-zero it also includes any space + * used for passing the cilkrts_stack_alloc or cilkrts_stack_free parameters. 
+ */ + +#include <assert.h> +#include <stdlib.h> +#include <stdint.h> +#ifdef _WIN32 +# define alloca _alloca +# define INLINE static __inline +# pragma warning(disable:1025) // Don't whine about zero extending result of unary operation +#else +# include <alloca.h> +# define INLINE static inline +#endif + +#include "internal/abi.h" +#include "cilk-abi-vla-internal.h" + +#if defined(__x86_64) || defined(_M_X64) +INLINE void setsp(void *val) +{ + __asm__("movq %0, %%rsp" : : "r"(val): "rsp"); +} +INLINE char* getsp(void) +{ + void *res; + + __asm__("movq %%rsp, %0" : "=r"(res): : "rsp"); + return res; +} +INLINE char* getbp(void) +{ + void *res; + + __asm__("movq %%rbp, %0" : "=r"(res): : "rbp"); + return res; +} +INLINE void copy_frame_down_and_move_bp( + char *dst, + char *src, + size_t cpy_bytes, + char *new_ebp +) +{ + // In this version, dst is guaranteed to be lower address than src, + // therefore copying upwards from src into dst is safe in case + // there is overlap. The number of bytes is also guaranteed to be + // a multiple of 8, and the copy is done in 64 bit word chunks for + // best efficiency. + __asm__( + "movq %0, %%rdi;" + "movq %1, %%rsi;" + "movq %2, %%rcx;" + "shrq $3, %%rcx;" + "rep movsq;" + "movq %3, %%rbp" : + : + "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) : + "rsi", "rdi", "rcx", "rbp", "memory"); +} +INLINE void copy_frame_up_and_move_bp( + char *dst, + char *src, + size_t cpy_bytes, + char *new_ebp +) +{ + // In this version, dst is guaranteed to be higher address than src, + // therefore copying downwards from src into dst is safe in case + // there is overlap. The number of bytes is also guaranteed to be + // a multiple of 8, and the copy is done in 64 bit word chunks for + // best efficiency. 
+ // std makes rep movsq walk downwards, so start both pointers at the + // last 8-byte word of their regions. + dst += cpy_bytes - 8; + src += cpy_bytes - 8; + // %rbp is a 64-bit register, so the new frame pointer must be loaded + // with movq (as copy_frame_down_and_move_bp does); movl cannot take + // %rbp as an operand. + __asm__( + "movq %0, %%rdi;" + "movq %1, %%rsi;" + "movq %2, %%rcx;" + "shrq $3, %%rcx;" + "std; rep movsq; cld;" + "movq %3, %%rbp;" : + : + "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) : + "rsi", "rdi", "rcx", "rbp", "memory"); +} +#else +INLINE void setsp(void *val) +{ + __asm__("movl %0, %%esp" : : "r"(val): "esp"); +} +INLINE char* getsp(void) +{ + void *res; + + __asm__("movl %%esp, %0" : "=r"(res): : "esp"); + return res; +} +INLINE char* getbp(void) +{ + void *res; + + __asm__("movl %%ebp, %0" : "=r"(res): : "ebp"); + return res; +} +INLINE void copy_frame_down_and_move_bp( + char *dst, + char *src, + size_t cpy_bytes, + char *new_ebp +) +{ + // In this version, dst is guaranteed to be lower address than src, + // therefore copying upwards from src into dst is safe in case + // there is overlap. The number of bytes is also guaranteed to be + // a multiple of 4, and the copy is done in 32 bit word chunks for + // best efficiency. + __asm__( + "movl %0, %%edi;" + "movl %1, %%esi;" + "movl %2, %%ecx;" + "shrl $2, %%ecx;" + "rep movsd;" + "movl %3, %%ebp" : + : + "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) : + "esi", "edi", "ecx", "ebp", "memory"); +} +INLINE void copy_frame_up_and_move_bp( + char *dst, + char *src, + size_t cpy_bytes, + char *new_ebp +) +{ + // In this version, dst is guaranteed to be higher address than src, + // therefore copying downwards from src into dst is safe in case + // there is overlap. The number of bytes is also guaranteed to be + // a multiple of 4, and the copy is done in 32 bit word chunks for + // best efficiency.
+ dst += cpy_bytes - 4; + src += cpy_bytes - 4; + __asm__( + "movl %0, %%edi;" + "movl %1, %%esi;" + "movl %2, %%ecx;" + "shrl $2, %%ecx;" + "std; rep movsd; cld;" + "movl %3, %%ebp" : + // "=D"(dst), "=S"(src), "=C"(cpy_bytes) : + : + "rm"(dst), "rm"(src), "rm"(cpy_bytes), "rm"(new_ebp) : + "esi", "edi", "ecx", "ebp", "memory"); +} +#endif + + +#define c_cilk_ptr_from_heap 0xc2f2f00d +#define c_cilk_ptr_from_stack 0xc3f30d0f + +CILK_ABI(__cilkrts_void_ptr) +__cilkrts_stack_alloc( + __cilkrts_stack_frame *sf, + size_t size, + size_t distance_from_sp_to_alloca_area, + uint32_t align, // align is always >= minimum stack alignment and + // >= ptr_size as well, and must be a power of 2. + uint32_t needs_tag // non-zero if the pointer being returned needs to + // be tagged +) +{ +#ifdef __INTEL_COMPILER + // full_size will be a multiple of align, and contains + // enough extra space to allocate a marker. + size_t full_size = (size + align - 1) & ~(align - 1); + + if (needs_tag) { + full_size += align; + } + + char *t; + if (sf->worker != 0 && + ((sf->flags & CILK_FRAME_UNSYNCHED) != 0)) { + t = vla_internal_heap_alloc(sf, full_size, align); + if (needs_tag) { + t += align; + ((uint32_t*)t)[-1] = c_cilk_ptr_from_heap; + } + return (void *)t; + } + + // stack is still synced, allocate full_size from esp, + // and record in the 32 bits immediately below the space + // allocated that this space was + // allocated on the stack. + char *old_ebp = getbp(); + char *old_esp = getsp(); + + // make top_ptr point to base of first parameter.
+ char *top_ptr = ((char *)(_AddressOfReturnAddress()) + + sizeof(char *)); + size_t param_size = 0; + +#if defined(__x86_64) + // For Intel64 linux & MACH ABI, all the parameters were passed in + // registers, so top of the stack frame above the return address + // is just the size of the return address plus + // distance_from_sp_to_alloca_area on the chance that the alloca + // area isn't at the very bottom of the calling function's stack. +#elif defined(__MACH__) + // For ia32 MACH, parameter size is always a multiple of 16 + // bytes to keep the stack 16 byte aligned. So we need to round + // number of parameters up to multiple of 4. + param_size = 8 * sizeof(char *); +#else + // For both windows Intel64 ABI, and the IA32 windows and + // linux ABIs, space is reserved on the stack for all these + // parameters. param_size is 5 * size of a stack slot. + param_size = 5 * sizeof(char *); +#endif + + // now make top_ptr point above the params, or if + // distance_from_sp_to_alloca_area is not zero, make + // it point above that area. When non-zero, + // distance_from_sp_to_alloca_area is expected to contain + // the parameter space, so we only add one or the other, + // not both. + top_ptr += (distance_from_sp_to_alloca_area != 0) ? + distance_from_sp_to_alloca_area : param_size; + + // t needs to end up at current value of top_ptr less full_size and less + // distance_from_sp_to_alloca_area and + // then rounded down to the alignment needed. Then we have to bump + // esp down by current frame_size, so that when all is done with respect + // to executing the return sequence, the final value of esp will be the + // same value as t. + t = (top_ptr - full_size) - distance_from_sp_to_alloca_area; + intptr_t temp = (intptr_t)t; + temp &= ~((intptr_t)(align - 1)); + t = (char *)temp; + + // ok, the value of t is set where we need it. Now set esp + // to the value of t less the current frame size.
+ // So now when we do regular return esp should be left such + // that it has moved down by full_size. + size_t cur_fm_size = (top_ptr - old_esp); + char *new_esp = t - cur_fm_size; + char *new_ebp = old_ebp - (old_esp - new_esp); + + // extend the stack down by at least the difference between where + // I want it to be and where it currently is. This should take care + // of touching any pages necessary. + char *foo = alloca(old_esp - new_esp); + setsp(foo < new_esp ? foo : new_esp); + + // Now set esp exactly where I want it. + // setsp(new_esp); + + copy_frame_down_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp); + + if (needs_tag) { + t += align; + ((uint32_t*)t)[-1] = c_cilk_ptr_from_stack; + } + + return t; +#else // Not __INTEL_COMPILER + // Not supported unless we can figure out how to get the size of the frame + return NULL; +#endif +} + +// This frees the space allocated for a variable length array. +CILK_ABI(void) +__cilkrts_stack_free( + __cilkrts_stack_frame *sf, + void *p, + size_t size, + size_t distance_from_sp_to_alloca_area, + uint32_t align, // same requirements as for align in allocation, + // and must match alignment that was passed when + // doing the allocation + uint32_t known_from_stack // non-zero if this is known to be allocated + // on the stack, and therefore has no tag +) +{ +#ifdef __INTEL_COMPILER + uint32_t *t = (uint32_t*)p; + + // full_size will be a multiple of align, and contains + // enough extra space to allocate a marker if one was needed. + size_t full_size = (size + align - 1) & ~(align - 1); + if (known_from_stack == 0) { + // if the compiler hasn't told the run-time that this is + // known to be on the stack, then this pointer must have been + // tagged such that the run-time can tell. 
+ assert(t[-1] == c_cilk_ptr_from_stack || + t[-1] == c_cilk_ptr_from_heap); + + known_from_stack = t[-1] == c_cilk_ptr_from_stack; + full_size += align; // accounts for extra space for marker + t = (uint32_t *)(((char *)t) - align); + } + + if (known_from_stack) { + // alloca usage forces an ebp/rbp based stack frame even though + // 0 and unused. + char *foo = alloca(0); + if (sf->worker == 0 || (sf->flags & CILK_FRAME_UNSYNCHED) == 0) { + // p was allocated from current stack frame and we + // are synced on current stack frame. Return the + // amount of the stack that needs to be freed. + char *old_ebp = getbp(); + char *old_esp = getsp(); + + // make top_ptr point to base of first parameter. + char *top_ptr = ((char *)(_AddressOfReturnAddress()) + + sizeof(char *)); + size_t param_size = 0; + +#if defined(__x86_64) + // For Intel64 linux & MACH ABI, all the parameters were passed in + // registers, so top of the stack frame above the return address + // is just the size of the return address plus + // distance_from_sp_to_alloca_area on the chance that the alloca + // area isn't at the very bottom of the calling function's stack. +#elif defined(__MACH__) + // For ia32 MACH, parameter size is always a multiple of 16 + // bytes to keep the stack 16 byte aligned. So we need to round + // number of parameters up to multiple of 4. + param_size = 8 * sizeof(char *); +#else + // For both windows Intel64 ABI, and the IA32 windows and + // linux ABIs, space is reserved on the stack for all these + // parameters. param_size is 6 * size of a stack slot. + param_size = 6 * sizeof(char *); +#endif + + // now make top_ptr point above the params, or if + // distance_from_sp_to_alloca_area is not zero, make + // it point above that area. When non-zero, + // distance_from_sp_to_alloca_area is expected to contain + // the parameter space, so we only add one or the other, + // not both. + top_ptr += (distance_from_sp_to_alloca_area != 0) ?
+ distance_from_sp_to_alloca_area : param_size; + + size_t cur_fm_size = (top_ptr - old_esp); + char *new_esp = old_esp + full_size; + char *new_ebp = old_ebp + full_size; + + copy_frame_up_and_move_bp(new_esp, old_esp, cur_fm_size, new_ebp); + setsp(new_esp); + } + else { + // p was allocated on stack frame, but that is + // no longer the current stack frame. Need to adjust the + // saved esp that is somewhere in the cilk runtime so that + // on sync, esp will be cut back correctly. + vla_free_from_original_stack(sf, full_size); + } + } + else { + vla_internal_heap_free(t, full_size); + } +#else // Not __INTEL_COMPILER + // Not supported unless we can figure out how to get the size of the frame +#endif +} diff --git a/libcilkrts/runtime/cilk-abi.c b/libcilkrts/runtime/cilk-abi.c index fb525154862..be7e1497561 100644 --- a/libcilkrts/runtime/cilk-abi.c +++ b/libcilkrts/runtime/cilk-abi.c @@ -2,31 +2,43 @@ * ************************************************************************* * - * Copyright (C) 2010-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ +/** + * @file cilk-abi.c + * + * @brief cilk-abi.c implements all of the entrypoints to the Intel Cilk + * Plus runtime. + */ + /* * Define this macro so that compilation of this file generates the * non-inlined versions of certain functions in cilk_api.h.
@@ -35,7 +47,6 @@ #include "cilk/cilk_api.h" #include "cilk/cilk_undocumented.h" #include "cilktools/cilkscreen.h" -#include "internal/inspector-abi.h" #include "global_state.h" #include "os.h" @@ -48,6 +59,7 @@ #include "sysdep.h" #include "except.h" #include "cilk_malloc.h" +#include "record-replay.h" #include <errno.h> #include <string.h> @@ -72,7 +84,14 @@ void * _ReturnAddress(void); #define TBB_INTEROP_DATA_DELAYED_UNTIL_BIND (void *)-1 -// ABI version +/** + * __cilkrts_bind_thread is a versioned entrypoint. The runtime should be + * exporting copies of __cilkrts_bind_version for the current and all previous + * versions of the ABI. + * + * This macro should always be set to generate a version to match the current + * version; __CILKRTS_ABI_VERSION. + */ #define BIND_THREAD_RTN __cilkrts_bind_thread_1 static inline @@ -125,7 +144,21 @@ CILK_ABI_VOID __cilkrts_enter_frame_fast_1(__cilkrts_stack_frame *sf) sf->reserved = 0; } -/* Return true if undo-detach failed. */ +/** + * A component of the THE protocol. __cilkrts_undo_detach checks whether + * this frame's parent has been stolen. If it hasn't, the frame can return + * normally. If the parent has been stolen, or if we suspect it might be, + * then __cilkrts_leave_frame() needs to call into the runtime. + * + * @note __cilkrts_undo_detach() is comparing the exception pointer against + * the tail pointer. The exception pointer is modified when another worker + * is considering whether it can steal a frame. The head pointer is updated + * to match when the worker lock is taken out and the thief is sure that + * it can complete the steal. If the steal cannot be completed, the thief + * will restore the exception pointer. + * + * @return true if undo-detach failed.
+ */ static int __cilkrts_undo_detach(__cilkrts_stack_frame *sf) { __cilkrts_worker *w = sf->worker; @@ -196,7 +229,9 @@ CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf) #ifndef _WIN32 if (__builtin_expect(sf->flags & CILK_FRAME_EXCEPTING, 0)) { - update_pedigree_on_leave_frame(w, sf); +// Pedigree will be updated in __cilkrts_leave_frame. We need the +// pedigree before the update for record/replay +// update_pedigree_on_leave_frame(w, sf); __cilkrts_return_exception(sf); /* If return_exception returns the caller is attached. leave_frame is called from a cleanup (destructor) @@ -205,13 +240,19 @@ CILK_ABI_VOID __cilkrts_leave_frame(__cilkrts_stack_frame *sf) return; } #endif + + // During replay, check whether w was the last worker to continue + replay_wait_for_steal_if_parent_was_stolen(w); + + // Attempt to undo the detach if (__builtin_expect(__cilkrts_undo_detach(sf), 0)) { - // The update of pedigree for leaving the frame occurs - // inside this call if it does not return. + // The update of pedigree for leaving the frame occurs + // inside this call if it does not return. __cilkrts_c_THE_exception_check(w, sf); } - update_pedigree_on_leave_frame(w, sf); + update_pedigree_on_leave_frame(w, sf); + /* This path is taken when undo-detach wins the race with stealing. Otherwise this strand terminates and the caller will be resumed via setjmp at sync. */ @@ -247,11 +288,6 @@ CILK_ABI_VOID __cilkrts_sync(__cilkrts_stack_frame *sf) } #endif - /* Save return address so we can report it to Piersol. 
*/ -#ifdef _WIN32 - w->l->sync_return_address = _ReturnAddress(); -#endif - __cilkrts_c_sync(w, sf); } @@ -322,7 +358,9 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void) { __cilkrts_worker *w; int start_cilkscreen = 0; +#ifdef USE_ITTNOTIFY static int unique_obj; +#endif // Cannot set this pointer until after __cilkrts_init_internal() call: global_state_t* g; @@ -350,15 +388,19 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void) __cilkrts_cilkscreen_establish_worker(w); { full_frame *ff = __cilkrts_make_full_frame(w, 0); - ff->stack_self = sysdep_make_user_stack(w); - tbb_interop_use_saved_stack_op_info(w, ff->stack_self); - w->l->user_thread_imported = 0; + + ff->fiber_self = cilk_fiber_allocate_from_thread(); + CILK_ASSERT(ff->fiber_self); + + cilk_fiber_set_owner(ff->fiber_self, w); + cilk_fiber_tbb_interop_use_saved_stack_op_info(ff->fiber_self); + CILK_ASSERT(ff->join_counter == 0); ff->join_counter = 1; w->l->frame_ff = ff; w->reducer_map = __cilkrts_make_reducer_map(w); __cilkrts_set_leftmost_reducer_map(w->reducer_map, 1); - load_pedigree_leaf_into_user_worker(w); + load_pedigree_leaf_into_user_worker(w); } // Make sure that the head and tail are reset, and saved_protected_tail @@ -371,10 +413,32 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void) CILK_ASSERT(w->tail == w->l->ltq); CILK_ASSERT(w->protected_tail == w->ltq_limit); - if (0 != __cilkrts_sysdep_bind_thread(w)) - // User thread couldn't be bound (probably because of a lack of - // resources). Continue, but don't allow stealing from this user - // thread. + // There may have been an old pending exception which was freed when the + // exception was caught outside of Cilk + w->l->pending_exception = NULL; + + w->reserved = NULL; + + // If we've already created a scheduling fiber for this worker, we'll just + // reuse it. If w->self < 0, it means that this is an ad-hoc user worker + // not known to the global state. Thus, we need to create a scheduling + // stack only if we don't already have one and w->self >= 0. 
+ if (NULL == w->l->scheduling_fiber && w->self >= 0) + { + START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) { + // Create a scheduling fiber for this worker. + w->l->scheduling_fiber = + cilk_fiber_allocate_from_heap(CILK_SCHEDULING_STACK_SIZE); + cilk_fiber_reset_state(w->l->scheduling_fiber, + scheduler_fiber_proc_for_user_worker); + cilk_fiber_set_owner(w->l->scheduling_fiber, w); + } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE); + } + + // If the scheduling fiber is NULL, we've either exceeded our quota for + // fibers or workers or we're out of memory, so we should lose parallelism + // by disallowing stealing. + if (NULL == w->l->scheduling_fiber) __cilkrts_disallow_stealing(w, NULL); start_cilkscreen = (0 == w->g->Q); @@ -420,8 +484,8 @@ CILK_ABI_WORKER_PTR BIND_THREAD_RTN(void) * * For Windows, the aliased symbol is exported in cilk-exports.def. */ -#ifdef _DARWIN_C_SOURCE -/* +#if defined(_DARWIN_C_SOURCE) || defined(__APPLE__) +/** * Mac OS X: Unfortunately, Darwin doesn't allow aliasing, so we just make a * call and hope the optimizer does the right thing. */ @@ -429,18 +493,27 @@ CILK_ABI_WORKER_PTR __cilkrts_bind_thread (void) { return BIND_THREAD_RTN(); } #else -/* + +/** + * Macro to convert a parameter to a string. Used on Linux or BSD. + */ +#define STRINGIFY(x) #x + +/** + * Macro to generate an __attribute__ for an aliased name + */ +#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x)))) + +/** * Linux or BSD: Use the alias attribute to make the labels for the versioned * functions point to the same place in the code as the original. Using * the two macros is annoying but required. 
*/ -#define STRINGIFY(x) #x -#define ALIASED_NAME(x) __attribute__ ((alias (STRINGIFY(x)))) CILK_ABI_WORKER_PTR __cilkrts_bind_thread(void) ALIASED_NAME(BIND_THREAD_RTN); -#endif // defined _DARWIN_C_SOURCE +#endif // defined _DARWIN_C_SOURCE || defined __APPLE__ #endif // !defined _MSC_VER CILK_API_SIZET @@ -464,60 +537,6 @@ CILK_API_VOID __cilkrts_dump_stats(void) global_os_mutex_unlock(); } -/* - * __cilkrts_get_stack_region_id - * - * Interface called by Inspector (Piersol) - * - * Returns a __cilkrts_region_id for the stack currently executing on a thread. - * Returns NULL on failure. - */ - -CILK_INSPECTOR_ABI(__cilkrts_region_id) -__cilkrts_get_stack_region_id(__cilkrts_thread_id thread_id) -{ - global_state_t *g = cilkg_get_global_state(); - int i; - - if (NULL == g) - return NULL; - - for (i = 0; i < g->total_workers; i++) - { - if (WORKER_FREE != g->workers[i]->l->type) - { - if (__cilkrts_sysdep_is_worker_thread_id(g, i, thread_id)) - return (__cilkrts_region_id)g->workers[i]->l->frame_ff->stack_self; - } - } - - return NULL; -} - -/* - * __cilkrts_get_stack_region_properties - * - * Interface called by Inspector (Piersol) - * - * Fills in the properties for a region_id. 
- * - * Returns false on invalid region_id or improperly sized - * __cilkrts_region_properties - */ - -CILK_INSPECTOR_ABI(int) -__cilkrts_get_stack_region_properties(__cilkrts_region_id region_id, - __cilkrts_region_properties *properties) -{ - if (NULL == properties) - return 0; - - if (properties->size != sizeof(__cilkrts_region_properties)) - return 0; - - return __cilkrts_sysdep_get_stack_region_properties((__cilkrts_stack *)region_id, properties); -} - #ifndef _WIN32 CILK_ABI_THROWS_VOID __cilkrts_rethrow(__cilkrts_stack_frame *sf) { @@ -535,16 +554,15 @@ static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data) { __cilk_tbb_stack_op_thunk o; - // If the __cilkrts_stack wasn't available fetch it now + // If the cilk_fiber wasn't available fetch it now if (TBB_INTEROP_DATA_DELAYED_UNTIL_BIND == data) { - __cilkrts_stack *sd; full_frame *ff; __cilkrts_worker *w = __cilkrts_get_tls_worker(); if (NULL == w) { // Free any saved stack op information - tbb_interop_free_stack_op_info(); + cilk_fiber_tbb_interop_free_stack_op_info(); return 0; /* Success! */ } @@ -552,30 +570,28 @@ static __cilk_tbb_retcode __cilkrts_unwatch_stack(void *data) __cilkrts_worker_lock(w); ff = w->l->frame_ff; __cilkrts_frame_lock(w,ff); - data = ff->stack_self; + data = ff->fiber_self; __cilkrts_frame_unlock(w,ff); __cilkrts_worker_unlock(w); } #if CILK_LIB_DEBUG /* Debug code */ /* Get current stack */ - __cilkrts_stack *sd; full_frame *ff; __cilkrts_worker *w = __cilkrts_get_tls_worker(); __cilkrts_worker_lock(w); ff = w->l->frame_ff; __cilkrts_frame_lock(w,ff); - sd = ff->stack_self; - CILK_ASSERT (data==sd); + CILK_ASSERT (data == ff->fiber_self); __cilkrts_frame_unlock(w,ff); __cilkrts_worker_unlock(w); #endif /* Clear the callback information */ o.data = NULL; - o.routine = NULL; - __cilkrts_set_stack_op( (struct __cilkrts_stack*)data, o ); - + o.routine = NULL; + cilk_fiber_set_stack_op((cilk_fiber*)data, o); + // Note. Do *NOT* free any saved stack information here. 
If they want to // free the saved stack op information, they'll do it when the thread is // unbound @@ -597,7 +613,7 @@ CILK_API_TBB_RETCODE __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u, __cilk_tbb_stack_op_thunk o) { - __cilkrts_stack *sd; + cilk_fiber* current_fiber; __cilkrts_worker *w; #ifdef _MSC_VER @@ -612,8 +628,8 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u, { // Save data for later. We'll deal with it when/if this thread binds // to the runtime - tbb_interop_save_stack_op_info(o); - + cilk_fiber_tbb_interop_save_stack_op_info(o); + u->routine = __cilkrts_unwatch_stack; u->data = TBB_INTEROP_DATA_DELAYED_UNTIL_BIND; @@ -622,7 +638,7 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u, /* Get current stack */ __cilkrts_worker_lock(w); - sd = w->l->frame_ff->stack_self; + current_fiber = w->l->frame_ff->fiber_self; __cilkrts_worker_unlock(w); /* CILK_ASSERT( !sd->stack_op_data ); */ @@ -630,9 +646,9 @@ __cilkrts_watch_stack(__cilk_tbb_unwatch_thunk *u, /* Give TBB our callback */ u->routine = __cilkrts_unwatch_stack; - u->data = sd; + u->data = current_fiber; /* Save the callback information */ - __cilkrts_set_stack_op( sd, o ); + cilk_fiber_set_stack_op(current_fiber, o); return 0; /* Success! */ } @@ -702,4 +718,11 @@ __cilkrts_bump_loop_rank_internal(__cilkrts_worker* w) return 0; } +CILK_ABI_VOID +__cilkrts_save_fp_ctrl_state(__cilkrts_stack_frame *sf) +{ + // Pass call onto OS/architecture dependent function + sysdep_save_fp_ctrl_state(sf); +} + /* end cilk-abi.c */ diff --git a/libcilkrts/runtime/cilk-ittnotify.h b/libcilkrts/runtime/cilk-ittnotify.h index 498aa700a3b..cf28a6591ae 100644 --- a/libcilkrts/runtime/cilk-ittnotify.h +++ b/libcilkrts/runtime/cilk-ittnotify.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2013 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #ifndef INCLUDED_CILK_ITTNOTIFY_DOT_H @@ -33,6 +38,11 @@ #endif #include <stdio.h> +// ITTNOTIFY does not support ARM at this time +#ifdef __arm__ +#undef USE_ITTNOTIFY +#endif + #ifdef USE_ITTNOTIFY #include <ittnotify.h> diff --git a/libcilkrts/runtime/cilk-tbb-interop.h b/libcilkrts/runtime/cilk-tbb-interop.h index 2972f037292..0ff501b6ddb 100644 --- a/libcilkrts/runtime/cilk-tbb-interop.h +++ b/libcilkrts/runtime/cilk-tbb-interop.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. 
+ * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** diff --git a/libcilkrts/runtime/cilk_api.c b/libcilkrts/runtime/cilk_api.c index 33a24861a78..277941ba4a7 100644 --- a/libcilkrts/runtime/cilk_api.c +++ b/libcilkrts/runtime/cilk_api.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ /* diff --git a/libcilkrts/runtime/cilk_fiber-unix.cpp b/libcilkrts/runtime/cilk_fiber-unix.cpp new file mode 100644 index 00000000000..afdce4e1e03 --- /dev/null +++ b/libcilkrts/runtime/cilk_fiber-unix.cpp @@ -0,0 +1,240 @@ +/* cilk_fiber-unix.cpp -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +#include "cilk_fiber-unix.h" +#include "cilk_malloc.h" +#include "bug.h" +#include "os.h" + +#include <cstdio> +#include <cstdlib> + +#include <alloca.h> +#include <errno.h> +#include <sys/mman.h> +#include <unistd.h> + +// MAP_ANON is deprecated on Linux, but seems to be required on Mac... 
+#ifndef MAP_ANONYMOUS +#define MAP_ANONYMOUS MAP_ANON +#endif + +// Magic number for sanity checking fiber structure +const unsigned magic_number = 0x5afef00d; + +int cilk_fiber_sysdep::s_page_size = getpagesize(); + +cilk_fiber_sysdep::cilk_fiber_sysdep(std::size_t stack_size) + : cilk_fiber(stack_size) + , m_magic(magic_number) +{ + // Set m_stack and m_stack_base. + make_stack(stack_size); + + // Get high-address of stack, with 32-bytes of spare space, and rounded + // down to the nearest 32-byte boundary. + const uintptr_t align_mask = 32 - 1; + m_stack_base -= ((std::size_t) m_stack_base) & align_mask; +} + +cilk_fiber_sysdep::cilk_fiber_sysdep(from_thread_t) + : cilk_fiber() + , m_magic(magic_number) +{ + this->set_allocated_from_thread(true); + + // Dummy stack data for thread-main fiber + m_stack = NULL; + m_stack_base = NULL; +} + +void cilk_fiber_sysdep::convert_fiber_back_to_thread() +{ + // Does nothing on Linux. +} + +cilk_fiber_sysdep::~cilk_fiber_sysdep() +{ + CILK_ASSERT(magic_number == m_magic); + if (!this->is_allocated_from_thread()) + free_stack(); +} + +#if SUPPORT_GET_CURRENT_FIBER +cilk_fiber_sysdep* cilk_fiber_sysdep::get_current_fiber_sysdep() +{ + return cilkos_get_tls_cilk_fiber(); +} +#endif + +// Jump to resume other fiber. We may or may not come back. +inline void cilk_fiber_sysdep::resume_other_sysdep(cilk_fiber_sysdep* other) +{ + if (other->is_resumable()) { + other->set_resumable(false); + // Resume by longjmp'ing to the place where we suspended. + CILK_LONGJMP(other->m_resume_jmpbuf); + } + else { + // Otherwise, we've never ran this fiber before. Start the + // proc method. + other->run(); + } +} + +void cilk_fiber_sysdep::suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other) +{ +#if SUPPORT_GET_CURRENT_FIBER + cilkos_set_tls_cilk_fiber(other); +#endif + CILK_ASSERT(this->is_resumable()); + + + // Jump to the other fiber. We expect to come back. + if (! 
CILK_SETJMP(m_resume_jmpbuf)) { + resume_other_sysdep(other); + } + + // Return here when another fiber resumes me. + // If the fiber that switched to me wants to be deallocated, do it now. + do_post_switch_actions(); +} + +NORETURN cilk_fiber_sysdep::jump_to_resume_other_sysdep(cilk_fiber_sysdep* other) +{ +#if SUPPORT_GET_CURRENT_FIBER + cilkos_set_tls_cilk_fiber(other); +#endif + CILK_ASSERT(!this->is_resumable()); + + // Jump to the other fiber. But we are never coming back because + // this fiber is being reset. + resume_other_sysdep(other); + + // We should never come back here... + __cilkrts_bug("Should not get here"); +} + + +NORETURN cilk_fiber_sysdep::run() +{ + // Only fibers created from a pool have a proc method to run and execute. + CILK_ASSERT(m_start_proc); + CILK_ASSERT(!this->is_allocated_from_thread()); + CILK_ASSERT(!this->is_resumable()); + + // TBD: This setjmp/longjmp pair simply changes the stack pointer. + // We could probably replace this code with some assembly. + if (! CILK_SETJMP(m_resume_jmpbuf)) + { + // Change stack pointer to fiber stack + JMPBUF_SP(m_resume_jmpbuf) = m_stack_base; + CILK_LONGJMP(m_resume_jmpbuf); + } + + // Verify that 1) 'this' is still valid and 2) '*this' has not been + // corrupted. + CILK_ASSERT(magic_number == m_magic); + + // If the fiber that switched to me wants to be deallocated, do it now. + do_post_switch_actions(); + + // Now call the user proc on the new stack + m_start_proc(this); + + // alloca() to force generation of frame pointer. The argument to alloca + // is contrived to prevent the compiler from optimizing it away. This + // code should never actually be executed. + int* dummy = (int*) alloca((sizeof(int) + (std::size_t) m_start_proc) & 0x1); + *dummy = 0xface; + + // User proc should never return. 
+ __cilkrts_bug("Should not get here"); +} + +void cilk_fiber_sysdep::make_stack(size_t stack_size) +{ + char* p; + // We've already validated that the stack size is page-aligned and + // is a reasonable value. No need to do any extra rounding here. + size_t rounded_stack_size = stack_size; + + // Normally, we have already validated that the stack size is + // aligned to 4K. In the rare case that pages are huge though, we + // need to do some extra checks. + if (rounded_stack_size < 3 * (size_t)s_page_size) { + // If the specified stack size is too small, round up to 3 + // pages. We need at least 2 extra for the guard pages. + rounded_stack_size = 3 * (size_t)s_page_size; + } + else { + // Otherwise, the stack size is large enough, but might not be + // a multiple of page size. Round up to nearest multiple of + // s_page_size, just to be safe. + size_t remainder = rounded_stack_size % s_page_size; + if (remainder) { + rounded_stack_size += s_page_size - remainder; + } + } + + p = (char*)mmap(0, rounded_stack_size, + PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, + -1, 0); + if (MAP_FAILED == p) { + // For whatever reason (probably ran out of memory), mmap() failed. + // There is no stack to return, so the program loses parallelism. + m_stack = NULL; + m_stack_base = NULL; + return; + } + + // mprotect guard pages. 
+ mprotect(p + rounded_stack_size - s_page_size, s_page_size, PROT_NONE); + mprotect(p, s_page_size, PROT_NONE); + + m_stack = p; + m_stack_base = p + rounded_stack_size - s_page_size; +} + + +void cilk_fiber_sysdep::free_stack() +{ + if (m_stack) { + size_t rounded_stack_size = m_stack_base - m_stack + s_page_size; + if (munmap(m_stack, rounded_stack_size) < 0) + __cilkrts_bug("Cilk: stack munmap failed error %d\n", errno); + } +} + +/* End cilk_fiber-unix.cpp */ diff --git a/libcilkrts/runtime/cilk_fiber-unix.h b/libcilkrts/runtime/cilk_fiber-unix.h new file mode 100644 index 00000000000..5665bd576bf --- /dev/null +++ b/libcilkrts/runtime/cilk_fiber-unix.h @@ -0,0 +1,144 @@ +/* cilk_fiber-unix.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
+ **************************************************************************/ + +#ifndef INCLUDED_CILK_FIBER_UNIX_DOT_H +#define INCLUDED_CILK_FIBER_UNIX_DOT_H + +#ifndef __cplusplus +# error cilk_fiber-unix.h is a C++-only header +#endif + +#include "cilk_fiber.h" +#include "jmpbuf.h" + +/** + * @file cilk_fiber-unix.h + * + * @brief Unix-specific implementation for cilk_fiber. + */ + +/** + * @brief Unix-specific fiber class derived from portable fiber class + */ +struct cilk_fiber_sysdep : public cilk_fiber +{ + public: + +#if SUPPORT_GET_CURRENT_FIBER + /** + * @brief Gets the current fiber from TLS. + */ + static cilk_fiber_sysdep* get_current_fiber_sysdep(); +#endif + + /** + * @brief Construct the system-dependent portion of a fiber. + * + * @param stack_size The size of the stack for this fiber. + */ + cilk_fiber_sysdep(std::size_t stack_size); + + /** + * @brief Construct the system-dependent of a fiber created from a + * thread. + */ + cilk_fiber_sysdep(from_thread_t); + + /** + * @brief Destructor + */ + ~cilk_fiber_sysdep(); + + /** + * @brief OS-specific calls to convert this fiber back to thread. + * + * Nothing to do for Linux. + */ + void convert_fiber_back_to_thread(); + + /** + * @brief System-dependent function to suspend self and resume execution of "other". + * + * This fiber is suspended. + * + * @pre @c is_resumable() should be true. + * + * @param other Fiber to resume. + */ + void suspend_self_and_resume_other_sysdep(cilk_fiber_sysdep* other); + + /** + * @brief System-dependent function called to jump to @p other + * fiber. + * + * @pre @c is_resumable() should be false. + * + * @param other Fiber to resume. + */ + NORETURN jump_to_resume_other_sysdep(cilk_fiber_sysdep* other); + + /** + * @brief Runs the start_proc. + * @pre is_resumable() should be false. + * @pre is_allocated_from_thread() should be false. + * @pre m_start_proc must be valid. + */ + NORETURN run(); + + /** + * @brief Returns the base of this fiber's stack. 
+ */ + inline char* get_stack_base_sysdep() { return m_stack_base; } + + private: + char* m_stack_base; ///< The base of this fiber's stack. + char* m_stack; // Stack memory (low address) + __CILK_JUMP_BUFFER m_resume_jmpbuf; // Place to resume fiber + unsigned m_magic; // Magic number for checking + + static int s_page_size; // Page size for + // stacks. + + // Allocate memory for a stack. This method + // initializes m_stack and m_stack_base. + void make_stack(size_t stack_size); + + // Deallocates memory for the stack. + void free_stack(); + + // Common helper method for implementation of resume_other_sysdep + // variants. + inline void resume_other_sysdep(cilk_fiber_sysdep* other); +}; + +#endif // ! defined(INCLUDED_CILK_FIBER_UNIX_DOT_H) diff --git a/libcilkrts/runtime/cilk_fiber.cpp b/libcilkrts/runtime/cilk_fiber.cpp new file mode 100644 index 00000000000..aee09875755 --- /dev/null +++ b/libcilkrts/runtime/cilk_fiber.cpp @@ -0,0 +1,1073 @@ +/* cilk_fiber.cpp -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +/* Implementations of non-platform-specific aspects of cilk_fiber, especially + * the cilk_fiber_pool interface. + */ +#include "cilk_fiber.h" +#ifdef _WIN32 +# include "cilk_fiber-win.h" +#else +# include "cilk_fiber-unix.h" +#endif +#include "cilk_malloc.h" +#include "bug.h" +#include <new> + +#include <climits> +#include <cstdio> +#include <cstdlib> +#include <cstring> + +#include "sysdep.h" + + +extern "C" { + +inline int cilk_fiber_pool_sanity_check(cilk_fiber_pool *pool, const char* desc) +{ + int errors = 0; +#if FIBER_DEBUG >= 1 + if ((NULL != pool) && pool->total > 0) { + + // Root pool should not allocate more fibers than alloc_max + errors += ((pool->parent == NULL) && + (pool->total > pool->alloc_max)); + errors += (pool->total > pool->high_water); + + if (errors) { + fprintf(stderr, "ERROR at %s: pool=%p has max_size=%u, total=%d, high_water=%d\n", + desc, + pool, pool->max_size, pool->total, pool->high_water); + } + } +#endif + return (errors == 0); +} + +inline void increment_pool_total(cilk_fiber_pool* pool) +{ + ++pool->total; + if (pool->high_water < pool->total) + pool->high_water = pool->total; +} + +inline void decrement_pool_total(cilk_fiber_pool* pool, int fibers_freed) +{ + pool->total -= fibers_freed; +} + + +/** + * @brief Free fibers from this pool until we have at most @c + * num_to_keep fibers remaining, and then put a fiber back. 
+ * + * @pre We do not hold @c pool->lock + * @post After completion, we do not hold @c pool->lock + */ +static void cilk_fiber_pool_free_fibers_from_pool(cilk_fiber_pool* pool, + unsigned num_to_keep, + cilk_fiber* fiber_to_return) +{ + // Free our own fibers, until we fall below our desired threshold. + // Each iteration of this loop proceeds in the following stages: + // 1. Acquire the pool lock, + // 2. Grabs up to B fibers from the pool, stores them into a buffer. + // 3. Check if pool is empty enough. If yes, put the last fiber back, + // and remember that we should quit. + // 4. Release the pool lock, and actually free any buffered fibers. + // 5. Check if we are done and should exit the loop. Otherwise, try again. + // + const bool need_lock = pool->lock; + bool last_fiber_returned = false; + + do { + const int B = 10; // Pull at most this many fibers from the + // parent for one lock acquisition. Make + // this value large enough to amortize + // against the cost of acquiring and + // releasing the lock. + int num_to_free = 0; + cilk_fiber* fibers_to_free[B]; + + // Stage 1: Grab the lock. + if (need_lock) { + spin_mutex_lock(pool->lock); + } + + // Stage 2: Grab up to B fibers to free. + int fibers_freed = 0; + while ((pool->size > num_to_keep) && (num_to_free < B)) { + fibers_to_free[num_to_free++] = pool->fibers[--pool->size]; + fibers_freed++; + } + decrement_pool_total(pool, fibers_freed); + + // Stage 3. Pool is below threshold. Put extra fiber back. + if (pool->size <= num_to_keep) { + // Put the last fiber back into the pool. + if (fiber_to_return) { + CILK_ASSERT(pool->size < pool->max_size); + pool->fibers[pool->size] = fiber_to_return; + pool->size++; + } + last_fiber_returned = true; + } + + // Stage 4: Release the lock, and actually free any fibers + // buffered. 
+ if (need_lock) { + spin_mutex_unlock(pool->lock); + } + + for (int i = 0; i < num_to_free; ++i) { + fibers_to_free[i]->deallocate_to_heap(); + } + + } while (!last_fiber_returned); +} + + +/****************************************************************** + * TBD: We want to simplify / rework the logic for allocating and + * deallocating fibers, so that they are hopefully simpler and work + * more elegantly for more than two levels. + ******************************************************************/ + +/** + * @brief Transfer fibers from @c pool to @c pool->parent. + * + * @pre Must hold @c pool->lock if it exists. + * @post After completion, some number of fibers + * have been moved from this pool to the parent. + * The lock @c pool->lock is still held. + * + * TBD: Do we wish to guarantee that the lock has never been + * released? It may depend on the implementation... + */ +static void cilk_fiber_pool_move_fibers_to_parent_pool(cilk_fiber_pool* pool, + unsigned num_to_keep) +{ + // ASSERT: We should hold the lock on pool (if it has one). + CILK_ASSERT(pool->parent); + cilk_fiber_pool* parent_pool = pool->parent; + + // Move fibers from our pool to the parent until we either run out + // of space in the parent, or hit our threshold. + // + // This operation must be done while holding the parent lock. + + // If the parent pool appears to be full, just return early. + if (parent_pool->size >= parent_pool->max_size) + return; + + spin_mutex_lock(pool->parent->lock); + while ((parent_pool->size < parent_pool->max_size) && + (pool->size > num_to_keep)) { + parent_pool->fibers[parent_pool->size++] = + pool->fibers[--pool->size]; + } + + // If the child pool has deallocated more than fibers to the heap + // than it has allocated, then transfer this "surplus" to the + // parent, so that the parent is free to allocate more from the + // heap. + // + // This transfer means that the total in the parent can + // temporarily go negative. 
+ if (pool->total < 0) { + // Reduce parent total by the surplus we have in the local + // pool. + parent_pool->total += pool->total; + pool->total = 0; + } + + spin_mutex_unlock(pool->parent->lock); +} + +void cilk_fiber_pool_init(cilk_fiber_pool* pool, + cilk_fiber_pool* parent, + size_t stack_size, + unsigned buffer_size, + int alloc_max, + int is_shared) +{ +#if FIBER_DEBUG >= 1 + fprintf(stderr, "fiber_pool_init, pool=%p, parent=%p, alloc_max=%u\n", + pool, parent, alloc_max); +#endif + + pool->lock = (is_shared ? spin_mutex_create() : NULL); + pool->parent = parent; + pool->stack_size = stack_size; + pool->max_size = buffer_size; + pool->size = 0; + pool->total = 0; + pool->high_water = 0; + pool->alloc_max = alloc_max; + pool->fibers = + (cilk_fiber**) __cilkrts_malloc(buffer_size * sizeof(cilk_fiber*)); + CILK_ASSERT(NULL != pool->fibers); + +#ifdef __MIC__ +#define PREALLOCATE_FIBERS +#endif + +#ifdef PREALLOCATE_FIBERS + // Pre-allocate 1/4 of fibers in the pools ahead of time. This + // value is somewhat arbitrary. It was chosen to be less than the + // threshold (of about 3/4) of fibers to keep in the pool when + // transferring fibers to the parent. + + int pre_allocate_count = buffer_size/4; + for (pool->size = 0; pool->size < pre_allocate_count; pool->size++) { + pool->fibers[pool->size] = cilk_fiber::allocate_from_heap(pool->stack_size); + } +#endif +} + + +void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool, + unsigned max_fibers_to_allocate) +{ + // Should only set limit on root pool, not children. + CILK_ASSERT(NULL == root_pool->parent); + root_pool->alloc_max = max_fibers_to_allocate; +} + +void cilk_fiber_pool_destroy(cilk_fiber_pool* pool) +{ + CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "pool_destroy")); + + // Lock my own pool, if I need to. + if (pool->lock) { + spin_mutex_lock(pool->lock); + } + + // Give any remaining fibers to parent pool. 
+ if (pool->parent) { + cilk_fiber_pool_move_fibers_to_parent_pool(pool, 0); + } + + // Unlock pool. + if (pool->lock) { + spin_mutex_unlock(pool->lock); + } + + // If I have any left in my pool, just free them myself. + // This method may acquire the pool lock. + cilk_fiber_pool_free_fibers_from_pool(pool, 0, NULL); + + // Destroy the lock if there is one. + if (pool->lock) { + spin_mutex_destroy(pool->lock); + } + __cilkrts_free(pool->fibers); +} + + +cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool) +{ + CILK_ASSERT(cilk_fiber_pool_sanity_check(pool, "allocate")); + return cilk_fiber::allocate(pool); +} + +cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size) +{ + return cilk_fiber::allocate_from_heap(stack_size); +} + +void cilk_fiber_reset_state(cilk_fiber* fiber, cilk_fiber_proc start_proc) +{ + fiber->reset_state(start_proc); +} + +int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool) +{ + return fiber->remove_reference(pool); +} + +cilk_fiber* cilk_fiber_allocate_from_thread() +{ + return cilk_fiber::allocate_from_thread(); +} + +int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber) +{ + return fiber->deallocate_from_thread(); +} + +int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber) +{ + return fiber->remove_reference_from_thread(); +} + +int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber) +{ + return fiber->is_allocated_from_thread(); +} + +#if SUPPORT_GET_CURRENT_FIBER +cilk_fiber* cilk_fiber_get_current_fiber(void) +{ + return cilk_fiber::get_current_fiber(); +} +#endif + +void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self, + cilk_fiber* other) +{ + self->suspend_self_and_resume_other(other); +} + + +void cilk_fiber::reset_state(cilk_fiber_proc start_proc) +{ + // Setup the fiber and return. 
+ this->m_start_proc = start_proc; + + CILK_ASSERT(!this->is_resumable()); + CILK_ASSERT(NULL == this->m_pending_remove_ref); + CILK_ASSERT(NULL == this->m_pending_pool); +} + +NORETURN +cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self, + cilk_fiber_pool* self_pool, + cilk_fiber* other) +{ +#if FIBER_DEBUG >= 3 + __cilkrts_worker* w = __cilkrts_get_tls_worker(); + fprintf(stderr, "W=%d: cilk_fiber_deactivate_self_and_resume_other: self=%p, other=%p\n", + w->self, + self, other); +#endif + CILK_ASSERT(cilk_fiber_pool_sanity_check(self_pool, "remove_reference_from_self_resume_other")); + self->remove_reference_from_self_and_resume_other(self_pool, other); + + // We should never return here. +} + +void cilk_fiber_set_post_switch_proc(cilk_fiber *self, + cilk_fiber_proc post_switch_proc) +{ + self->set_post_switch_proc(post_switch_proc); +} + +void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber, + __cilk_tbb_stack_op op) +{ + fiber->invoke_tbb_stack_op(op); +} + +cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber) +{ + return fiber->get_data(); + + /// TBD: Change this code to "return (cilk_fiber_data*)fiber;" + // plus a static assert, so that this function is + // more easily inlined by the compiler. +} + +int cilk_fiber_is_resumable(cilk_fiber *fiber) +{ + return fiber->is_resumable(); +} + +char* cilk_fiber_get_stack_base(cilk_fiber *fiber) +{ + return fiber->get_stack_base(); +} + + +#if defined(_WIN32) && 0 // Only works on Windows. Disable debugging for now. +#define DBG_STACK_OPS(_fmt, ...) __cilkrts_dbgprintf(_fmt, __VA_ARGS__) +#else +#define DBG_STACK_OPS(_fmt, ...) 
+#endif + +void cilk_fiber_set_stack_op(cilk_fiber *fiber, + __cilk_tbb_stack_op_thunk o) +{ + cilk_fiber_data *fdata = cilk_fiber_get_data(fiber); + DBG_STACK_OPS ("cilk_fiber_set_stack_op - cilk_fiber %p, routine: %p, data: %p\n", + fiber, + o.routine, + o.data); + fdata->stack_op_routine = o.routine; + fdata->stack_op_data = o.data; +} + +#if 0 // Debugging function +static +const char *NameStackOp (enum __cilk_tbb_stack_op op) +{ + switch(op) + { + case CILK_TBB_STACK_ORPHAN: return "CILK_TBB_STACK_ORPHAN"; + case CILK_TBB_STACK_ADOPT: return "CILK_TBB_STACK_ADOPT"; + case CILK_TBB_STACK_RELEASE: return "CILK_TBB_STACK_RELEASE"; + default: return "Unknown"; + } +} +#endif + +/* + * Save TBB interop information for an unbound thread. It will get picked + * up when the thread is bound to the runtime. + */ +void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o) +{ + __cilk_tbb_stack_op_thunk *saved_thunk = + __cilkrts_get_tls_tbb_interop(); + + DBG_STACK_OPS("Calling save_stack_op; o.routine=%p, o.data=%p, saved_thunk=%p\n", + o.routine, o.data, saved_thunk); + + // If there is not already space allocated, allocate some. + if (NULL == saved_thunk) { + saved_thunk = (__cilk_tbb_stack_op_thunk*) + __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk)); + __cilkrts_set_tls_tbb_interop(saved_thunk); + } + + *saved_thunk = o; + + DBG_STACK_OPS ("Unbound Thread %04x: tbb_interop_save_stack_op_info - saved info\n", + cilkos_get_current_thread_id()); +} + +/* + * Save TBB interop information from the cilk_fiber. It will get picked + * up when the thread is bound to the runtime next time. 
+ */ +void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber *fiber) +{ + __cilk_tbb_stack_op_thunk *saved_thunk; + cilk_fiber_data* fdata; + + if (NULL == fiber) + return; + + fdata = cilk_fiber_get_data(fiber); + // If there is no TBB interop data, just return + if (NULL == fdata->stack_op_routine) + return; + + saved_thunk = __cilkrts_get_tls_tbb_interop(); + + // If there is not already space allocated, allocate some. + if (NULL == saved_thunk) { + saved_thunk = (__cilk_tbb_stack_op_thunk*) + __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk)); + __cilkrts_set_tls_tbb_interop(saved_thunk); + } + + saved_thunk->routine = fdata->stack_op_routine; + saved_thunk->data = fdata->stack_op_data; +} + +/* + * If there's TBB interop information that was saved before the thread was + * bound, apply it now + */ +void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber* fiber) +{ + __cilk_tbb_stack_op_thunk *saved_thunk = + __cilkrts_get_tls_tbb_interop(); + + CILK_ASSERT(fiber); + // If we haven't allocated a TBB interop index, we don't have any saved info + if (NULL == saved_thunk) { + DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - no saved info\n", + fiber); + return; + } + + DBG_STACK_OPS ("cilk_fiber %p: tbb_interop_use_saved_stack_op_info - using saved info\n", + fiber); + + // Associate the saved info with the __cilkrts_stack + cilk_fiber_set_stack_op(fiber, *saved_thunk); + + // Free the saved data. We'll save it again if needed when the code + // returns from the initial function + cilk_fiber_tbb_interop_free_stack_op_info(); +} + +/* + * Free saved TBB interop memory. Should only be called when the thread is + * not bound. 
+ */ +void cilk_fiber_tbb_interop_free_stack_op_info(void) +{ + __cilk_tbb_stack_op_thunk *saved_thunk = + __cilkrts_get_tls_tbb_interop(); + + // If we haven't allocated a TBB interop index, we don't have any saved info + if (NULL == saved_thunk) + return; + + DBG_STACK_OPS ("tbb_interop_free_stack_op_info - freeing saved info\n"); + + // Free the memory and wipe out the TLS value + __cilkrts_free(saved_thunk); + __cilkrts_set_tls_tbb_interop(NULL); +} + + + +#if NEED_FIBER_REF_COUNTS +int cilk_fiber_has_references(cilk_fiber *fiber) +{ + return (fiber->get_ref_count() > 0); +} + +int cilk_fiber_get_ref_count(cilk_fiber *fiber) +{ + return fiber->get_ref_count(); +} + +void cilk_fiber_add_reference(cilk_fiber *fiber) +{ + fiber->inc_ref_count(); +} +#endif // NEED_FIBER_REF_COUNTS + + +} // End extern "C" + + +cilk_fiber_sysdep* cilk_fiber::sysdep() +{ + return static_cast<cilk_fiber_sysdep*>(this); +} + + +cilk_fiber::cilk_fiber() + : m_start_proc(NULL) + , m_post_switch_proc(NULL) + , m_pending_remove_ref(NULL) + , m_pending_pool(NULL) + , m_flags(0) +{ + // Clear cilk_fiber_data base-class data members + std::memset((cilk_fiber_data*) this, 0, sizeof(cilk_fiber_data)); + + // cilk_fiber data members + init_ref_count(0); +} + +cilk_fiber::cilk_fiber(std::size_t stack_size) +{ + *this = cilk_fiber(); // A delegating constructor would be nice here + this->stack_size = stack_size; +} + +cilk_fiber::~cilk_fiber() +{ + // Empty destructor. +} + + +char* cilk_fiber::get_stack_base() +{ + return this->sysdep()->get_stack_base_sysdep(); +} + +cilk_fiber* cilk_fiber::allocate_from_heap(std::size_t stack_size) +{ + // Case 1: pool is NULL. create a new fiber from the heap + // No need for locks here. + cilk_fiber_sysdep* ret = + (cilk_fiber_sysdep*) __cilkrts_malloc(sizeof(cilk_fiber_sysdep)); + + // Error condition. If we failed to allocate a fiber from the + // heap, we are in trouble though... 
+ if (!ret) + return NULL; + + ::new(ret) cilk_fiber_sysdep(stack_size); + + CILK_ASSERT(0 == ret->m_flags); + CILK_ASSERT(NULL == ret->m_pending_remove_ref); + CILK_ASSERT(NULL == ret->m_pending_pool); + ret->init_ref_count(1); + return ret; +} + + +#if USE_FIBER_TRY_ALLOCATE_FROM_POOL +/** + * Helper method: try to allocate a fiber from this pool or its + * ancestors without going to the OS / heap. + * + * Returns allocated pool, or NULL if no pool is found. + * + * If pool contains a suitable fiber. Return it. Otherwise, try to + * recursively grab a fiber from the parent pool, if there is one. + * + * This method will not allocate a fiber from the heap. + * + * This method could be written either recursively or iteratively. + * It probably does not matter which one we do. + * + * @note This method is compiled, but may not be used unless the + * USE_FIBER_TRY_ALLOCATE_FROM_POOL switch is set. + */ +cilk_fiber* cilk_fiber::try_allocate_from_pool_recursive(cilk_fiber_pool* pool) +{ + cilk_fiber* ret = NULL; + + if (pool->size > 0) { + // Try to get the lock. + if (pool->lock) { + // For some reason, it seems to be better to just block on the parent + // pool lock, instead of using a try-lock? +#define USE_TRY_LOCK_IN_FAST_ALLOCATE 0 +#if USE_TRY_LOCK_IN_FAST_ALLOCATE + int got_lock = spin_mutex_trylock(pool->lock); + if (!got_lock) { + // If we fail, skip to the parent. + if (pool->parent) { + return try_allocate_from_pool_recursive(pool->parent); + } + } +#else + spin_mutex_lock(pool->lock); +#endif + } + + // Check in the pool if we have the lock. + if (pool->size > 0) { + ret = pool->fibers[--pool->size]; + } + + // Release the lock once we are done updating pool fields. + if (pool->lock) { + spin_mutex_unlock(pool->lock); + } + } + + if ((!ret) && (pool->parent)) { + return try_allocate_from_pool_recursive(pool->parent); + } + + if (ret) { + // When we pull a fiber out of the pool, set its reference + // count before we return it. 
+ ret->init_ref_count(1); + } + return ret; +} +#endif // USE_FIBER_TRY_ALLOCATE_FROM_POOL + + +cilk_fiber* cilk_fiber::allocate(cilk_fiber_pool* pool) +{ + // Pool should not be NULL in this method. But I'm not going to + // actually assert it, because we are likely to seg fault anyway + // if it is. + // CILK_ASSERT(NULL != pool); + + cilk_fiber *ret = NULL; + +#if USE_FIBER_TRY_ALLOCATE_FROM_POOL + // "Fast" path, which doesn't go to the heap or OS until checking + // the ancestors first. + ret = try_allocate_from_pool_recursive(pool); + if (ret) + return ret; +#endif + + // If we don't get anything from the "fast path", then go through + // a slower path to look for a fiber. + // + // 1. Lock the pool if it is shared. + // 2. Look in our local pool. If we find one, release the lock + // and quit searching. + // 3. Otherwise, check whether we can allocate from heap. + // 4. Release the lock if it was acquired. + // 5. Try to allocate from the heap, if step 3 said we could. + // If we find a fiber, then quit searching. + // 6. If none of these steps work, just recursively try again + // from the parent. + + // 1. Lock the pool if it is shared. + if (pool->lock) { + spin_mutex_lock(pool->lock); + } + + // 2. Look in local pool. + if (pool->size > 0) { + ret = pool->fibers[--pool->size]; + if (ret) { + // If we found one, release the lock once we are + // done updating pool fields, and break out of the + // loop. + if (pool->lock) { + spin_mutex_unlock(pool->lock); + } + + // When we pull a fiber out of the pool, set its reference + // count just in case. + ret->init_ref_count(1); + return ret; + } + } + + // 3. Check whether we can allocate from the heap. + bool can_allocate_from_heap = false; + if (pool->total < pool->alloc_max) { + // Track that we are allocating a new fiber from the + // heap, originating from this pool. + // This increment may be undone if we happen to fail to + // allocate from the heap. 
+ increment_pool_total(pool); + can_allocate_from_heap = true; + } + + // 4. Unlock the pool, and then allocate from the heap. + if (pool->lock) { + spin_mutex_unlock(pool->lock); + } + + // 5. Actually try to allocate from the heap / OS. + if (can_allocate_from_heap) { + ret = allocate_from_heap(pool->stack_size); + // If we got something from the heap, just return it. + if (ret) { + return ret; + } + + // Otherwise, we failed in our attempt to allocate a + // fiber from the heap. Grab the lock and decrement + // the total again. + if (pool->lock) { + spin_mutex_lock(pool->lock); + } + decrement_pool_total(pool, 1); + if (pool->lock) { + spin_mutex_unlock(pool->lock); + } + } + + // 6. If we get here, then searching this pool failed. Go search + // the parent instead if we have one. + if (pool->parent) { + return allocate(pool->parent); + } + + return ret; +} + +int cilk_fiber::remove_reference(cilk_fiber_pool* pool) +{ + int ref_count = this->dec_ref_count(); + if (ref_count == 0) { + if (pool) { + deallocate_self(pool); + } + else { + deallocate_to_heap(); + } + } + return ref_count; +} + +cilk_fiber* cilk_fiber::allocate_from_thread() +{ + void* retmem = __cilkrts_malloc(sizeof(cilk_fiber_sysdep)); + CILK_ASSERT(retmem); + cilk_fiber_sysdep* ret = ::new(retmem) cilk_fiber_sysdep(from_thread); + + // A fiber allocated from a thread begins with a reference count + // of 2. The first is for being created, and the second is for + // being running. + // + // Suspending this fiber will decrement the count down to 1. + ret->init_ref_count(2); + +#if SUPPORT_GET_CURRENT_FIBER + // We're creating the main fiber for this thread. Set this fiber as the + // current fiber. + cilkos_set_tls_cilk_fiber(ret); +#endif + return ret; +} + +int cilk_fiber::deallocate_from_thread() +{ + CILK_ASSERT(this->is_allocated_from_thread()); +#if SUPPORT_GET_CURRENT_FIBER + CILK_ASSERT(this == cilkos_get_tls_cilk_fiber()); + // Reverse of "allocate_from_thread". 
+ cilkos_set_tls_cilk_fiber(NULL); +#endif + + this->assert_ref_count_at_least(2); + + // Suspending the fiber should conceptually decrement the ref + // count by 1. + cilk_fiber_sysdep* self = this->sysdep(); + self->convert_fiber_back_to_thread(); + + // Then, freeing the fiber itself decrements the ref count again. + int ref_count = this->sub_from_ref_count(2); + if (ref_count == 0) { + self->~cilk_fiber_sysdep(); + __cilkrts_free(self); + } + return ref_count; +} + +int cilk_fiber::remove_reference_from_thread() +{ + int ref_count = dec_ref_count(); + if (ref_count == 0) { + cilk_fiber_sysdep* self = this->sysdep(); + self->~cilk_fiber_sysdep(); + __cilkrts_free(self); + } + return ref_count; +} + + +#if SUPPORT_GET_CURRENT_FIBER +cilk_fiber* cilk_fiber::get_current_fiber() +{ + return cilk_fiber_sysdep::get_current_fiber_sysdep(); +} +#endif + +void cilk_fiber::do_post_switch_actions() +{ + if (m_post_switch_proc) + { + cilk_fiber_proc proc = m_post_switch_proc; + m_post_switch_proc = NULL; + proc(this); + } + + if (m_pending_remove_ref) + { + m_pending_remove_ref->remove_reference(m_pending_pool); + + // Even if we don't free it, + m_pending_remove_ref = NULL; + m_pending_pool = NULL; + } +} + +void cilk_fiber::suspend_self_and_resume_other(cilk_fiber* other) +{ +#if FIBER_DEBUG >=1 + fprintf(stderr, "suspend_self_and_resume_other: self =%p, other=%p [owner=%p, resume_sf=%p]\n", + this, other, other->owner, other->resume_sf); +#endif + + // Decrement my reference count (to suspend) + // Increment other's count (to resume) + // Suspended fiber should have a reference count of at least 1. (It is not in a pool). + this->dec_ref_count(); + other->inc_ref_count(); + this->assert_ref_count_at_least(1); + + // Pass along my owner. + other->owner = this->owner; + this->owner = NULL; + + // Change this fiber to resumable. + CILK_ASSERT(!this->is_resumable()); + this->set_resumable(true); + + // Normally, I'd assert other->is_resumable(). 
But this flag may + // be false the first time we try to "resume" a fiber. + cilk_fiber_sysdep* self = this->sysdep(); + self->suspend_self_and_resume_other_sysdep(other->sysdep()); + + // HAVE RESUMED EXECUTION + // When we come back here, we should have at least two references: + // one for the fiber being allocated / out of a pool, and one for it being active. + this->assert_ref_count_at_least(2); +} + +NORETURN +cilk_fiber::remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool, + cilk_fiber* other) +{ + // Decrement my reference count once (to suspend) + // Increment other's count (to resume) + // Suspended fiber should have a reference count of at least 1. (It is not in a pool). + this->dec_ref_count(); + other->inc_ref_count(); + + // Set a pending remove reference for this fiber, once we have + // actually switched off. + other->m_pending_remove_ref = this; + other->m_pending_pool = self_pool; + + // Pass along my owner. + other->owner = this->owner; + this->owner = NULL; + + // Since we are deallocating self, this fiber does not become + // resumable. + CILK_ASSERT(!this->is_resumable()); + + cilk_fiber_sysdep* self = this->sysdep(); + self->jump_to_resume_other_sysdep(other->sysdep()); + + __cilkrts_bug("Deallocating fiber. We should never come back here."); + std::abort(); +} + + +void cilk_fiber::deallocate_to_heap() +{ + cilk_fiber_sysdep* self = this->sysdep(); + self->~cilk_fiber_sysdep(); + __cilkrts_free(self); +} + +void cilk_fiber::deallocate_self(cilk_fiber_pool* pool) +{ + this->set_resumable(false); + + CILK_ASSERT(NULL != pool); + CILK_ASSERT(!this->is_allocated_from_thread()); + this->assert_ref_count_equals(0); + + // Cases: + // + // 1. pool has space: Add to this pool. + // 2. pool is full: Give some fibers to parent, and then free + // enough to make space for the fiber we are deallocating. + // Then put the fiber back into the pool. + + const bool need_lock = pool->lock; + // Grab the lock for the remaining cases. 
+ if (need_lock) { + spin_mutex_lock(pool->lock); + } + + // Case 1: this pool has space. Return the fiber. + if (pool->size < pool->max_size) + { + // Add this fiber to pool + pool->fibers[pool->size++] = this; + if (need_lock) { + spin_mutex_unlock(pool->lock); + } + return; + } + + // Case 2: Pool is full. + // + // First free up some space by giving fibers to the parent. + if (pool->parent) + { + // Pool is full. Move all but "num_to_keep" fibers to parent, + // if we can. + unsigned num_to_keep = pool->max_size/2 + pool->max_size/4; + cilk_fiber_pool_move_fibers_to_parent_pool(pool, num_to_keep); + } + + if (need_lock) { + spin_mutex_unlock(pool->lock); + } + + // Now, free a fiber to make room for the one we need to put back, + // and then put this fiber back. This step may actually return + // fibers to the heap. + cilk_fiber_pool_free_fibers_from_pool(pool, pool->max_size -1, this); +} + + +// NOTE: Except for print-debug, this code is the same as in Windows. +void cilk_fiber::invoke_tbb_stack_op(__cilk_tbb_stack_op op) +{ + cilk_fiber_data *fdata = this->get_data(); + + if (0 == fdata->stack_op_routine) + { + if (CILK_TBB_STACK_RELEASE != op) + DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - %s (%d) for cilk_fiber %p, fiber %p, thread id %04x - No stack op routine\n", + fdata->owner, + NameStackOp(op), + op, + fdata, + this, + cilkos_get_current_thread_id()); + return; + } + + // Call TBB to do it's thing + DBG_STACK_OPS ("Wkr %p: invoke_tbb_stack_op - op %s data %p for cilk_fiber %p, fiber %p, thread id %04x\n", + fdata->owner, + NameStackOp(op), + fdata->stack_op_data, + fdata, + this, + cilkos_get_current_thread_id()); + + (*fdata->stack_op_routine)(op, fdata->stack_op_data); + if (op == CILK_TBB_STACK_RELEASE) + { + fdata->stack_op_routine = 0; + fdata->stack_op_data = 0; + } +} + + + +#if NEED_FIBER_REF_COUNTS + +void cilk_fiber::atomic_inc_ref_count() +{ + cilkos_atomic_add(&m_outstanding_references, 1); +} + +long cilk_fiber::atomic_dec_ref_count() +{ 
+ return cilkos_atomic_add(&m_outstanding_references, -1); +} + +long cilk_fiber::atomic_sub_from_ref_count(long v) +{ + return cilkos_atomic_add(&m_outstanding_references, -v); +} + +#endif // NEED_FIBER_REF_COUNTS + +/* End cilk_fibers.cpp */ diff --git a/libcilkrts/runtime/cilk_fiber.h b/libcilkrts/runtime/cilk_fiber.h new file mode 100644 index 00000000000..e1d5f5b32a3 --- /dev/null +++ b/libcilkrts/runtime/cilk_fiber.h @@ -0,0 +1,877 @@ +/* cilk_fiber.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
+ **************************************************************************/ + +/** + * @file cilk_fiber.h + * + * @brief Abstraction of a "fiber": A coprocess-like stack and auxiliary data + */ + +#ifndef INCLUDED_CILK_FIBER_DOT_H +#define INCLUDED_CILK_FIBER_DOT_H + +#include <cilk/common.h> +#ifdef __cplusplus +# include <cstddef> +#else +# include <stddef.h> +#endif + +#include "bug.h" +#include "cilk-tbb-interop.h" +#include "spin_mutex.h" +#include "internal/abi.h" // Define __cilkrts_stack_frame + +/** + * @brief Debugging level for Cilk fiber code. + * + * A value of 0 means no debugging. + * Higher values generate more debugging output. + */ +#define FIBER_DEBUG 0 + +/** + * @brief Flag for validating reference counts. + * + * Set to 1 to assert that fiber reference counts are reasonable. + */ +#define FIBER_CHECK_REF_COUNTS 1 + +/** + * @brief Flag to determine whether fibers support reference counting. + * We require reference counting only on Windows, for exception + * processing. Unix does not need reference counting. + */ +#if defined(_WIN32) +# define NEED_FIBER_REF_COUNTS 1 +#endif + +/** + * @brief Flag to enable support for the + * cilk_fiber_get_current_fiber() method. + * + * I'd like this flag to be 0. However, the cilk_fiber test depends + * on being able to call this method. + */ +#if !defined(SUPPORT_GET_CURRENT_FIBER) +# define SUPPORT_GET_CURRENT_FIBER 0 +#endif + +/** + * @brief Switch for enabling "fast path" check for fibers, which + * doesn't go to the heap or OS until checking the ancestors first. + * + * Doing this check seems to make the stress test in + * cilk_fiber_pool.t.cpp run faster. But it doesn't seem to make much + * difference in other benchmarks, so it is disabled by default. + */ +#define USE_FIBER_TRY_ALLOCATE_FROM_POOL 0 + + +__CILKRTS_BEGIN_EXTERN_C + +/// @brief Forward reference to fiber pool. 
+typedef struct cilk_fiber_pool cilk_fiber_pool; + +/** @brief Opaque data structure representing a fiber */ +typedef struct cilk_fiber cilk_fiber; + +/** @brief Function pointer type for use as a fiber's "main" procedure */ +typedef void (*cilk_fiber_proc)(cilk_fiber*); + +/** @brief Data structure associated with each fiber. */ +typedef struct cilk_fiber_data +{ + __STDNS size_t stack_size; /**< Size of stack for fiber */ + __cilkrts_worker* owner; /**< Worker using this fiber */ + __cilkrts_stack_frame* resume_sf; /**< Stack frame to resume */ + __cilk_tbb_pfn_stack_op stack_op_routine; /**< Cilk/TBB interop callback */ + void* stack_op_data; /**< Data for Cilk/TBB callback */ + void* client_data; /**< Data managed by client */ + +#ifdef _WIN32 + char *initial_sp; /**< Initialized in fiber_stub */ +# ifdef _WIN64 + char *steal_frame_sp; /**< RSP for frame stealing work */ + // Needed for exception handling so we can + // identify when about to unwind off stack +# endif +#endif + +} cilk_fiber_data; + +/** @brief Pool of cilk_fiber for fiber reuse + * + * Pools form a hierarchy, with each pool pointing to its parent. When the + * pool underflows, it gets a fiber from its parent. When a pool overflows, + * it returns some fibers to its parent. If the root pool underflows, it + * allocates and initializes a new fiber from the heap but only if the total + * is less than alloc_max; otherwise, fiber creation fails. + */ +struct cilk_fiber_pool +{ + spin_mutex* lock; ///< Mutual exclusion for pool operations + __STDNS size_t stack_size; ///< Size of stacks for fibers in this pool. + cilk_fiber_pool* parent; ///< @brief Parent pool. + ///< If this pool is empty, get from parent + + // Describes inactive fibers stored in the pool. 
+ cilk_fiber** fibers; ///< Array of max_size fiber pointers + unsigned max_size; ///< Limit on number of fibers in pool + unsigned size; ///< Number of fibers currently in the pool + + // Statistics on active fibers that were allocated from this pool, + // but are no longer in the pool. + int total; ///< @brief Fibers allocated - fibers deallocated from pool + ///< total may be negative for non-root pools. + int high_water; ///< High water mark of total fibers + int alloc_max; ///< Limit on number of fibers allocated from the heap/OS +}; + +/** @brief Initializes a cilk_fiber_pool structure + * + * @param pool - The address of the pool that is to be initialized + * @param parent - The address of this pool's parent, or NULL for root pool + * @param stack_size - Size of stacks for fibers allocated from this pool. + * @param buffer_size - The maximum number of fibers that may be pooled. + * @param alloc_max - Limit on # of fibers this pool can allocate from the heap. + * @param is_shared - True if accessing this pool needs a lock, false otherwise. + */ +void cilk_fiber_pool_init(cilk_fiber_pool* pool, + cilk_fiber_pool* parent, + size_t stack_size, + unsigned buffer_size, + int alloc_max, + int is_shared); + +/** @brief Sets the maximum number of fibers to allocate from a root pool. + * + * @param root_pool - A root fiber pool + * @param max_fibers_to_allocate - The limit on # of fibers to allocate. + * + * Sets the maximum number of fibers that can be allocated from this + * pool and all its descendants. This pool must be a root pool. + */ +void cilk_fiber_pool_set_fiber_limit(cilk_fiber_pool* root_pool, + unsigned max_fibers_to_allocate); + +/** @brief De-initializes a cilk_fiber_pool + * + * @param pool - The address of the pool that is to be destroyed + */ +void cilk_fiber_pool_destroy(cilk_fiber_pool* pool); + +/** @brief Allocates a new cilk_fiber. 
+ * + * If the specified pool is empty, this method may choose to either + * allocate a fiber from the heap (if pool->total < pool->alloc_max), + * or retrieve a fiber from the parent pool. + * + * @note If a non-null fiber is returned, @c cilk_fiber_reset_state + * should be called on this fiber before using it. + * + * An allocated fiber begins with a reference count of 1. + * This method may lock @c pool or one of its ancestors. + * + * @pre pool should not be NULL. + * + * @param pool The fiber pool from which to retrieve a fiber. + * @return An allocated fiber, or NULL if failed to allocate. + */ +cilk_fiber* cilk_fiber_allocate(cilk_fiber_pool* pool); + +/** @brief Allocate and initialize a new cilk_fiber using memory from + * the heap and/or OS. + * + * The allocated fiber begins with a reference count of 1. + * + * @param stack_size The size (in bytes) to be allocated for the fiber's + * stack. + * @return An initialized fiber. This method should not return NULL + * unless some exceptional condition has occurred. + */ +cilk_fiber* cilk_fiber_allocate_from_heap(size_t stack_size); + + +/** @brief Resets a fiber object just allocated from a pool with the + * specified proc. + * + * After this call, the cilk_fiber_data object associated with this fiber + * is filled with zeros. + * + * This function can be called only on a fiber that has been allocated + * from a pool, but never used. + * + * @param fiber The fiber to reset and initialize. + * @param start_proc The function to run when switching to the fiber. If + * null, the fiber can be used with cilk_fiber_run_proc() + * but not with cilk_fiber_resume(). + */ +void cilk_fiber_reset_state(cilk_fiber* fiber, + cilk_fiber_proc start_proc); + +/** @brief Remove a reference from this fiber, possibly deallocating it. + * + * This fiber is deallocated only when there are no other references + * to it. Deallocation happens either by returning the fiber to the + * specified pool, or returning it to the heap. 
+ * + * A fiber that is currently executing should not remove the last + * reference to itself. + * + * When a fiber is deallocated, destructors are not called for the + * objects (if any) still on its stack. The fiber's stack and fiber + * data is returned to the stack pool but the client fiber data is not + * deallocated. + * + * If the pool overflows because of a deallocation, then some fibers + * will be returned to the parent pool. If the root pool overflows, + * then the fiber is returned to the heap. + * + * @param fiber The Cilk fiber to remove a reference to. + * @param pool The fiber pool to which the fiber should be returned. The + * caller is assumed to have exclusive access to the pool + * either because there is no contention for it or because + * its lock has been acquired. If pool is NULL, any + * deallocated fiber is destroyed and returned to the + * heap. + * + * @return Final reference count. If the count is 0, the fiber was + * returned to a pool or the heap. + */ +int cilk_fiber_remove_reference(cilk_fiber *fiber, cilk_fiber_pool *pool); + +/** @brief Allocates and initializes this thread's main fiber + * + * Each thread has an "implicit" main fiber that controls the + * thread's initial stack. This function makes this fiber visible to + * the client and allocates the Cilk-specific aspects of the implicit + * fiber. A call to this function must be paired with a call to + * cilk_fiber_deallocate_from_thread() + * or a memory leak (or worse) will result. + * + * A fiber allocated from a thread begins with a reference count of 2. + * One is for being allocated, and one is for being active. + * (A fiber created from a thread is automatically currently executing.) + * The matching cilk_fiber_deallocate_from_thread() call removes both + * references (it decrements the count by 2). + * + * @return A fiber for the currently executing thread. + */ +cilk_fiber* cilk_fiber_allocate_from_thread(void); + +/** @brief Remove a fiber created from a thread, + * possibly deallocating it. 
+ * + * Same as cilk_fiber_remove_reference, except that it works on fibers + * created via cilk_fiber_allocate_from_thread(). + * + * Fibers created from a thread are never returned to a pool. + * + * @param fiber The Cilk fiber to remove a reference from. + * @return Final reference count. If the count is 0, the fiber was + * returned to the heap. + */ +int cilk_fiber_remove_reference_from_thread(cilk_fiber *fiber); + +/** @brief Deallocate a fiber created from a thread, + * possibly destroying it. + * + * This method decrements the reference count of the fiber by 2, and + * destroys the fiber struct if the reference count is 0. + * + * OS-specific cleanup for the fiber executes unconditionally with + * this method. The destruction of the actual object, however, does + * not occur unless the reference count is 0. + * + * @param fiber The cilk_fiber to deallocate from a thread. + * @return Final reference count. If the count is 0, the fiber was + * returned to the heap. + */ +int cilk_fiber_deallocate_from_thread(cilk_fiber *fiber); + +/** @brief Returns true if this fiber is allocated from a thread. + */ +int cilk_fiber_is_allocated_from_thread(cilk_fiber *fiber); + + +/** @brief Suspend execution on current fiber resumes other fiber. + * + * Suspends the current fiber and transfers control to a new fiber. Execution + * on the new fiber resumes from the point at which fiber suspended itself to + * run a different fiber. If fiber was freshly allocated, then runs the + * start_proc function specified at allocation. This function returns when + * another fiber resumes the self fiber. Note that the state of the + * floating-point control register (i.e., the register that controls rounding + * mode, etc.) is valid but indeterminate on return -- different + * implementations will have different results. + * + * When the @c self fiber is resumed, execution proceeds as though + * this function call returns. 
+ * + * This operation increments the reference count of @p other. + * This operation decrements the reference count of @p self. + * + * @param self Fiber to switch from. Must equal current fiber. + * @param other Fiber to switch to. + */ +void cilk_fiber_suspend_self_and_resume_other(cilk_fiber* self, + cilk_fiber* other); + +/** @brief Removes a reference from the currently executing fiber and + * resumes other fiber. + * + * Removes a reference from @p self and transfers control to @p other + * fiber. Execution on @p other resumes from the point at which @p + * other suspended itself to run a different fiber. If @p other fiber + * was freshly allocated, then runs the function specified at + * creation. + * + * + * This operation increments the reference count of @p other. + * + * This operation conceptually decrements the reference count of + * @p self twice, once to suspend it, and once to remove a reference to + * it. Then, if the count is 0, it is returned to the specified pool + * or destroyed. + * + * @pre @p self is the currently executing fiber. + * + * @param self Fiber to remove a reference from and switch away from. + * @param self_pool Pool to which the current fiber should be returned + * @param other Fiber to switch to. + */ +NORETURN +cilk_fiber_remove_reference_from_self_and_resume_other(cilk_fiber* self, + cilk_fiber_pool* self_pool, + cilk_fiber* other); + +/** @brief Set the proc method to execute immediately after a switch + * to this fiber. + * + * The @c post_switch_proc method executes immediately after switching + * away from @p self fiber to some other fiber, but before @c self + * gets cleaned up. + * + * @note A fiber can have only one post_switch_proc method at a time. + * If this method is called multiple times before switching to the + * fiber, only the last proc method will execute. + * + * @param self Fiber. + * @param post_switch_proc Proc method to execute immediately after switching to this fiber. 
+ */ +void cilk_fiber_set_post_switch_proc(cilk_fiber* self, cilk_fiber_proc post_switch_proc); + +/** @brief Invoke TBB stack op for this fiber. + * + * @param fiber Fiber to invoke stack op for. + * @param op The stack op to invoke + */ +void cilk_fiber_invoke_tbb_stack_op(cilk_fiber* fiber, __cilk_tbb_stack_op op); + +/** @brief Returns the fiber data associated with the specified fiber. + * + * The returned struct is owned by the fiber and is deallocated automatically + * when the fiber is destroyed. However, the client_data field is owned by + * the client and must be deallocated separately. When called for a + * newly-allocated fiber, the returned data is zero-filled. + * + * @param fiber The fiber for which data is being requested. + * @return The fiber data for the specified fiber + */ +cilk_fiber_data* cilk_fiber_get_data(cilk_fiber* fiber); + +/** @brief Retrieve the owner field from the fiber. + * + * This method is provided for convenience. One can also get the + * fiber data, and then get the owner field. + */ +__CILKRTS_INLINE +__cilkrts_worker* cilk_fiber_get_owner(cilk_fiber* fiber) +{ + // TBD: We really want a static assert here, that this cast is + // doing the right thing. + cilk_fiber_data* fdata = (cilk_fiber_data*)fiber; + return fdata->owner; +} + +/** @brief Sets the owner field of a fiber. + * + * This method is provided for convenience. One can also get the + * fiber data, and then set the owner field. + */ +__CILKRTS_INLINE +void cilk_fiber_set_owner(cilk_fiber* fiber, __cilkrts_worker* owner) +{ + // TBD: We really want a static assert here, that this cast is + // doing the right thing. + cilk_fiber_data* fdata = (cilk_fiber_data*)fiber; + fdata->owner = owner; +} + +/** @brief Returns true if this fiber is resumable. + * + * A fiber is considered resumable when it is not currently being + * executed. + * + * This function is used by Windows exception code. + * @param fiber The fiber to check. 
+ * @return Nonzero value if fiber is resumable. + */ +int cilk_fiber_is_resumable(cilk_fiber* fiber); + +/** + * @brief Returns the base of this fiber's stack. + * + * On some platforms (e.g., Windows), the fiber must have started + * running before we can get this information. + * + * @param fiber The fiber to get the stack pointer from. + * @return The base of the stack, or NULL if this + * information is not available yet. + */ +char* cilk_fiber_get_stack_base(cilk_fiber* fiber); + + +/**************************************************************************** + * TBB interop functions + * **************************************************************************/ +/** + * @brief Set the TBB callback information for a stack + * + * @param fiber The fiber to set the TBB callback information for + * @param o The TBB callback thunk. Specifies the callback address and + * context value. + */ +void cilk_fiber_set_stack_op(cilk_fiber *fiber, + __cilk_tbb_stack_op_thunk o); + +/** + * @brief Save the TBB callback address and context value in + * thread-local storage. + * + * We'll use it later when the thread binds to a worker. + * + * @param o The TBB callback thunk which is to be saved. + */ +void cilk_fiber_tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o); + +/** + * @brief Move TBB stack-op info from thread-local storage and store + * it into the fiber. + * + * Called when we bind a thread to the runtime. If there is any TBB + * interop information in thread-local storage, bind it to the stack + * now. + * + * @pre \c fiber should not be NULL. + * @param fiber The fiber that should take over the TBB interop information. + */ +void cilk_fiber_tbb_interop_use_saved_stack_op_info(cilk_fiber *fiber); + +/** + * @brief Free any TBB interop information saved in thread-local storage + */ +void cilk_fiber_tbb_interop_free_stack_op_info(void); + +/** + * @brief Migrate any TBB interop information from a cilk_fiber to + * thread-local storage. 
+ * + * Returns immediately if no TBB interop information has been + * associated with the stack. + * + * @param fiber The cilk_fiber who's TBB interop information should be + * saved in thread-local storage. + */ +void cilk_fiber_tbb_interop_save_info_from_stack(cilk_fiber* fiber); + + +#if SUPPORT_GET_CURRENT_FIBER +/** @brief Returns the fiber associated with the currently executing thread + * + * @note This function is currently used only for testing the Cilk + * runtime. + * + * @return Fiber associated with the currently executing thread or NULL if no + * fiber was associated with this thread. + */ +cilk_fiber* cilk_fiber_get_current_fiber(void); +#endif + + +#if NEED_FIBER_REF_COUNTS +/** @brief Returns true if this fiber has reference count > 0. + * + * @param fiber The fiber to check for references. + * @return Nonzero value if the fiber has references. + */ +int cilk_fiber_has_references(cilk_fiber *fiber); + +/** @brief Returns the value of the reference count. + * + * @param fiber The fiber to check for references. + * @return The value of the reference count of fiber. + */ +int cilk_fiber_get_ref_count(cilk_fiber *fiber); + +/** @brief Adds a reference to this fiber. + * + * Increments the reference count of a current fiber. Fibers with + * nonzero reference count will not be freed or returned to a fiber + * pool. + * + * @param fiber The fiber to add a reference to. + */ +void cilk_fiber_add_reference(cilk_fiber *fiber); + +#endif // NEED_FIBER_REF_COUNTS + +__CILKRTS_END_EXTERN_C + +#ifdef __cplusplus +// Some C++ implementation details + +/// Opaque declaration of a cilk_fiber_sysdep object. +struct cilk_fiber_sysdep; + +/** + * cilk_fiber is a base-class for system-dependent fiber implementations. 
+ */ +struct cilk_fiber : protected cilk_fiber_data +{ + protected: + // This is a rare acceptable use of protected inheritence and protected + // variable access: when the base class and derived class collaborate + // tightly to comprise a single component. + + /// For overloading constructor of cilk_fiber. + enum from_thread_t { from_thread = 1 }; + + // Boolean flags capturing the status of the fiber. + // Each one can be set independently. + // A default fiber is constructed with a flag value of 0. + static const int RESUMABLE = 0x01; ///< True if the fiber is in a suspended state and can be resumed. + static const int ALLOCATED_FROM_THREAD = 0x02; ///< True if fiber was allocated from a thread. + + cilk_fiber_proc m_start_proc; ///< Function to run on start up/reset + cilk_fiber_proc m_post_switch_proc; ///< Function that executes when we first switch to a new fiber from a different one. + + cilk_fiber* m_pending_remove_ref;///< Fiber to possibly delete on start up or resume + cilk_fiber_pool* m_pending_pool; ///< Pool where m_pending_remove_ref should go if it is deleted. + unsigned m_flags; ///< Captures the status of this fiber. + +#if NEED_FIBER_REF_COUNTS + volatile long m_outstanding_references; ///< Counts references to this fiber. +#endif + + /// Creates a fiber with NULL data. + cilk_fiber(); + + /** + * @brief Creates a fiber with user-specified arguments. + * + * @param stack_size Size of stack to use for this fiber. + */ + cilk_fiber(std::size_t stack_size); + + /// Empty destructor. + ~cilk_fiber(); + + /** + * @brief Performs any actions that happen after switching from + * one fiber to another. + * + * These actions are: + * 1. Execute m_post_switch_proc on a fiber. + * 2. Do any pending deallocations from the previous fiber. + */ + void do_post_switch_actions(); + + /** + *@brief Helper method that converts a @c cilk_fiber object into a + * @c cilk_fiber_sysdep object. 
+ * + * The @c cilk_fiber_sysdep object contains the system-dependent parts + * of the implementation of a @c cilk_fiber. + * + * We could have @c cilk_fiber_sysdep inherit from @c cilk_fiber and + * then use virtual functions. But since a given platform only uses + * one definition of @c cilk_fiber_sysdep at a time, we statically + * cast between them. + */ + inline cilk_fiber_sysdep* sysdep(); + + /** + * @brief Set resumable flag to specified state. + */ + inline void set_resumable(bool state) { + m_flags = state ? (m_flags | RESUMABLE) : (m_flags & (~RESUMABLE)); + } + + /** + *@brief Set the allocated_from_thread flag. + */ + inline void set_allocated_from_thread(bool state) { + m_flags = state ? (m_flags | ALLOCATED_FROM_THREAD) : (m_flags & (~ALLOCATED_FROM_THREAD)); + } + + public: + + /** + * @brief Allocates and initializes a new cilk_fiber, either from + * the specified pool or from the heap. + * + * @pre pool should not be NULL. + */ + static cilk_fiber* allocate(cilk_fiber_pool* pool); + + /** + * @brief Allocates a fiber from the heap. + */ + static cilk_fiber* allocate_from_heap(size_t stack_size); + + /** + * @brief Return a fiber to the heap. + */ + void deallocate_to_heap(); + + /** + * @brief Reset the state of a fiber just allocated from a pool. + */ + void reset_state(cilk_fiber_proc start_proc); + + /** + * @brief Remove a reference from this fiber, possibly + * deallocating it if the reference count becomes 0. + * + * @param pool The fiber pool to which this fiber should be returned. + * @return The final reference count. + */ + int remove_reference(cilk_fiber_pool* pool); + + /** + * @brief Deallocate the fiber by returning it to the pool. + * @pre This method should only be called if the reference count + * is 0. + * + * @param pool The fiber pool to return this fiber to. If NULL, + * fiber is returned to the heap. + */ + void deallocate_self(cilk_fiber_pool *pool); + + /** @brief Allocates and initializes this thread's main fiber. 
*/ + static cilk_fiber* allocate_from_thread(); + + /** @brief Deallocate a fiber created from a thread, + * possibly destroying it. + * + * This method decrements the reference count of this fiber by 2, + * and destroys the fiber if the reference count is 0. + * + * OS-specific cleanup for the fiber executes unconditionally with + * this method. The destruction of the actual object, however, does + * not occur unless the reference count is 0. + * + * @return Final reference count. If the count is 0, the fiber was + * returned to the heap. + */ + int deallocate_from_thread(); + + /** @brief Removes a reference from this fiber. + * + * This method deallocates this fiber if the reference count + * becomes 0. + * + * @pre This fiber must be allocated from a thread. + * @return The final reference count of this fiber. + */ + int remove_reference_from_thread(); + +#if SUPPORT_GET_CURRENT_FIBER + /** @brief Get the current fiber from TLS. + * + * @note This function is only used for testing the runtime. + */ + static cilk_fiber* get_current_fiber(); +#endif + + /** @brief Suspend execution on current fiber and resume other fiber. + * + * Control returns after resuming execution of the self fiber. + */ + void suspend_self_and_resume_other(cilk_fiber* other); + + + /** @brief Removes a reference from the currently executing fiber + * and resumes other fiber. + * + * This fiber may be returned to a pool or deallocated. + */ + NORETURN remove_reference_from_self_and_resume_other(cilk_fiber_pool* self_pool, + cilk_fiber* other); + + /** @brief Set the proc method to execute immediately after a switch + * to this fiber. + * + * @param post_switch_proc Proc method to execute immediately + * after switching to this fiber. + */ + inline void set_post_switch_proc(cilk_fiber_proc post_switch_proc) { + m_post_switch_proc = post_switch_proc; + } + + /** @brief Returns true if this fiber is resumable. 
+ * + * A fiber is considered resumable when it is not currently being + * executed. + */ + inline bool is_resumable(void) { + return (m_flags & RESUMABLE); + } + + /** @brief Returns true if fiber was allocated from a thread. */ + inline bool is_allocated_from_thread(void) { + return (m_flags & ALLOCATED_FROM_THREAD); + } + + /** + *@brief Get the address at the base of the stack for this fiber. + */ + inline char* get_stack_base(); + + /** @brief Return the data for this fiber. */ + cilk_fiber_data* get_data() { return this; } + + /** @brief Return the data for this fiber. */ + cilk_fiber_data const* get_data() const { return this; } + + +#if NEED_FIBER_REF_COUNTS + /** @brief Verifies that this fiber's reference count equals v. */ + inline void assert_ref_count_equals(long v) { + #if FIBER_CHECK_REF_COUNTS + CILK_ASSERT(m_outstanding_references == v); + #endif + } + + /** @brief Verifies that this fiber's reference count is at least v. */ + inline void assert_ref_count_at_least(long v) { + #if FIBER_CHECK_REF_COUNTS + CILK_ASSERT(m_outstanding_references >= v); + #endif + } + + /** @brief Get reference count. */ + inline long get_ref_count() { return m_outstanding_references; } + + /** @brief Initialize reference count. + * Operation is not atomic. + */ + inline void init_ref_count(long v) { m_outstanding_references = v; } + + // For Windows, updates to the fiber reference count need to be + // atomic, because exceptions can live on a stack that we are not + // currently executing on. Thus, we can update the reference + // count of a fiber we are not currently executing on. + + /** @brief Increment reference count for this fiber [Windows]. */ + inline void inc_ref_count() { atomic_inc_ref_count(); } + + /** @brief Decrement reference count for this fiber [Windows]. */ + inline long dec_ref_count() { return atomic_dec_ref_count(); } + + /** @brief Subtract v from the reference count for this fiber [Windows]. 
*/ + inline long sub_from_ref_count(long v) { return atomic_sub_from_ref_count(v); } +#else // NEED_FIBER_REF_COUNTS + + // Without reference counting, we have placeholder methods. + inline void init_ref_count(long v) { } + + inline void inc_ref_count() { } + + // With no reference counting, dec_ref_count always returns 0. + // Thus, anyone checking is always the "last" one. + inline long dec_ref_count() { return 0; } + inline long sub_from_ref_count(long v) { return 0; } + + // The assert methods do nothing. + inline void assert_ref_count_equals(long v) { } + inline void assert_ref_count_at_least(long v) { } +#endif + + /** + * @brief Call TBB to tell it about an "interesting" event. + * + * @param op Value specifying the event to track. + */ + void invoke_tbb_stack_op(__cilk_tbb_stack_op op); + +private: + + /** + * @brief Helper method: try to allocate a fiber from this pool or + * its ancestors without going to the OS / heap. + * + * Returns the allocated fiber, or NULL if no fiber is found. + * + * If pool contains a suitable fiber, return it. Otherwise, try to + * recursively grab a fiber from the parent pool, if there is one. + * + * This method will not allocate a fiber from the heap. + */ + static cilk_fiber* try_allocate_from_pool_recursive(cilk_fiber_pool* pool); + + +#if NEED_FIBER_REF_COUNTS + /** + * @brief Atomic increment of reference count. + */ + void atomic_inc_ref_count(); + + /** + * @brief Atomic decrement of reference count. + */ + long atomic_dec_ref_count(); + + /** + * @brief Atomic subtract of v from reference count. + * @param v Value to subtract. + */ + long atomic_sub_from_ref_count(long v); +#endif // NEED_FIBER_REF_COUNTS + +}; + +#endif // __cplusplus + +#endif // ! 
defined(INCLUDED_CILK_FIBER_DOT_H) diff --git a/libcilkrts/runtime/cilk_malloc.c b/libcilkrts/runtime/cilk_malloc.c index 89d6fc97ab3..2094b0335c9 100644 --- a/libcilkrts/runtime/cilk_malloc.c +++ b/libcilkrts/runtime/cilk_malloc.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "cilk_malloc.h" @@ -33,6 +38,10 @@ #include <malloc.h> #define HAS_MEMALIGN 1 #endif +#ifdef __VXWORKS__ +#define HAS_MEMALIGN 1 +#include <memLib.h> +#endif #define PREFERRED_ALIGNMENT 64 /* try to keep runtime system data structures within one cache line */ diff --git a/libcilkrts/runtime/cilk_malloc.h b/libcilkrts/runtime/cilk_malloc.h index f547aa9798f..2ccce8a4ae3 100644 --- a/libcilkrts/runtime/cilk_malloc.h +++ b/libcilkrts/runtime/cilk_malloc.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** diff --git a/libcilkrts/runtime/component.h b/libcilkrts/runtime/component.h index 869ea9bd79b..01aab3a6274 100644 --- a/libcilkrts/runtime/component.h +++ b/libcilkrts/runtime/component.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #ifndef INCLUDED_COMPONENT_DOT_H diff --git a/libcilkrts/runtime/doxygen-layout.xml b/libcilkrts/runtime/doxygen-layout.xml index 5e75f563eda..8757667d829 100644 --- a/libcilkrts/runtime/doxygen-layout.xml +++ b/libcilkrts/runtime/doxygen-layout.xml @@ -1,28 +1,33 @@ <doxygenlayout version="1.0"> <!-- -# Copyright (C) 2011 -# Intel Corporation -# -# This file is part of the Intel Cilk Plus Library. This library is free -# software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# Under Section 7 of GPL version 3, you are granted additional -# permissions described in the GCC Runtime Library Exception, version -# 3.1, as published by the Free Software Foundation. -# -# You should have received a copy of the GNU General Public License and -# a copy of the GCC Runtime Library Exception along with this program; -# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -# <http://www.gnu.org/licenses/>. +# @copyright +# Copyright (C) 2011 +# Intel Corporation +# +# @copyright +# This file is part of the Intel Cilk Plus Library. 
This library is free +# software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# @copyright +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# @copyright +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# @copyright +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. --> <!-- Navigation index tabs for HTML output --> diff --git a/libcilkrts/runtime/doxygen.cfg b/libcilkrts/runtime/doxygen.cfg index 698cbfcd328..12048bc77bd 100644 --- a/libcilkrts/runtime/doxygen.cfg +++ b/libcilkrts/runtime/doxygen.cfg @@ -1,27 +1,32 @@ # Doxyfile 1.7.4
-# Copyright (C) 2011 -# Intel Corporation -# -# This file is part of the Intel Cilk Plus Library. This library is free -# software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# Under Section 7 of GPL version 3, you are granted additional -# permissions described in the GCC Runtime Library Exception, version -# 3.1, as published by the Free Software Foundation. -# -# You should have received a copy of the GNU General Public License and -# a copy of the GCC Runtime Library Exception along with this program; -# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -# <http://www.gnu.org/licenses/>. +# @copyright +# Copyright (C) 2011-2012 +# Intel Corporation +# +# @copyright +# This file is part of the Intel Cilk Plus Library. This library is free +# software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) +# any later version. +# +# @copyright +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# @copyright +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. 
+# +# @copyright +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. # This file describes the settings to be used by the documentation system
# doxygen (www.doxygen.org) for a project.
@@ -48,7 +53,7 @@ DOXYFILE_ENCODING = UTF-8 # The PROJECT_NAME tag is a single word (or a sequence of words surrounded
# by quotes) that should identify the project.
-PROJECT_NAME = "Cilk Runtime"
+PROJECT_NAME = "Intel Cilk Plus Runtime"
# The PROJECT_NUMBER tag can be used to enter a project or revision number.
# This could be handy for archiving the generated documentation or
@@ -635,7 +640,9 @@ WARN_LOGFILE = INPUT = ./ \
../include/internal/abi.h \
- ../include/cilk/cilk_api.h
+ ../include/cilk/cilk_api.h \
+ ../include/cilk/common.h \
+ ./readme.dox
# This tag can be used to specify the character encoding of the source files
@@ -1490,7 +1497,10 @@ PREDEFINED = _WIN32 \ CILK_API(t)=t \
CILK_ABI(t)=t \
CILK_ABI_THROWS(t)=t \
- CALLBACK=
+ CALLBACK= \
+ __CILKRTS_INLINE=inline \
+ __CILKRTS_ABI_VERSION=1 \
+ __cplusplus \
# If the MACRO_EXPANSION and EXPAND_ONLY_PREDEF tags are set to YES then
# this tag can be used to specify a list of macro names that should be expanded.
diff --git a/libcilkrts/runtime/except-gcc.cpp b/libcilkrts/runtime/except-gcc.cpp index d577428e5a4..0d643c8d310 100644 --- a/libcilkrts/runtime/except-gcc.cpp +++ b/libcilkrts/runtime/except-gcc.cpp @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "except-gcc.h" @@ -34,6 +39,7 @@ #include "full_frame.h" #include "scheduler.h" #include "frame_malloc.h" +#include "pedigrees.h" #include <stdint.h> #include <typeinfo> @@ -159,8 +165,24 @@ __cilkrts_return_exception(__cilkrts_stack_frame *sf) CILK_ASSERT(sf->flags & CILK_FRAME_DETACHED); sf->flags &= ~CILK_FRAME_DETACHED; + /* + * If we are in replay mode, and a steal occurred during the recording + * phase, stall till a steal actually occurs. + */ + replay_wait_for_steal_if_parent_was_stolen(w); + /* If this is to be an abnormal return, save the active exception. */ if (!__cilkrts_pop_tail(w)) { + /* Write a record to the replay log for an attempt to return to a + stolen parent. This must be done before the exception handler + invokes __cilkrts_leave_frame which will bump the pedigree so + the replay_wait_for_steal_if_parent_was_stolen() above will match on + replay */ + replay_record_orphaned(w); + + /* Now that the record/replay stuff is done, update the pedigree */ + update_pedigree_on_leave_frame(w, sf); + /* Inline pop_frame; this may not be needed. */ w->current_stack_frame = sf->call_parent; sf->call_parent = 0; @@ -191,6 +213,10 @@ __cilkrts_return_exception(__cilkrts_stack_frame *sf) the same stack and part of the same full frame. 
The caller is cleaning up the Cilk frame during unwind and will reraise the exception */ + + /* Now that the record/replay stuff is done, update the pedigree */ + update_pedigree_on_leave_frame(w, sf); + #if DEBUG_EXCEPTIONS /* DEBUG ONLY */ { __cxa_eh_globals *state = __cxa_get_globals(); @@ -247,7 +273,8 @@ NORETURN __cilkrts_c_sync_except (__cilkrts_worker *w, __cilkrts_stack_frame *sf __cxa_eh_globals *state = __cxa_get_globals(); _Unwind_Exception *exc = (_Unwind_Exception *)sf->except_data; - CILK_ASSERT (sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING) == (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)); + CILK_ASSERT((sf->flags & (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)) == + (CILK_FRAME_UNSYNCHED|CILK_FRAME_EXCEPTING)); sf->flags &= ~CILK_FRAME_EXCEPTING; #if DEBUG_EXCEPTIONS diff --git a/libcilkrts/runtime/except-gcc.h b/libcilkrts/runtime/except-gcc.h index 14a174befee..fb2ae796d20 100644 --- a/libcilkrts/runtime/except-gcc.h +++ b/libcilkrts/runtime/except-gcc.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** diff --git a/libcilkrts/runtime/except.h b/libcilkrts/runtime/except.h index 94f5b1e3a24..c8739554e9b 100644 --- a/libcilkrts/runtime/except.h +++ b/libcilkrts/runtime/except.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** diff --git a/libcilkrts/runtime/frame_malloc.c b/libcilkrts/runtime/frame_malloc.c index 7f49b17cd8b..d9143034de9 100644 --- a/libcilkrts/runtime/frame_malloc.c +++ b/libcilkrts/runtime/frame_malloc.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "frame_malloc.h" @@ -31,7 +36,9 @@ #include "local_state.h" #include "cilk_malloc.h" +#ifndef __VXWORKS__ #include <memory.h> +#endif /* #define USE_MMAP 1 */ #if USE_MMAP diff --git a/libcilkrts/runtime/frame_malloc.h b/libcilkrts/runtime/frame_malloc.h index 95dffe46d4b..c414ae195c7 100644 --- a/libcilkrts/runtime/frame_malloc.h +++ b/libcilkrts/runtime/frame_malloc.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ /** diff --git a/libcilkrts/runtime/full_frame.c b/libcilkrts/runtime/full_frame.c index c6036f13acc..e51b9afab57 100644 --- a/libcilkrts/runtime/full_frame.c +++ b/libcilkrts/runtime/full_frame.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -64,10 +69,8 @@ full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w, ff->registration = 0; #endif ff->frame_size = 0; -// ff->exception_sp_offset = 0; -// ff->eh_kind = EH_NONE; - ff->stack_self = 0; - ff->stack_child = 0; + ff->fiber_self = 0; + ff->fiber_child = 0; ff->sync_master = 0; @@ -118,6 +121,26 @@ COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp) __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, sp); } +COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size) +{ + /* When resuming the parent after a steal, __cilkrts_take_stack is used to + * subtract the new stack pointer from the current stack pointer, storing + * the offset in ff->sync_sp. When resuming after a sync, + * __cilkrts_take_stack is used to subtract the new stack pointer from + * itself, leaving ff->sync_sp at zero (null). Although the pointers being + * subtracted are not part of the same contiguous chunk of memory, the + * flat memory model allows us to subtract them and get a useable offset. + * + * __cilkrts_adjust_stack() is used to deallocate a Variable Length Array + * by adding its size to ff->sync_sp.
+ */ + ff->sync_sp = ff->sync_sp + size; + + DBGPRINTF("%d- __cilkrts_adjust_stack - adjust (+) sync " + "stack of full frame %p to %p (+ size: 0x%x)\n", + __cilkrts_get_tls_worker()->self, ff, ff->sync_sp, size); +} + COMMON_PORTABLE void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff) { diff --git a/libcilkrts/runtime/full_frame.h b/libcilkrts/runtime/full_frame.h index cc2bde0a006..0ca75624490 100644 --- a/libcilkrts/runtime/full_frame.h +++ b/libcilkrts/runtime/full_frame.h @@ -2,51 +2,53 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #ifndef INCLUDED_FULL_FRAME_DOT_H #define INCLUDED_FULL_FRAME_DOT_H + #include "rts-common.h" #include "worker_mutex.h" #include <cilk/common.h> - +#include <internal/abi.h> #include <stddef.h> +#include "cilk_fiber.h" __CILKRTS_BEGIN_EXTERN_C -// Forwarded declarations -typedef struct __cilkrts_stack_frame __cilkrts_stack_frame; -typedef struct __cilkrts_stack __cilkrts_stack; - /** Magic numbers for full_frame, used for debugging */ typedef unsigned long long ff_magic_t; /* COMMON_SYSDEP */ struct pending_exception_info; /* opaque */ -/* COMMON_SYSDEP */ struct __cilkrts_stack; /* opaque */ /************************************************************* Full frames @@ -302,23 +304,23 @@ struct full_frame ptrdiff_t frame_size; /** - * Allocated stacks that need to be freed. The stacks work - * like a reducer. The leftmost frame may have stack_self + * Allocated fibers that need to be freed. 
The fibers work + * like a reducer. The leftmost frame may have @c fiber_self * null and owner non-null. * * [local] * TBD: verify exception code satisfies this requirement. */ - __cilkrts_stack *stack_self; + cilk_fiber *fiber_self; /** - * Allocated stacks that need to be freed. The stacks work - * like a reducer. The leftmost frame may have stack_self + * Allocated fibers that need to be freed. The fibers work + * like a reducer. The leftmost frame may have @c fiber_self * null and owner non-null. * * [self-locked] */ - __cilkrts_stack *stack_child; + cilk_fiber *fiber_child; /** * If the sync_master is set, this function can only be sync'd by the team @@ -375,56 +377,62 @@ struct full_frame */ /** - * Records the stack pointer within the 'sf' stack frame as the current stack - * pointer at the point of suspending full frame 'ff'. - * - * Preconditions: - * - ff->sync_sp must be either null or contain the result of a prior call to - * __cilkrts_take_stack(). - * - If ff->sync_sp is not null, then SP(sf) must refer to the same stack as - * the 'sp' argument to the prior call to __cilkrts_take_stack(). + * @brief Records the stack pointer within the @c sf stack frame as the + * current stack pointer at the point of suspending full frame @c ff. + * + * @pre @c ff->sync_sp must be either null or contain the result of a prior call to + * @c __cilkrts_take_stack(). + * @pre If @c ff->sync_sp is not null, then @c SP(sf) must refer to the same stack as + * the @c sp argument to the prior call to @c __cilkrts_take_stack(). * - * Postconditions: - * - If ff->sync_sp was null before the call, then ff->sync_sp will be set to - * SP(sf). - * - Otherwise, ff->sync_sp will be restored to the value it had just prior - * to the last call to __cilkrts_take_stack(), except offset by any change - * in the stack pointer between the call to __cilkrts_take_stack() and - * this call to __cilkrts_put_stack(). 
+ + * @post If @c ff->sync_sp was null before the call, then @c + * ff->sync_sp will be set to @c SP(sf). + * @post Otherwise, @c ff->sync_sp will be restored to the value it had just prior + * to the last call to @c __cilkrts_take_stack(), except offset by any change + * in the stack pointer between the call to @c __cilkrts_take_stack() and + * this call to @c __cilkrts_put_stack(). * * @param ff The full frame that is being suspended. - * @param sf The __cilkrts_stack_frame that is being suspended. The stack + * @param sf The @c __cilkrts_stack_frame that is being suspended. The stack * pointer will be taken from the jmpbuf contained within this - * __cilkrts_stack_frame. + * @c __cilkrts_stack_frame. */ COMMON_PORTABLE void __cilkrts_put_stack(full_frame *ff, __cilkrts_stack_frame *sf); /** - * Records the stack pointer 'sp' as the stack pointer at the point of - * resuming execution on full frame 'ff'. The value of 'sp' may be on a - * different stack than the original value recorded for the stack pointer - * using __cilkrts_put_stack(). + * @brief Records the stack pointer @c sp as the stack pointer at the point of + * resuming execution on full frame @c ff. + * + * The value of @c sp may be on a different stack than the original + * value recorded for the stack pointer using __cilkrts_put_stack(). * - * Precondition: - * - ff->sync_sp must contain a value set by __cilkrts_put_stack(). + * @pre @c ff->sync_sp must contain a value set by @c __cilkrts_put_stack(). * - * Postcondition: - * - ff->sync_sp contains an *integer* value used to compute a change in the - * stack pointer upon the next call to __cilkrts_take_stack(). - * - If 'sp' equals ff->sync_sp, then ff->sync_sp is set to null. + * @post @c ff->sync_sp contains an *integer* value used to compute a change in the + * stack pointer upon the next call to @c __cilkrts_take_stack(). + * @post If @c sp equals @c ff->sync_sp, then @c ff->sync_sp is set to null. 
* * @param ff The full frame that is being resumed. * @param sp The stack pointer for the stack the function is being resumed on. */ COMMON_PORTABLE void __cilkrts_take_stack(full_frame *ff, void *sp); +/** + * @brief Adjust the stack to deallocate a Variable Length Array + * + * @param ff The full frame that is being adjusted. + * @param size The size of the array being deallocated from the stack + */ +COMMON_PORTABLE void __cilkrts_adjust_stack(full_frame *ff, size_t size); + /** - * Allocates and initailizes a full_frame. + * @brief Allocates and initializes a full_frame. * * @param w The memory for the full_frame will be allocated out of the * worker's pool. - * @param sf The __cilkrts_stack_frame which will be saved as the call_stack + * @param sf The @c __cilkrts_stack_frame which will be saved as the call_stack * for this full_frame. * * @return The newly allocated and initialized full_frame. @@ -434,7 +442,7 @@ full_frame *__cilkrts_make_full_frame(__cilkrts_worker *w, __cilkrts_stack_frame *sf); /** - * Deallocates a full_frame. + * @brief Deallocates a full_frame. * * @param w The memory for the full_frame will be returned to the worker's pool. * @param ff The full_frame to be deallocated. @@ -443,18 +451,18 @@ COMMON_PORTABLE void __cilkrts_destroy_full_frame(__cilkrts_worker *w, full_frame *ff); /** - * Performs sanity checks to check the integrity of a full_frame. + * @brief Performs sanity checks to check the integrity of a full_frame. * * @param ff The full_frame to be validated. */ COMMON_PORTABLE void validate_full_frame(full_frame *ff); /** - * Locks the mutex contained in a full_frame. The full_frame is validated - * before the runtime attempts to lock it. + * @brief Locks the mutex contained in a full_frame. + * + * The full_frame is validated before the runtime attempts to lock it. * - * Postcondition: - * - ff->lock will be owned by w. + * @post @c ff->lock will be owned by @c w. * * @param w The worker that will own the full_frame.
If the runtime is * collecting stats, the intervals will be attributed to the worker. @@ -464,10 +472,9 @@ COMMON_PORTABLE void __cilkrts_frame_lock(__cilkrts_worker *w, full_frame *ff); /** - * Unlocks the mutex contained in a full_frame. + * @brief Unlocks the mutex contained in a full_frame. * - * Precondition: - * - ff->lock must must be owned by w. + * @pre @c ff->lock must be owned by @c w. * * @param w The worker that currently owns the full_frame. * @param ff The full_frame containing the mutex to be unlocked. diff --git a/libcilkrts/runtime/global_state.cpp b/libcilkrts/runtime/global_state.cpp index 8bdef33556c..d772c74d073 100644 --- a/libcilkrts/runtime/global_state.cpp +++ b/libcilkrts/runtime/global_state.cpp @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>.
+ * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "global_state.h" @@ -32,6 +37,8 @@ #include "metacall_impl.h" #include "stats.h" #include "cilk/cilk_api.h" +#include "cilk_malloc.h" +#include "record-replay.h" #include <algorithm> // For max() #include <cstring> @@ -180,7 +187,6 @@ template <typename INT_T, typename CHAR_T> int store_int(INT_T *out, const CHAR_T *val, INT_T min, INT_T max) { errno = 0; - char *end = 0; long val_as_long = to_long(val); if (val_as_long == 0 && errno != 0) return __CILKRTS_SET_PARAM_INVALID; @@ -271,7 +277,7 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value) // // Number of stacks we'll hold in the per-worker stack cache. Maximum // value is 42. See __cilkrts_make_global_state for details. 
- return store_int(&g->stack_cache_size, value, 0, 42); + return store_int(&g->fiber_pool_size, value, 0, 42); } else if (strmatch(param, s_shared_stacks)) { @@ -280,7 +286,7 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value) // Maximum number of stacks we'll hold in the global stack // cache. Maximum value is 42. See __cilkrts_make_global_state for // details. - return store_int(&g->global_stack_cache_size, value, 0, 42); + return store_int(&g->global_fiber_pool_size, value, 0, 42); } else if (strmatch(param, s_nstacks)) { @@ -293,7 +299,9 @@ int set_param_imp(global_state_t* g, const CHAR_T* param, const CHAR_T* value) // Undocumented at this time, though there are plans to expose it. // The current implentation is for Linux debugging only and is not // robust enough for users. - return store_int<long>(&g->max_stacks, value, 0, INT_MAX); + if (cilkg_singleton_ptr) + return __CILKRTS_SET_PARAM_LATE; + return store_int<unsigned>(&g->max_stacks, value, 0, INT_MAX); } else if (strmatch(param, s_stack_size)) { @@ -356,6 +364,8 @@ global_state_t* cilkg_get_user_settable_values() // multiple threads from initializing this data. if (! cilkg_user_settable_values_initialized) { + size_t len; + // Preserve stealing disabled since it may have been set by the // debugger int stealing_disabled = g->stealing_disabled; @@ -377,12 +387,43 @@ global_state_t* cilkg_get_user_settable_values() g->force_reduce = 0; // Default Off g->P = hardware_cpu_count; // Defaults to hardware CPU count g->max_user_workers = 0; // 0 unless set by user - g->stack_cache_size = 7; // Arbitrary default - g->global_stack_cache_size = 3; // Arbitrary default - g->max_stacks = 0; // 0 == unlimited + g->fiber_pool_size = 7; // Arbitrary default + + g->global_fiber_pool_size = 3 * 3* g->P; // Arbitrary default + // 3*P was the default size of the worker array (including + // space for extra user workers). 
This parameter was chosen + // to match previous versions of the runtime. + + if (4 == sizeof(void *)) + g->max_stacks = 1200; // Only 1GB on 32-bit machines + else + g->max_stacks = 2400; // 2GB on 64-bit machines + + // If we have 2400 1MB stacks, that is 2 gb. If we reach this + // limit on a single-socket machine, we may have other + // problems. Is 2400 too small for large multicore machines? + + // TBD(jsukha, 11/27/2012): I set this limit on stacks to be a + // value independent of P. When running on a Xeon Phi with + // small values of P, I recall seeing a few microbenchmarks + // (e.g., fib) where a limit of 10*P seemed to be + // unnecessarily slowing things down. + // + // That being said, the code has changed sufficiently that + // this observation may no longer be true. + // + // Note: in general, the worst-case number of stacks required + // for a Cilk computation with spawn depth "d" on P workers is + // O(Pd). Code with unbalanced recursion may run into issues + // with this stack usage. + g->max_steal_failures = 128; // TBD: depend on max_workers? 
g->stack_size = 0; // 0 unless set by the user + // Assume no record or replay log for now + g->record_replay_file_name = NULL; + g->record_or_replay = RECORD_REPLAY_NONE; // set by user + if (always_force_reduce()) g->force_reduce = true; else if (cilkos_getenv(envstr, sizeof(envstr), "CILK_FORCE_REDUCE")) @@ -414,6 +455,33 @@ global_state_t* cilkg_get_user_settable_values() // total_workers must be computed now to support __cilkrts_get_total_workers g->total_workers = g->P + calc_max_user_workers(g) - 1; +#ifdef CILK_RECORD_REPLAY + // RecordReplay: See if we've been asked to replay a log + len = cilkos_getenv(envstr, 0, "CILK_REPLAY_LOG"); + if (len > 0) + { + len += 1; // Allow for trailing NUL + g->record_or_replay = REPLAY_LOG; + g->record_replay_file_name = (char *)__cilkrts_malloc(len); + cilkos_getenv(g->record_replay_file_name, len, "CILK_REPLAY_LOG"); + } + + // RecordReplay: See if we've been asked to record a log + len = cilkos_getenv(envstr, 0, "CILK_RECORD_LOG"); + if (len > 0) + { + if (RECORD_REPLAY_NONE != g->record_or_replay) + cilkos_warning("CILK_RECORD_LOG ignored since CILK_REPLAY_LOG is defined.\n"); + else + { + len += 1; // Allow for trailing NUL + g->record_or_replay = RECORD_LOG; + g->record_replay_file_name = (char *)__cilkrts_malloc(len); + cilkos_getenv(g->record_replay_file_name, len, "CILK_RECORD_LOG"); + } + } +#endif + cilkg_user_settable_values_initialized = true; } @@ -439,8 +507,6 @@ global_state_t* cilkg_init_global_state() // Get partially-initialized global state. global_state_t* g = cilkg_get_user_settable_values(); - int i, max_workers; - if (g->max_stacks > 0) { // nstacks is currently honored on non-Windows systems only. @@ -461,12 +527,19 @@ global_state_t* cilkg_init_global_state() // interaction with the local stack cache is specifically to help out // MIC. - g->stack_cache_size = 1; // One stack per worker cache. + // About max_stacks / P stacks, except we require at least 1 + // per pool. 
+ if (((int)g->max_stacks / g->P) < g->fiber_pool_size) + g->fiber_pool_size = g->max_stacks / g->P; - if (g->max_stacks < g->P) + if (g->fiber_pool_size <= 0) { + g->fiber_pool_size = 1; + } + + if ((int)g->max_stacks < g->P) g->max_stacks = g->P; - g->global_stack_cache_size = g->max_stacks; + g->global_fiber_pool_size = g->P * (g->fiber_pool_size+1); } // Number of bytes/address - validation for debugger integration @@ -483,7 +556,6 @@ global_state_t* cilkg_init_global_state() g->workers_running = 0; g->ltqsize = 1024; /* FIXME */ - g->stacks = 0; g->stack_size = cilkos_validate_stack_size(g->stack_size); g->failure_to_allocate_stack = 0; diff --git a/libcilkrts/runtime/global_state.h b/libcilkrts/runtime/global_state.h index 8409f0161cf..2ee02a572e3 100644 --- a/libcilkrts/runtime/global_state.h +++ b/libcilkrts/runtime/global_state.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -41,39 +46,23 @@ #include "frame_malloc.h" #include "stats.h" #include "bug.h" +#include "cilk_fiber.h" __CILKRTS_BEGIN_EXTERN_C -/** @brief Abstract, per-strand stack (system dependent) */ -typedef struct __cilkrts_stack __cilkrts_stack; - /** * Non-null place-holder for a stack handle that has no meaningful value. */ -#define PLACEHOLDER_STACK ((__cilkrts_stack *) -2) +#define PLACEHOLDER_FIBER ((cilk_fiber *) -2) /** - * Temporary place holder to use during a provably good steal, before the real - * stack handle is known. Differs from PLACEHOLDER_STACK in that this value - * is used in the case where the stack *is* assigned a meaningful value, but - * that meaningful value is not known (yet). 
+ * States for record_or_replay */ -#define BIND_PROVABLY_GOOD_STACK ((__cilkrts_stack *) -1) - -/** @brief Data structure for a cache of stack handles */ -typedef struct __cilkrts_stack_cache { - /** Mutex used to secure exclusive access to the cache */ - mutex lock; - - /** Max for cached stacks */ - unsigned int size; - - /** Count of cached stacks */ - unsigned int n; - - /** Array to hold cached stacks */ - __cilkrts_stack **stacks; -} __cilkrts_stack_cache; +enum record_replay_t { + RECORD_REPLAY_NONE, + RECORD_LOG, + REPLAY_LOG +}; /** * @brief The global state is a structure that is shared by all workers in @@ -101,7 +90,7 @@ typedef struct __cilkrts_stack_cache { * initialization and after deinitialization. */ -typedef /* COMMON_PORTABLE */ struct global_state_t { +struct global_state_t { /* COMMON_PORTABLE */ /* Fields described as "(fixed)" should not be changed after * initialization. @@ -115,52 +104,60 @@ typedef /* COMMON_PORTABLE */ struct global_state_t { * debugger integration library will need to be changed to match!!! *************************************************************************/ - int addr_size; /**< Number of bytes for an address, used by debugger (fixed)*/ + int addr_size; ///< Number of bytes for an address, used by debugger (fixed) - int system_workers; /**< Number of system workers (fixed) */ + int system_workers; ///< Number of system workers (fixed) /** - * Maximum number of user workers that can be bound to cilk workers. + * @brief USER SETTING: Maximum number of user workers that can be + * bound to cilk workers. + * * 0 unless set by user. Call cilkg_calc_max_user_workers to get * the value. 
*/ - int max_user_workers; /* USER SETTING - max Q (fixed) */ + int max_user_workers; - int total_workers; /**< Total number of worker threads allocated (fixed) */ + int total_workers; ///< Total number of worker threads allocated (fixed) - int workers_running; /**< True when system workers have beens started */ + int workers_running; ///< True when system workers have been started - /** Set by debugger to disable stealing (fixed) */ + /// Set by debugger to disable stealing (fixed) int stealing_disabled; - /** System-dependent part of the global state */ + /// System-dependent part of the global state struct global_sysdep_state *sysdep; - /** Array of worker structures. */ + /// Array of worker structures. __cilkrts_worker **workers; /******* END OF DEBUGGER-INTEGRATION FIELDS ***************/ - /** Number of frames in each worker's lazy task queue */ + /// Number of frames in each worker's lazy task queue __STDNS size_t ltqsize; /** + * @brief USER SETTING: Force all possible reductions. + * * TRUE if running a p-tool that requires reducers to call the reduce() - * method even if no actual stealing occurs + * method even if no actual stealing occurs. + * + * When set to TRUE, runtime will simulate steals, forcing calls to the + * reduce() methods of reducers. + * */ - int force_reduce; /* USER SETTING */ + int force_reduce; - /** Per-worker stack cache size */ - int stack_cache_size; /* USER SETTING */ + /// USER SETTING: Per-worker fiber pool size + int fiber_pool_size; - /** Global stack cache size */ - int global_stack_cache_size; /* USER SETTING */ + /// USER SETTING: Global fiber pool size + int global_fiber_pool_size; /** - * TRUE when workers should exit scheduling loop so we can shut down the - * runtime and free the global state. + * @brief TRUE when workers should exit scheduling loop so we can + * shut down the runtime and free the global state.
* - * Note that work_done will be checked *FREQUENTLY* in the scheduling loop + * @note @c work_done will be checked *FREQUENTLY* in the scheduling loop * by idle workers. We need to ensure that it's not in a cache line which * may be invalidated by other cores. The surrounding fields are either * constant after initialization or not used until shutdown (stats) so we @@ -168,66 +165,81 @@ typedef /* COMMON_PORTABLE */ struct global_state_t { */ volatile int work_done; - int under_ptool; /**< True when running under a serial PIN tool */ - - statistics stats; /**< Statistics on use of runtime */ + int under_ptool; ///< True when running under a serial PIN tool - /** - * Number of allocated stacks. When the runtime is compiled with - * profiling, workers use atomic operations to keep count. Otherwise - * the counter is zero. - */ - long stacks; + statistics stats; ///< Statistics on use of runtime /** - * Maximum number of stacks the runtime will allocate (apart from those - * created by the OS when worker threads are created). If max_stacks <= 0, - * there is no pre-defined maximum. + * @brief USER SETTING: Maximum number of stacks the runtime will + * allocate (apart from those created by the OS when worker + * threads are created). + * + * If max_stacks == 0,there is no pre-defined maximum. */ - long max_stacks; /* USER SETTING */ + unsigned max_stacks; - /** Size of each stack */ + /// Size of each stack size_t stack_size; - /** Global cache for per-worker memory */ + /// Global cache for per-worker memory struct __cilkrts_frame_cache frame_malloc; - /** Global cache of stacks */ - __cilkrts_stack_cache stack_cache; + /// Global fiber pool + cilk_fiber_pool fiber_pool; /** - * Track whether the runtime has failed to allocate a stack. This prevents - * multiple warnings from being issued. + * @brief Track whether the runtime has failed to allocate a + * stack. + * + * Setting this flag prevents multiple warnings from being + * issued. 
*/ int failure_to_allocate_stack; /** - * Buffer to force max_steal_failures to appear on a different cache line - * from the previous member variables. This is because max_steal_failures - * is read constantly and other modified values in the global state will + * @brief USER SETTING: indicate record or replay log. + * Set to NULL if not used in this run. + */ + char *record_replay_file_name; + + /** + * @brief Record/replay state. + * Valid states are: + * RECORD_REPLAY_NONE - Not recording or replaying a log + * RECORD_LOG - Recording a log for replay later + * REPLAY_LOG - Replay a log recorded earlier + */ + enum record_replay_t record_or_replay; + + /** + * @brief Buffer to force max_steal_failures to appear on a + * different cache line from the previous member variables. + * + * This padding is needed because max_steal_failures is read + * constantly and other modified values in the global state will * cause thrashing. */ char cache_buf[64]; /** - * Maximum number of times a thread should fail to steal before checking - * if Cilk is shutting down. + * @brief Maximum number of times a thread should fail to steal + * before checking if Cilk is shutting down. */ unsigned int max_steal_failures; - /** Pointer to scheduler entry point */ + /// Pointer to scheduler entry point void (*scheduler)(__cilkrts_worker *w); /** - * Buffer to force P and Q to appear on a different cache line from the - * previous member variables. + * @brief Buffer to force P and Q to appear on a different cache + * line from the previous member variables. */ char cache_buf_2[64]; - int P; /**< USER SETTING: number of system workers + 1 (fixed) */ - int Q; /**< Number of user threads currently bound to workers */ -} global_state_t; + int P; ///< USER SETTING: number of system workers + 1 (fixed) + int Q; ///< Number of user threads currently bound to workers +}; /** * @brief Initialize the global state object. 
This method must both diff --git a/libcilkrts/runtime/jmpbuf.c b/libcilkrts/runtime/jmpbuf.c index 5b34636daba..6c472240c1d 100644 --- a/libcilkrts/runtime/jmpbuf.c +++ b/libcilkrts/runtime/jmpbuf.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "jmpbuf.h" diff --git a/libcilkrts/runtime/jmpbuf.h b/libcilkrts/runtime/jmpbuf.h index 8d93915d6ca..5ea6c2e0c8d 100644 --- a/libcilkrts/runtime/jmpbuf.h +++ b/libcilkrts/runtime/jmpbuf.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. 
+ * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ /** @@ -44,33 +49,49 @@ #include <setjmp.h> #if 0 /* defined CILK_USE_C_SETJMP && defined JB_RSP */ -#define SP(SF) (SF)->ctx[0].__jmpbuf[JB_RSP] -#define FP(SF) (SF)->ctx[0].__jmpbuf[JB_RBP] -#define PC(SF) (SF)->ctx[0].__jmpbuf[JB_PC] +# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_RSP] +# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_RBP] +# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC] #elif 0 /* defined CILK_USE_C_SETJMP && defined JB_SP */ -#define SP(SF) (SF)->ctx[0].__jmpbuf[JB_SP] -#define FP(SF) (SF)->ctx[0].__jmpbuf[JB_BP] -#define PC(SF) (SF)->ctx[0].__jmpbuf[JB_PC] +# define JMPBUF_SP(ctx) (ctx)[0].__jmpbuf[JB_SP] +# define JMPBUF_FP(ctx) (ctx)[0].__jmpbuf[JB_BP] +# define JMPBUF_PC(ctx) (ctx)[0].__jmpbuf[JB_PC] #elif defined _WIN64 -#define SP(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rsp -#define FP(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rbp -#define PC(SF) ((_JUMP_BUFFER*)(&(SF)->ctx))->Rip +# define JMPBUF_SP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rsp +# define JMPBUF_FP(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rbp +# define JMPBUF_PC(ctx) ((_JUMP_BUFFER*)(&(ctx)))->Rip #elif defined _WIN32 -/** Fetch stack pointer from a __cilkrts_stack_frame */ -#define SP(SF) SF->ctx.Esp -/** Fetch frame pointer from a __cilkrts_stack_frame */ -#define FP(SF) SF->ctx.Ebp -/** Fetch program counter from a __cilkrts_stack_frame */ -#define PC(SF) SF->ctx.Eip + /** Fetch stack pointer from a __cilkrts_stack_frame */ +# define JMPBUF_SP(ctx) (ctx).Esp + /** Fetch frame pointer from a __cilkrts_stack_frame */ +# define JMPBUF_FP(ctx) (ctx).Ebp + /** Fetch program counter from a __cilkrts_stack_frame */ +# define JMPBUF_PC(ctx) (ctx).Eip #else /* defined __GNUC__ || defined __ICC */ -/* word 0 is frame address - word 1 is resume address - word 2 is stack address */ -#define FP(SF) (SF)->ctx[0] -#define PC(SF) (SF)->ctx[1] -#define SP(SF) (SF)->ctx[2] + /* word 0 is frame address + * word 1 is resume address + * word 2 is 
stack address */ +# define JMPBUF_FP(ctx) (ctx)[0] +# define JMPBUF_PC(ctx) (ctx)[1] +# define JMPBUF_SP(ctx) (ctx)[2] #endif +/** + * @brief Get frame pointer from jump buffer in __cilkrts_stack_frame. + */ +#define FP(SF) JMPBUF_FP((SF)->ctx) + +/** + * @brief Get program counter from jump buffer in __cilkrts_stack_frame. + */ +#define PC(SF) JMPBUF_PC((SF)->ctx) + +/** + * @brief Get stack pointer from jump buffer in __cilkrts_stack_frame. + */ +#define SP(SF) JMPBUF_SP((SF)->ctx) + + __CILKRTS_BEGIN_EXTERN_C /** diff --git a/libcilkrts/runtime/local_state.c b/libcilkrts/runtime/local_state.c index a8cabff624f..bc835da6243 100644 --- a/libcilkrts/runtime/local_state.c +++ b/libcilkrts/runtime/local_state.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>.
+ * @copyright + * Copyright (C) 2010-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -39,10 +44,20 @@ void run_scheduling_stack_fcn(__cilkrts_worker *w) w->l->post_suspend = 0; w->l->suspended_stack = 0; + + // Conceptually, after clearing w->l->frame_ff, + // w no longer owns the full frame ff. + // The next time another (possibly different) worker takes + // ownership of ff will be at a provably_good_steal on ff. + w->l->frame_ff = NULL; + CILK_ASSERT(fcn); CILK_ASSERT(ff2); - fcn(w, ff2, sf2); + + // After we run the scheduling stack function, we should + // (still) not have a full frame.
+ CILK_ASSERT(NULL == w->l->frame_ff); } /* End local_state.c */ diff --git a/libcilkrts/runtime/local_state.h b/libcilkrts/runtime/local_state.h index 1a1a9b2720f..92781e86883 100644 --- a/libcilkrts/runtime/local_state.h +++ b/libcilkrts/runtime/local_state.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -40,9 +45,13 @@ #include <internal/abi.h> #include "worker_mutex.h" #include "global_state.h" +#include "record-replay.h" +#include "signal_node.h" #include <setjmp.h> #include <stddef.h> +#include <stdio.h> + #ifndef _WIN32 # include <pthread.h> @@ -51,18 +60,20 @@ __CILKRTS_BEGIN_EXTERN_C /* Opaque types. */ -typedef struct signal_node_t signal_node_t; + struct full_frame; struct free_list; struct pending_exception_info; +/// Opaque type for replay entry. +typedef struct replay_entry_t replay_entry_t; /** - * Magic numbers for local_state, used for debugging + * @brief Magic numbers for local_state, used for debugging */ typedef unsigned long long ls_magic_t; /** - * Scheduling stack function: A function that is decided on the program stack, + * @brief Scheduling stack function: A function that is decided on the program stack, * but that must be executed on the scheduling stack. */ typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w, @@ -70,7 +81,7 @@ typedef void (*scheduling_stack_fcn_t) (__cilkrts_worker *w, __cilkrts_stack_frame *sf); /** - * Type of this worker. + * @brief Type of this worker. 
**/ typedef enum cilk_worker_type { @@ -81,10 +92,12 @@ typedef enum cilk_worker_type /** - * The local_state structure contains additional OS-independent + * @brief The local_state structure contains additional OS-independent * information that's associated with a worker, but doesn't need to be - * visible to the compiler. No compiler-generated code should need to - * know the layout of this structure. + * visible to the compiler. + * + * No compiler-generated code should need to know the layout of this + * structure. * * The fields of this struct can be classified as either local or * shared. @@ -111,8 +124,7 @@ typedef enum cilk_worker_type * that are involved in synchronization protocols (i.e., the THE * protocol). */ -/* COMMON_PORTABLE */ -typedef struct local_state +struct local_state /* COMMON_PORTABLE */ { /** This value should be in the first field in any local_state */ # define WORKER_MAGIC_0 ((ls_magic_t)0xe0831a4a940c60b8ULL) @@ -175,19 +187,76 @@ typedef struct local_state struct full_frame *next_frame_ff; /** + * This is set iff this is a WORKER_USER and there has been a steal. It + * points to the first frame that was stolen since the team was last fully + * sync'd. Only this worker may continue past a sync in this function. + * + * This field is set by a thief for a victim that is a user + * thread, while holding the victim's lock. + * It can be cleared without a lock by the worker that will + * continue executing past the sync. + * + * [shared read/write] + */ + struct full_frame *last_full_frame; + + /** + * Team on which this worker is a participant. When a user worker enters, + * its team is its own worker struct and it can never change teams. When a + * system worker steals, it adopts the team of its victim. + * + * When a system worker w steals, it reads victim->l->team and + * joins this team. w->l->team is constant until the next time w + * returns control to the runtime. + * We must acquire the worker lock to change w->l->team.
+ * + * @note This field is 64-byte aligned because it is the first in + * the group of shared read-only fields. We want this group to + * fall on a different cache line from the previous group, which + * is shared read-write. + * + * [shared read-only] + */ + __attribute__((aligned(64))) + __cilkrts_worker *team; + + /** + * Type of this worker + * + * This field changes only when a worker binds or unbinds. + * Otherwise, the field is read-only while the worker is bound. + * + * [shared read-only] + */ + cilk_worker_type type; + + /** * Lazy task queue of this worker - an array of pointers to stack frames. * * Read-only because deques are a fixed size in the current * implementation. + * + * @note This field is 64-byte aligned because it is the first in + * the group of local fields. We want this group to fall on a + * different cache line from the previous group, which is shared + * read-only. + * * [local read-only] */ + __attribute__((aligned(64))) __cilkrts_stack_frame **ltq; /** - * Stacks waiting to be reused + * Pool of fibers waiting to be reused. * [local read/write] */ - __cilkrts_stack_cache stack_cache; + cilk_fiber_pool fiber_pool; + + /** + * The fiber for the scheduling stacks. + * [local read/write] + */ + cilk_fiber* scheduling_fiber; /** * Saved pointer to the leaf node in thread-local storage, when a @@ -207,24 +276,6 @@ typedef struct local_state unsigned rand_seed; /** - * Type of this worker - * - * This field changes only when a worker binds or unbinds. - * Otherwise, the field is read-only while the worker is bound. - * - * [shared read-only] - */ - cilk_worker_type type; - - /** - * jmp_buf used to jump back into the runtime system after an - * unsuccessful steal check or sync. - * - * [local read/write] - */ - jmp_buf env; - - /** * Function to execute after transferring onto the scheduling stack. 
* * [local read/write] @@ -240,7 +291,7 @@ typedef struct local_state __cilkrts_stack_frame *suspended_stack; /** - * __cilkrts_stack that should be freed after returning from a + * cilk_fiber that should be freed after returning from a * spawn with a stolen parent or after stalling at a sync. * We calculate the stack to free when executing a reduction on @@ -252,7 +303,7 @@ typedef struct local_state * * [local read/write] */ - __cilkrts_stack* stack_to_free; + cilk_fiber* fiber_to_free; /** * Saved exception object for an exception that is being passed to @@ -263,14 +314,6 @@ typedef struct local_state struct pending_exception_info *pending_exception; /** - * Place to save return address so we can report it to Inspector - * - * Used only by Windows. - * [local read/write] - */ - void *sync_return_address; - - /** * Buckets for the memory allocator * * [local read/write] @@ -290,7 +333,7 @@ typedef struct local_state * Useful only when CILK_PROFIlE is compiled in. * [local read/write] */ - statistics stats; + statistics* stats; /** * Count indicates number of failures since last successful steal. This is @@ -301,63 +344,39 @@ typedef struct local_state unsigned int steal_failure_count; /** - * Team on which this worker is a participant. When a user worker enters, - * its team is its own worker struct and it can never change teams. When a - * system worker steals, it adopts the team of its victim. - * - * When a system worker w steals, it reads victim->l->team and - * joins this team. w->l->team is constant until the next time w - * returns control to the runtime. - * We must acquire the worker lock to change w->l->team. - * - * [shared read-only] - */ - __cilkrts_worker *team; - - /** - * This is set iff this is a WORKER_USER and there has been a steal. It - * points to the first frame that was stolen since the team was last fully - * sync'd. Only this worker may continue past a sync in this function. 
- * - * This field is set by a thief for a victim that is a user - * thread, while holding the victim's lock. - * It can be cleared without a lock by the worker that will - * continue exuecting past the sync. + * 1 if work was stolen from another worker. When true, this will flag + * setup_for_execution_pedigree to increment the pedigree when we resume + * execution to match the increment that would have been done on a return + * from a spawn helper. * - * [shared read/write] + * [local read/write] */ - struct full_frame *last_full_frame; + int work_stolen; /** - * NULL for WORKER_SYSTEMs (they are created on their scheduling stacks, so - * they already know where their scheduling stacks are). A WORKER_USER can - * jump to this stack when it returns to a stolen parent and wants to begin - * stealing. + * File pointer for record or replay + * Does FILE * work on Windows? + * During record, the file will be opened in write-only mode. + * During replay, the file will be opened in read-only mode. * * [local read/write] */ - void *scheduler_stack; + FILE *record_replay_fptr; /** - * 0 if the user thread has not yet been imported. 1 if the user thread - * has been imported. \"Imported\" means the user thread has returned to a - * stolen parent and a scheduling stack or fiber has been created for it. - * Ignored for system workers. + * Root of array of replay entries - NULL if we're not replaying a log * * [local read/write] */ - int user_thread_imported; + replay_entry_t *replay_list_root; /** - * 1 if work was stolen from another worker. When true, this will flag - * setup_for_execution_pedigree to increment the pedigree when we resume - * execution to match the increment that would have been done on a return - * from a spawn helper. 
+ * Current replay entry - NULL if we're not replaying a log * * [local read/write] */ - int work_stolen; - + replay_entry_t *replay_list_entry; + /** * Separate the signal_node from other things in the local_state by the * sizeof a cache line for performance reasons. @@ -383,7 +402,7 @@ typedef struct local_state * [shared read-only] */ ls_magic_t worker_magic_1; -} local_state; +}; /** * Perform cleanup according to the function set before the longjmp(). diff --git a/libcilkrts/runtime/metacall_impl.c b/libcilkrts/runtime/metacall_impl.c index ae311dc103f..65a2aa02890 100644 --- a/libcilkrts/runtime/metacall_impl.c +++ b/libcilkrts/runtime/metacall_impl.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "metacall_impl.h" @@ -32,12 +37,12 @@ NOINLINE CILK_API_VOID __cilkrts_metacall(unsigned int tool, unsigned int code, void *data) { +#ifdef ENABLE_NOTIFY_ZC_INTRINSIC // The metacall type, code and data are packed together into a single // struct which will be interpreted by the tool. This function is the // one and only use of a "cilkscreen_metacall" annotation metacall_data_t d = { tool, code, data }; -#ifdef ENABLE_NOTIFY_ZC_INTRINSIC // Note that Inspector uses probe mode, and is implementing the metacall // interface to force the runtime to run with a single worker. 
So // __cilkrts_metacall must use __notify_intrinsic instead of diff --git a/libcilkrts/runtime/metacall_impl.h b/libcilkrts/runtime/metacall_impl.h index d68645fe218..07917fddb52 100644 --- a/libcilkrts/runtime/metacall_impl.h +++ b/libcilkrts/runtime/metacall_impl.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -91,8 +96,8 @@ int __cilkrts_running_under_sequential_ptool(void); /** * Notify Cilkscreen of the extent of the stack. * - * @param in begin Start (low address) of stack - * @param in end One past high address of stack + * @param[in] begin Start (low address) of stack + * @param[in] end One past high address of stack */ void __cilkrts_cilkscreen_establish_c_stack(char *begin, char *end); diff --git a/libcilkrts/runtime/os-unix.c b/libcilkrts/runtime/os-unix.c index 3fa50c88d89..9a8543a16ef 100644 --- a/libcilkrts/runtime/os-unix.c +++ b/libcilkrts/runtime/os-unix.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ #ifdef __linux__ @@ -45,8 +50,13 @@ # include <sys/sysctl.h> // Uses sysconf(_SC_NPROCESSORS_ONLN) in verbose output #elif defined __FreeBSD__ +// No additional include files #elif defined __CYGWIN__ // Cygwin on Windows - no additional include files +#elif defined __VXWORKS__ +# include <vxWorks.h> +# include <vxCpuLib.h> +# include <taskLib.h> #else # error "Unsupported OS" #endif @@ -74,7 +84,12 @@ #if !defined CILK_WORKER_TLS static int cilk_keys_defined; -static pthread_key_t worker_key, reducer_key, tbb_interop_key, pedigree_leaf_key; +static pthread_key_t worker_key, pedigree_leaf_key, tbb_interop_key; + +#if SUPPORT_GET_CURRENT_FIBER > 0 +static pthread_key_t fiber_key; +#endif + static void *serial_worker; @@ -88,8 +103,7 @@ static void __cilkrts_pedigree_leaf_destructor(void* pedigree_tls_ptr) // Assert that we have either one or two nodes // left in the pedigree chain. // If we have more, then something is going wrong... - CILK_ASSERT((!pedigree_tls->parent) || - (pedigree_tls->parent && (!pedigree_tls->parent->parent))); + CILK_ASSERT(!pedigree_tls->parent || !pedigree_tls->parent->parent); __cilkrts_free(pedigree_tls); } } @@ -102,19 +116,29 @@ void __cilkrts_init_tls_variables(void) on cilk_keys_defined. 
*/ if (cilk_keys_defined) return; - status = pthread_key_create(&worker_key, 0); - CILK_ASSERT (status == 0); - status = pthread_key_create(&reducer_key, 0); - CILK_ASSERT (status == 0); - status = pthread_key_create(&tbb_interop_key, 0); + status = pthread_key_create(&worker_key, NULL); CILK_ASSERT (status == 0); status = pthread_key_create(&pedigree_leaf_key, __cilkrts_pedigree_leaf_destructor); CILK_ASSERT (status == 0); + status = pthread_key_create(&tbb_interop_key, NULL); + CILK_ASSERT (status == 0); + +#if SUPPORT_GET_CURRENT_FIBER > 0 + status = pthread_key_create(&fiber_key, NULL); + CILK_ASSERT (status == 0); +#endif cilk_keys_defined = 1; return; } +COMMON_SYSDEP +void* cilkos_get_current_thread_id(void) +{ + return (void*)pthread_self(); +} + + CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker() { if (__builtin_expect(cilk_keys_defined, 1)) @@ -129,14 +153,6 @@ CILK_ABI_WORKER_PTR __cilkrts_get_tls_worker_fast() return (__cilkrts_worker *)pthread_getspecific(worker_key); } -COMMON_SYSDEP struct cilkred_map *__cilkrts_get_tls_reducer(void) -{ - if (__builtin_expect(cilk_keys_defined, 1)) - return (struct cilkred_map *)pthread_getspecific(reducer_key); - else - return 0; -} - COMMON_SYSDEP __cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void) { @@ -188,6 +204,17 @@ __cilkrts_pedigree *__cilkrts_get_tls_pedigree_leaf(int create_new) return pedigree_tls; } +#if SUPPORT_GET_CURRENT_FIBER > 0 +COMMON_SYSDEP +cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void) +{ + if (__builtin_expect(cilk_keys_defined, 1)) + return (cilk_fiber_sysdep *)pthread_getspecific(fiber_key); + else + return NULL; +} +#endif + COMMON_SYSDEP void __cilkrts_set_tls_worker(__cilkrts_worker *w) { @@ -203,11 +230,12 @@ void __cilkrts_set_tls_worker(__cilkrts_worker *w) } } -COMMON_SYSDEP void __cilkrts_set_tls_reducer(struct cilkred_map *r) +COMMON_SYSDEP +void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t) { if (__builtin_expect(cilk_keys_defined, 1)) { int status; - 
status = pthread_setspecific(reducer_key, r); + status = pthread_setspecific(tbb_interop_key, t); CILK_ASSERT (status == 0); return; } @@ -215,29 +243,30 @@ COMMON_SYSDEP void __cilkrts_set_tls_reducer(struct cilkred_map *r) } COMMON_SYSDEP -void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t) +void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf) { if (__builtin_expect(cilk_keys_defined, 1)) { int status; - status = pthread_setspecific(tbb_interop_key, t); + status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf); CILK_ASSERT (status == 0); return; } abort(); } - +#if SUPPORT_GET_CURRENT_FIBER > 0 COMMON_SYSDEP -void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf) +void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber) { if (__builtin_expect(cilk_keys_defined, 1)) { int status; - status = pthread_setspecific(pedigree_leaf_key, pedigree_leaf); + status = pthread_setspecific(fiber_key, fiber); CILK_ASSERT (status == 0); return; } abort(); } +#endif #else void __cilkrts_init_tls_variables(void) @@ -245,7 +274,7 @@ void __cilkrts_init_tls_variables(void) } #endif -#if defined __linux__ +#if defined (__linux__) && ! defined(ANDROID) /* * Get the thread id, rather than the pid. In the case of MIC offload, it's * possible that we have multiple threads entering Cilk, and each has a @@ -312,7 +341,14 @@ static int linux_get_affinity_count (int tid) COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void) { -#if defined __linux__ +#if defined ANDROID + return sysconf (_SC_NPROCESSORS_ONLN); +#elif defined __MIC__ + /// HACK: Usually, the 3rd and 4th hyperthreads are not beneficial + /// on KNC. Also, ignore the last core. + int P = sysconf (_SC_NPROCESSORS_ONLN); + return P/2 - 2; +#elif defined __linux__ int affinity_count = linux_get_affinity_count(linux_gettid()); return (0 != affinity_count) ? 
affinity_count : sysconf (_SC_NPROCESSORS_ONLN); @@ -331,6 +367,8 @@ COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void) return ncores; // Just get the number of processors // return sysconf(_SC_NPROCESSORS_ONLN); +#elif defined __VXWORKS__ + return __builtin_popcount( vxCpuEnabledGet() ); #else #error "Unknown architecture" #endif @@ -360,7 +398,7 @@ COMMON_SYSDEP void __cilkrts_short_pause(void) #elif defined __i386__ || defined __x86_64 __asm__("pause"); #else -# warning __cilkrts_short_pause undefined +# warning __cilkrts_short_pause empty #endif } @@ -377,12 +415,16 @@ COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x) COMMON_SYSDEP void __cilkrts_sleep(void) { +#ifdef __VXWORKS__ + taskDelay(1); +#else usleep(1); +#endif } COMMON_SYSDEP void __cilkrts_yield(void) { -#if __APPLE__ || __FreeBSD__ +#if __APPLE__ || __FreeBSD__ || __VXWORKS__ // On MacOS, call sched_yield to yield quantum. I'm not sure why we // don't do this on Linux also. sched_yield(); @@ -393,6 +435,10 @@ COMMON_SYSDEP void __cilkrts_yield(void) // giving up the processor and latency starting up when work becomes // available _mm_delay_32(1024); +#elif defined(ANDROID) + // On Android, call sched_yield to yield quantum. I'm not sure why we + // don't do this on Linux also. + sched_yield(); #else // On Linux, call pthread_yield (which in turn will call sched_yield) // to yield quantum. @@ -488,4 +534,9 @@ size_t cilkos_validate_stack_size(size_t specified_stack_size) { return specified_stack_size; } +long cilkos_atomic_add(volatile long* p, long x) +{ + return __sync_add_and_fetch(p, x); +} + /* End os-unix.c */ diff --git a/libcilkrts/runtime/os.h b/libcilkrts/runtime/os.h index 192b21a3e5e..9630de02b02 100644 --- a/libcilkrts/runtime/os.h +++ b/libcilkrts/runtime/os.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -37,7 +42,8 @@ #define INCLUDED_OS_DOT_H #include "rts-common.h" -#include <cilk/common.h> +#include "cilk/common.h" +#include "cilk-tbb-interop.h" #ifdef __cplusplus # include <cstddef> @@ -45,21 +51,8 @@ # include <stddef.h> #endif -// #ifndef _WIN32 -// # include <pthread.h> // For pthread_key_t -// #endif - -// Forward declarations -typedef struct __cilk_tbb_stack_op_thunk __cilk_tbb_stack_op_thunk; - __CILKRTS_BEGIN_EXTERN_C -#ifdef _WIN32 -typedef unsigned cilkos_thread_id_t; -#else -typedef void* cilkos_thread_id_t; -#endif - // /* Thread-local storage */ // #ifdef _WIN32 @@ -74,27 +67,28 @@ typedef void* cilkos_thread_id_t; /* The RTS assumes that some thread-local state exists that stores the worker and reducer map currently associated with a thread. These routines manipulate this state. */ -typedef struct __cilkrts_worker __cilkrts_worker; -typedef struct cilkred_map cilkred_map; -typedef struct __cilkrts_pedigree __cilkrts_pedigree; +/** @brief Thread-local state for cilk fibers. */ +typedef struct cilk_fiber_sysdep cilk_fiber_sysdep; + +/** @brief Initialize all TLS variables for Cilk. */ COMMON_SYSDEP void __cilkrts_init_tls_variables(void); +/** @brief Set worker struct in TLS. */ COMMON_SYSDEP void __cilkrts_set_tls_worker(__cilkrts_worker *w) cilk_nothrow; -/* Likewise for reducer maps */ -COMMON_SYSDEP cilkred_map *__cilkrts_get_tls_reducer(void) cilk_nothrow; - -COMMON_SYSDEP void __cilkrts_set_tls_reducer(cilkred_map *) cilk_nothrow; - -/* Ditto for TBB-interop structures. */ +/** @brief Get stack_op for TBB-interop structures from TLS. 
*/ COMMON_SYSDEP __cilk_tbb_stack_op_thunk *__cilkrts_get_tls_tbb_interop(void); + +/** @brief Set stack_op for TBB-interop structures in TLS. */ COMMON_SYSDEP void __cilkrts_set_tls_tbb_interop(__cilk_tbb_stack_op_thunk *t); /** + * @brief Get the pointer to the pedigree leaf node from TLS. + * * Function to get a pointer to the thread's pedigree leaf node. This * pointer can be NULL. */ @@ -102,24 +96,53 @@ COMMON_SYSDEP __cilkrts_pedigree * __cilkrts_get_tls_pedigree_leaf(int create_new); /** - * Set the pointer to the pedigree leaf node. + * @brief Sets the pointer to the pedigree leaf node in TLS. * * If the previous pointer value was not NULL, it is the caller's * responsibility to ensure that previous pointer value is saved and * freed. + * + * @param pedigree_leaf The leaf node to store into TLS. */ COMMON_SYSDEP void __cilkrts_set_tls_pedigree_leaf(__cilkrts_pedigree* pedigree_leaf); -/* Return number of CPUs supported by this hardware, using whatever definition + +#if SUPPORT_GET_CURRENT_FIBER > 0 +/** + * @brief Get the cilk_fiber from TLS. + */ +COMMON_SYSDEP +cilk_fiber_sysdep* cilkos_get_tls_cilk_fiber(void); + +/** + * @brief Set the cilk_fiber in TLS. + * + * @param fiber The fiber to store into TLS. + */ +COMMON_SYSDEP +void cilkos_set_tls_cilk_fiber(cilk_fiber_sysdep* fiber); +#endif + +/** + * @brief Function for returning the current thread id. + * @warning This function is useful for debugging purposes only. + */ +COMMON_SYSDEP +void* cilkos_get_current_thread_id(void); + +/** @brief Return number of CPUs supported by this hardware, using whatever definition of CPU is considered appropriate. */ COMMON_SYSDEP int __cilkrts_hardware_cpu_count(void); -/* timer support */ +/** @brief Get current value of timer */ COMMON_SYSDEP unsigned long long __cilkrts_getticks(void); /* Machine instructions */ + +/// Stall execution for a few cycles. 
COMMON_SYSDEP void __cilkrts_short_pause(void); +/// Wrapper for xchg instruction COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x); /* gcc before 4.4 does not implement __sync_synchronize properly */ @@ -153,51 +176,67 @@ COMMON_SYSDEP int __cilkrts_xchg(volatile int *ptr, int x); // # pragma intrinsic(_ReadWriteBarrier) // # define __cilkrts_fence() _ReadWriteBarrier() #else -COMMON_SYSDEP void __cilkrts_fence(void); +COMMON_SYSDEP void __cilkrts_fence(void); ///< MFENCE instruction #endif -COMMON_SYSDEP void __cilkrts_sleep(void); /* Sleep briefly */ -COMMON_SYSDEP void __cilkrts_yield(void); /* Yield quantum */ +COMMON_SYSDEP void __cilkrts_sleep(void); ///< Sleep briefly +COMMON_SYSDEP void __cilkrts_yield(void); ///< Yield quantum -/* - * Gets environment variable 'varname' and copy its value into 'value'. +/** + * @brief Gets environment variable 'varname' and copy its value into 'value'. + * * If the entire value, including the null terminator fits into 'vallen' * bytes, then returns the length of the value excluding the null. Otherwise, * leaves the contents of 'value' undefined and returns the number of * characters needed to store the environment variable's value, *including* * the null terminator. + * + * @param value Buffer to store value. + * @param vallen Length of value buffer + * @param varname Name of the environment variable. + * @return Length of value buffer (excluding the null). */ COMMON_SYSDEP __STDNS size_t cilkos_getenv(char* value, __STDNS size_t vallen, const char* varname); -/* - * Unrecoverable error: Print an error message and abort execution. +/** + * @brief Unrecoverable error: Print an error message and abort execution. */ COMMON_SYSDEP void cilkos_error(const char *fmt, ...); -/* - * Print a warning message and return. +/** + * @brief Print a warning message and return. 
*/ COMMON_SYSDEP void cilkos_warning(const char *fmt, ...); -/* - * Convert the user's specified stack size into a "reasonable" value - * for the current OS. +/** + * @brief Convert the user's specified stack size into a "reasonable" + * value for the current OS. + * + * @param specified_stack_size User-specified stack size. + * @return New stack size value, modified for the OS. */ COMMON_SYSDEP size_t cilkos_validate_stack_size(size_t specified_stack_size); -#ifdef _WIN32 -/* - * Windows-only low-level functions for processor groups. +/** + * @brief Atomic addition: computes *p += x. + * + * @param p Pointer to value to update + * @param x Value of x. */ +COMMON_SYSDEP long cilkos_atomic_add(volatile long* p, long x); +#ifdef _WIN32 + +/** + * @brief Windows-only low-level functions for processor groups. + */ typedef struct _GROUP_AFFINITY GROUP_AFFINITY; -/* - * init_processor_group_function_ptrs - * - * Probe the executing OS to see if it supports processor groups. These - * functions are expected to be available in Windows 7 or later. +/** + * @brief Probe the executing OS to see if it supports processor + * groups. These functions are expected to be available in Windows 7 + * or later. */ void win_init_processor_groups(void); @@ -208,8 +247,7 @@ int win_set_thread_group_affinity(/*HANDLE*/ void* hThread, GROUP_AFFINITY* PreviousGroupAffinity); /** - * This method should be called to clean up any state it allocated in - * TLS. + * @brief Cleans up any state allocated in TLS. * * Only defined for Windows because Linux calls destructors for each * thread-local variable. 
diff --git a/libcilkrts/runtime/os_mutex-unix.c b/libcilkrts/runtime/os_mutex-unix.c index fce65c981ea..fe99cffc70a 100644 --- a/libcilkrts/runtime/os_mutex-unix.c +++ b/libcilkrts/runtime/os_mutex-unix.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "os_mutex.h" @@ -96,7 +101,6 @@ struct os_mutex *__cilkrts_os_mutex_create(void) void __cilkrts_os_mutex_lock(struct os_mutex *p) { int status; - status = pthread_mutex_lock (&p->mutex); ITT_SYNC_ACQUIRED(p); if (__builtin_expect(status, 0) == 0) @@ -109,20 +113,16 @@ void __cilkrts_os_mutex_lock(struct os_mutex *p) status, p); } -#if 0 int __cilkrts_os_mutex_trylock(struct os_mutex *p) { int status; - status = pthread_mutex_trylock (&p->mutex); return (status == 0); } -#endif void __cilkrts_os_mutex_unlock(struct os_mutex *p) { int status; - ITT_SYNC_RELEASING(p); status = pthread_mutex_unlock (&p->mutex); CILK_ASSERT(status == 0); diff --git a/libcilkrts/runtime/os_mutex.h b/libcilkrts/runtime/os_mutex.h index 154fcd0b9cf..80f0ebc5725 100644 --- a/libcilkrts/runtime/os_mutex.h +++ b/libcilkrts/runtime/os_mutex.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ /** @@ -42,7 +47,9 @@ __CILKRTS_BEGIN_EXTERN_C +/// Opaque type typedef struct os_mutex os_mutex; + /** * Allocate and initialize an os_mutex * @@ -57,7 +64,14 @@ COMMON_SYSDEP os_mutex* __cilkrts_os_mutex_create(void); */ COMMON_SYSDEP void __cilkrts_os_mutex_lock(os_mutex *m); -/*COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m);*/ +/** + * Try to acquire the os_mutex. + * + * @param m The os_mutex to try to acquire + * @return 0 if the lock acquire failed + * @return nonzero if the lock was acquired + */ +COMMON_SYSDEP int __cilkrts_os_mutex_trylock(os_mutex *m); /** * Release the os_mutex diff --git a/libcilkrts/runtime/pedigrees.c b/libcilkrts/runtime/pedigrees.c index 4a66b4e9327..5d00f9aade3 100644 --- a/libcilkrts/runtime/pedigrees.c +++ b/libcilkrts/runtime/pedigrees.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2007-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. 
If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2007-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ diff --git a/libcilkrts/runtime/pedigrees.h b/libcilkrts/runtime/pedigrees.h index 8b12e650145..a38d2a97a6d 100644 --- a/libcilkrts/runtime/pedigrees.h +++ b/libcilkrts/runtime/pedigrees.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ #ifndef INCLUDED_PEDIGREES_DOT_H diff --git a/libcilkrts/runtime/record-replay.cpp b/libcilkrts/runtime/record-replay.cpp new file mode 100644 index 00000000000..1c0ada6d13e --- /dev/null +++ b/libcilkrts/runtime/record-replay.cpp @@ -0,0 +1,765 @@ +/* record-replay.cpp -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + * + **************************************************************************/ + +/* + * Implementation of the record/replay functionality for Cilk Plus + */ + +#include <cstring> +#include <vector> +#include <stdlib.h> + +// clang is really strict about printf formats, so use the annoying integer +// printf macros. 
Unfortunately they're not available on Windows +#ifdef _WIN32 +#define PRIu64 "llu" +#else +#define __STDC_FORMAT_MACROS 1 +#include <inttypes.h> +#endif + +#include "record-replay.h" +#include "bug.h" +#include "internal/abi.h" +#include "local_state.h" +#include "full_frame.h" +#include "global_state.h" +#include "cilk_malloc.h" +#include "os.h" // for cilkos_error() + +#if RECORD_ON_REPLAY +#pragma message ("*** Record on Replay is enabled!") +#endif + +// Defined to write sequence number to the logs. Note that you cannot +// diff logs with sequence numbers because the numbers may increment in +// different orders. +//#define INCLUDE_SEQUENCE_NUMBER 1 + +const int PED_VERSION = 1; // Log recording version + +// Log types +enum ped_type_t +{ + ped_type_unknown, + ped_type_steal, + ped_type_sync, + ped_type_orphaned, + ped_type_last // Flags end of the list +}; + +// Log type strings +#define PED_TYPE_STR_STEAL "Steal" +#define PED_TYPE_STR_SYNC "Sync" +#define PED_TYPE_STR_WORKERS "Workers" +#define PED_TYPE_STR_ORPHANED "Orphaned" + +#define PED_TYPE_SIZE 16 // Buffer size for the type of pedigree. Must + // hold largest pedigree record type string. +#define PEDIGREE_BUFF_SIZE 512 // Buffer size for the string representation + // of a pedigree. 
+ +/** + * Data we store for a replay log entry + */ +typedef struct replay_entry_t +{ + uint64_t *m_reverse_pedigree; /**< Reverse pedigree for replay log entry */ + ped_type_t m_type; /**< Type of replay log entry */ + int16_t m_pedigree_len; /**< Number of terms in reverse pedigree */ + int16_t m_value; /**< Victim for STEALs, 0 if matching steal found for ORPHANs */ + + /** + * Load data read from the log into the entry + */ + bool load(const char *type, const char *pedigee_str, int32_t value1, int32_t value2) + { + // Convert the type into an enum + if (0 == strcmp(type, PED_TYPE_STR_STEAL)) + { + m_type = ped_type_steal; + m_value = (int16_t)value1; // Victim + } + else + { + m_value = -1; // Victim not valid + if (0 == strcmp(type, PED_TYPE_STR_SYNC)) + m_type = ped_type_sync; + else if (0 == strcmp(type, PED_TYPE_STR_ORPHANED)) + m_type = ped_type_orphaned; + else + { + m_type = ped_type_unknown; + return false; + } + } + + // Parse the pedigree + m_pedigree_len = 0; + + const char *p = pedigee_str; + char *end; + + uint64_t temp_pedigree[PEDIGREE_BUFF_SIZE/2]; + + while(1) + { + temp_pedigree[m_pedigree_len++] = (uint64_t)strtol(p, &end, 10); + if ('\0' == *end) + break; + p = end + 1; + } + + // Allocate memory to hold the pedigree. + // Copy the pedigree in reverse order since that's the order we'll + // traverse it + m_reverse_pedigree = + (uint64_t *)__cilkrts_malloc(sizeof(int64_t) * m_pedigree_len); + for (int n = 0; n < m_pedigree_len; n++) + m_reverse_pedigree[n] = temp_pedigree[(m_pedigree_len - 1) - n]; + + return true; + } + + /** + * Match this entry against the data supplied. This includes walking the + * pedigree from the specified node. 
+ */ + bool match (ped_type_t type, const __cilkrts_pedigree *node, int victim = -1) + { + int i = 0; + + // If the type isn't what they're seeking, we don't have a match + if (type != m_type) + return false; + + // If we're looking for a STEAL, then the victim must match + if ((type == ped_type_steal) && (victim != m_value)) + return false; + + // Compare the current pedigree against what was recorded + while ((NULL != node) && (i < m_pedigree_len)) + { + // If we've got a pedigree rank difference, then we don't have + // a match + if (node->rank != m_reverse_pedigree[i]) + return false; + node = node->parent; + i++; + } + + // Make sure we exhausted both the pedigree chain and the recorded + // pedigree + return ((NULL == node) && (i == m_pedigree_len)); + } + + /** + * Advance to the next entry, skipping any ORPHANED records we didn't see + * a matching STEAL for + */ + replay_entry_t *next_entry() + { + replay_entry_t *entry = this; + + // You can't go beyond the end + if (ped_type_last == entry->m_type) + return entry; + + // Advance to the next entry + entry++; + + // Skip any ORPHANED records that don't have a matching steal. We + // initialized the value field to -1 for ORPHANED. After loading all + // the log data, we iterated through all the STEAL records setting the + // matching ORPHANED record's value field to 0. So if an ORPHANED + // record's value field is still -1, it doesn't have a matching STEAL + // record, and I don't know why we chose not to return from the + // spawned function. + while ((ped_type_orphaned == entry->m_type) && (-1 == entry->m_value)) + { + entry++; + } + + return entry; + } + + /** + * Release any allocated resources + */ + void unload() + { + __cilkrts_free(m_reverse_pedigree); + m_reverse_pedigree = NULL; + } + +} replay_entry_t; + +__CILKRTS_BEGIN_EXTERN_C + +/** + * Walk the pedigree and generate a string representation with underscores + * between terms. Currently does a recursive walk to generate a forward + * pedigree. 
+ * + * @param p The buffer that is to be filled. Assumed to be PEDIGREE_BUFF_SIZE + * characters long + * @param pnode The initial pedigree term to be written. + * + * @return A pointer into the pedigree string buffer after a term has been + * written. + */ +static +char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode) +{ + CILK_ASSERT(pnode); + if (pnode->parent) + { + p = walk_pedigree_nodes(p, pnode->parent); + p += sprintf(p, "_"); + } + + return p + sprintf(p, "%" PRIu64, pnode->rank); +} + +/** + * Write a record to a replay log file. + * + * @param w The worker we're writing the pedigree for. + * @param type The type of the pedigree record, as a string + * @param initial_node The initial pedigree node to be written, or NULL if + * there is no pedigree for this record type. + * @param i1 First integer value to be written to the record. + * @param i2 Second integer value to be written to the record. Only applies + * to STEAL records. Defaults to -1 (unused). The second value is always + * written to make parsing easier. + */ +static +void write_to_replay_log (__cilkrts_worker *w, const char *type, + const __cilkrts_pedigree *initial_node, + int i1 = -1, int i2 = -1) +{ + char pedigree[PEDIGREE_BUFF_SIZE]; + + // If we don't have an initial pedigree node, just use "0" to fill the slot + if (NULL == initial_node) + strcpy(pedigree, "0"); + else + walk_pedigree_nodes(pedigree, initial_node); + +#ifndef INCLUDE_SEQUENCE_NUMBER + // Simply write the record + fprintf(w->l->record_replay_fptr, "%s %s %d %d\n", + type, pedigree, i1, i2); +#else + // Write the record with a sequence number. 
The sequence number should + // always be the last term, and ignored on read + + static long volatile seq_num = 0; + long write_num; + + // Atomic increment functions are compiler/OS-specific +#ifdef _WIN32 + write_num = _InterlockedIncrement(&seq_num); +#else /* GCC */ + write_num = __sync_add_and_fetch(&seq_num, 1); +#endif // _WIN32 + + fprintf(w->l->record_replay_fptr, "%s %s %d %d %ld\n", + type, pedigree, i1, i2, write_num); +#endif // INCLUDE_SEQUENCE_NUMBER + + fflush(w->l->record_replay_fptr); +} + +/** + * Record data for a successful steal. + * + * The pedigree for a STEAL record is the pedigree of the stolen frame. + * + * @note It's assumed that replay_record_steal() has already checked that we're + * recording a log and that the record/replay functionality has not been + * compiled out. + * + * @param w The worker stealing a frame. + * @param victim_id The ID of the worker which had its frame stolen. + */ +void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id) +{ + // Follow the pedigree chain using worker's stack frame + CILK_ASSERT(w->l->next_frame_ff); + CILK_ASSERT(w->l->next_frame_ff->call_stack); + + // Record steal: STEAL pedigree victim_id thief_id + write_to_replay_log (w, PED_TYPE_STR_STEAL, + &(w->l->next_frame_ff->call_stack->parent_pedigree), + victim_id); +} + +/** + * Record data for the worker that continues from a sync + * + * The pedigree for a SYNC record is the pedigree at the sync. + * + * @note It's assumed that replay_record_sync() has already checked that we're + * recording a log and that the record/replay functionality has not been + * compiled out. + * + * @param w The worker continuing from a sync.
+ */ +void replay_record_sync_internal(__cilkrts_worker *w) +{ + // Record sync: SYNC pedigree last_worker_id + write_to_replay_log (w, PED_TYPE_STR_SYNC, &w->pedigree); +} + +/** + * Record the pedigree of an attempt to return to a stolen parent + * + * The pedigree for an ORPHANED record is the pedigree of our parent + * + * @note It's assumed that replay_record_orphaned() has already checked that + * we're recording a log and that the record/replay functionality has not + * been compiled out. + * + * @param w The worker noting that it has been orphaned. + */ +void replay_record_orphaned_internal(__cilkrts_worker *w) +{ + // Record orphan: ORPHANED pedigree self + write_to_replay_log (w, PED_TYPE_STR_ORPHANED, w->pedigree.parent); +} + +/** + * Attempt to match a SYNC record. We have a match when this worker was + * recorded returning from the current call to __cilkrts_sync() with the + * same pedigree and this was the worker that continued from the sync, since + * it was the last to sync. + * + * If we find a match, the caller is expected to stall until it is the last worker + * to reach a sync so it will be the worker to continue from the sync. + * + * @note It's assumed that replay_match_sync_pedigree() has already returned + * if we're not replaying a log, or if record/replay functionality has + * been compiled out. + * + * @param w The worker we're checking to see if we've got a match + */ +int replay_match_sync_pedigree_internal(__cilkrts_worker *w) +{ + // Return true if we have a match + if (w->l->replay_list_entry->match(ped_type_sync, &w->pedigree)) + return 1; + else + return 0; +} + +/** + * Advance to the next log entry from a SYNC record. Consume the current + * SYNC record on this worker and advance to the next one. + * + * @note It's assumed that replay_advance_from_sync() has already returned if + * we're not replaying a log, or if record/replay functionality has been + * compiled out.
+ * + * @param w The worker whose replay log we're advancing. + */ +void replay_advance_from_sync_internal (__cilkrts_worker *w) +{ + // The current replay entry must be a SYNC + CILK_ASSERT(ped_type_sync == w->l->replay_list_entry->m_type); + + // Advance to the next entry + w->l->replay_list_entry = w->l->replay_list_entry->next_entry(); +} + +/** + * Called from random_steal() to override the ID of the randomly chosen victim + * worker which this worker will attempt to steal from. Returns the worker id + * of the next victim this worker was recorded stealing from, or -1 if the + * next record in the log is not a STEAL. + * + * @note This call does NOT attempt to match the pedigree. That will be done + * by replay_match_victim_pedigree() after random_steal() has locked the victim + * worker. + * + * @param w The __cilkrts_worker we're executing on. The worker's replay log + * is checked for a STEAL record. If we've got one, the stolen worker ID is + * returned. + * + * @return -1 if the next record is not a STEAL + * @return recorded stolen worker ID if we've got a matching STEAL record + */ +int replay_get_next_recorded_victim_internal(__cilkrts_worker *w) +{ + // If the next record isn't a STEAL, abort the attempt to steal work + if (ped_type_steal != w->l->replay_list_entry->m_type) + return -1; + + // Return the victim's worker ID from the STEAL record. We'll check + // the pedigree after random_steal has locked the victim worker. + return w->l->replay_list_entry->m_value; +} + +/** + * Called from random_steal() to determine if we have a STEAL record that + * matches the pedigree at the head of the victim worker. If we do have a + * match, the STEAL record is consumed. + * + * @note It's assumed that replay_match_victim_pedigree() has already returned if + * we're not replaying a log, or if record/replay functionality has been + * compiled out. 
+ * + * @return 1 if we have a match + * @return 0 if the current replay record isn't a STEAL record, or the victim + * isn't correct, or the pedigree doesn't match. + */ +int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim) +{ + // If we don't have a match, return 0 + if (! w->l->replay_list_entry->match(ped_type_steal, + &((*victim->head)->parent_pedigree), + victim->self)) + return 0; + + // Consume this entry + w->l->replay_list_entry = w->l->replay_list_entry->next_entry(); + + // Return success + return 1; +} + +/** + * If the frame we're about to return to was recorded as being stolen, + * stall until it is. + * + * @note It's assumed that replay_wait_for_steal_if_parent_was_stolen() has + * already returned if we're not replaying a log, or if record/replay + * functionality has been compiled out. + * + * @param w The worker we're executing on. + */ +void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w) +{ + // If our parent wasn't recorded orphaned, return now + if (! w->l->replay_list_entry->match (ped_type_orphaned, + w->pedigree.parent)) + return; + + // Stall until our parent is stolen. Note that we're comparing head + // and tail, not head and exc. The steal is not completed until tail + // is modified. + while (!((w->tail - 1) < w->head)) + __cilkrts_sleep(); + + // Consume the entry + w->l->replay_list_entry = w->l->replay_list_entry->next_entry(); +} + +/** + * Allocate memory for the list of logged events. + * + * This function will read through the file and count the number of records + * so it can estimate how big a buffer to allocate for the array of replay + * entries. It will then rewind the file to the beginning so it can be + * loaded into memory. + * + * @param w The worker we're loading the file for. + * @param f The file of replay data we're scanning.
+ */ +static +void allocate_replay_list(__cilkrts_worker *w, FILE *f) +{ + // Count the number of entries - yeah, it's a hack, but it lets me + // allocate the space all at once instead of in chunks + char buf[1024]; + int entries = 1; // Include "LAST" node + + while (! feof(f)) + { + if (fgets(buf, 1024, f)) + { + // Skip the Workers record - should only be in file for Worker 0 + if (0 != strncmp(PED_TYPE_STR_WORKERS, buf, sizeof(PED_TYPE_STR_WORKERS)-1)) + entries++; + } + } + + w->l->replay_list_root = + (replay_entry_t *)__cilkrts_malloc(entries * sizeof(replay_entry_t)); + w->l->replay_list_root[entries - 1].m_type = ped_type_last; + + // Reset the file to the beginning + rewind(f); +} + +/** + * Load the replay log for a worker into memory. + * + * @param w The worker we're loading the replay for. + */ +static +void load_recorded_log(__cilkrts_worker *w) +{ + char ped_type[PED_TYPE_SIZE]; + char ped_str[PEDIGREE_BUFF_SIZE]; + int32_t i1 = -1, i2 = -1; + int fret; + char local_replay_file_name[512]; + FILE *f; + + // Open the log for reading + sprintf(local_replay_file_name, "%s%d.cilklog", w->g->record_replay_file_name, w->self); + f = fopen(local_replay_file_name, "r"); + + // Make sure we found a log! + CILK_ASSERT (NULL != f); + + // Initialize the replay_list + allocate_replay_list(w, f); + replay_entry_t *entry = w->l->replay_list_root; + + // Read the data out and add it to our tables + while (! 
feof(f)) + { +#ifndef INCLUDE_SEQUENCE_NUMBER + fret = fscanf(f, "%s %s %d %d\n", ped_type, ped_str, &i1, &i2); + if(EOF == fret) + break; + + // We must have read 4 fields + CILK_ASSERT(4 == fret); +#else + int32_t write_num; + fret = fscanf(f, "%s %s %d %d %d\n", ped_type, ped_str, + &i1, &i2, &write_num); + if(EOF == fret) + break; + + // We must have read 5 fields + CILK_ASSERT(5 == fret); +#endif // INCLUDE_SEQUENCE_NUMBER + + // Load the data into the entry + if (0 == strcmp(ped_type, PED_TYPE_STR_WORKERS)) + { + // Verify we're replaying with the same number of workers we recorded with + if (i1 != w->g->P) + { + // Fatal error - does not return + cilkos_error("Cannot continue replay: number of workers(%d) doesn't match " + "that from the recording(%d).\n", w->g->P, i1); + } + + // Verify that we understand this version of the pedigree file + if (PED_VERSION != i2) + { + // Fatal error - does not return + cilkos_error("Pedigree file version %d doesn't match current " + "version %d - cannot continue.\n", + i2, PED_VERSION); + } + } + else + { + entry->load(ped_type, ped_str, i1, i2); + entry++; + } + } + + // Make sure we've filled the allocated memory. We initialized the last + // entry in allocate_replay_list() to ped_type_last + CILK_ASSERT(ped_type_last == entry->m_type); + w->l->replay_list_entry = w->l->replay_list_root; + + // Close the log and return + fclose(f); +} + +/** + * Scan a recorded log to match STEALs against ORPHANED records. + * + * @param g Cilk Runtime global state. Passed to access the worker array so + * we can scan a worker's ORPHANED entries for one that matches a STEAL entry. + * @param entry The root of a replay_list for a worker. + */ +static +void scan_for_matching_steals(global_state_t *g, replay_entry_t *entry) +{ + // Iterate over all of the entries + while (ped_type_last != entry->m_type) + { + // Look for STEALs.
That will tell us which worker the frame was + // stolen from + if (ped_type_steal == entry->m_type) + { + bool found = false; + + // Validate the worker ID and make sure we've got a list + CILK_ASSERT((entry->m_value >= 0) && (entry->m_value < g->total_workers)); + replay_entry_t *victim_entry = g->workers[entry->m_value]->l->replay_list_root; + CILK_ASSERT(NULL != victim_entry); + + // Scan the victim's list for the matching ORPHANED record + while ((ped_type_last != victim_entry->m_type) && ! found) + { + if (ped_type_orphaned == victim_entry->m_type) + { + if (entry->m_pedigree_len == victim_entry->m_pedigree_len) + { + if (0 == memcmp(entry->m_reverse_pedigree, + victim_entry->m_reverse_pedigree, + entry->m_pedigree_len * sizeof(int64_t))) + { + // Note that this ORPHANED record has a matching steal + victim_entry->m_value = 0; + found = true; + } + } + } + victim_entry++; + } + } + entry++; + } +} + + +/* + * Initialize per-worker data for record or replay - See record-replay.h + * for full routine header. + */ +void replay_init_workers(global_state_t *g) +{ + int i; + char worker_file_name[512]; + + // If we're not recording or replaying a log, we're done. All of the + // fields in the global_state_t or local_state_t are already initialized + // to default values. + if (RECORD_REPLAY_NONE == g->record_or_replay) + return; + + // If we're replaying a log, read each worker's log and construct the + // in-memory log + if (REPLAY_LOG == g->record_or_replay) + { + // Read all of the data + for (i = 0; i < g->total_workers; ++i) + { + // This function will also initialize and fill the worker's + // replay list + load_recorded_log(g->workers[i]); + } + + // Scan for orphans with no matching steal. Mark them so they'll be + // skipped as we advance through the log. + for (i = 0; i < g->total_workers; ++i) + { + scan_for_matching_steals(g, g->workers[i]->l->replay_list_root); + } + + // If we're recording the logs while replaying, create the log files. 
+ // This will only be used for debugging. Create the logs in the + // current directory. It should be as good a place as any... +#if RECORD_ON_REPLAY + for(i = 0; i < g->total_workers; ++i) + { + __cilkrts_worker *w = g->workers[i]; + sprintf(worker_file_name, "replay_log_%d.cilklog", w->self); + w->l->record_replay_fptr = fopen(worker_file_name, "w+"); + CILK_ASSERT(NULL != w->l->record_replay_fptr); + } + + // Record the number of workers, file version in Worker 0's file + write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION); +#endif // RECORD_ON_REPLAY + } + + // If we're recording, create the log files + if (RECORD_LOG == g->record_or_replay) + { + for(i = 0; i < g->total_workers; ++i) + { + __cilkrts_worker *w = g->workers[i]; + sprintf(worker_file_name, "%s%d.cilklog", + g->record_replay_file_name, + w->self); + w->l->record_replay_fptr = fopen(worker_file_name, "w+"); + CILK_ASSERT(NULL != w->l->record_replay_fptr); + } + + // Record the number of workers, file version in Worker 0's file + write_to_replay_log (g->workers[0], PED_TYPE_STR_WORKERS, NULL, g->P, PED_VERSION); + } +} + +/* + * Do any necessary cleanup for the logs - See record-replay.h for full + * routine header. 
+ */ +void replay_term(global_state_t *g) +{ + // Free memory for the record/replay log file name, if we've got one + if (g->record_replay_file_name) + __cilkrts_free(g->record_replay_file_name); + + // Per-worker cleanup + for(int i = 0; i < g->total_workers; ++i) + { + __cilkrts_worker *w = g->workers[i]; + + // Close the log files, if we've opened them + if(w->l->record_replay_fptr) + fclose(w->l->record_replay_fptr); + + if (w->l->replay_list_root) + { + // We should have consumed the entire list + CILK_ASSERT(ped_type_last == w->l->replay_list_entry->m_type); + + replay_entry_t *entry = w->l->replay_list_root; + while (ped_type_last != entry->m_type) + { + // Free the pedigree memory for each entry + entry->unload(); + entry++; + } + __cilkrts_free(w->l->replay_list_root); + w->l->replay_list_root = NULL; + w->l->replay_list_entry = NULL; + } + } +} + +__CILKRTS_END_EXTERN_C diff --git a/libcilkrts/runtime/record-replay.h b/libcilkrts/runtime/record-replay.h new file mode 100644 index 00000000000..f65e667a8e1 --- /dev/null +++ b/libcilkrts/runtime/record-replay.h @@ -0,0 +1,427 @@ +/* record_replay.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + * + **************************************************************************/ + +/** + * @file record-replay.h + * + * @brief record-replay.h and .cpp encapsulate most of the functionality to + * record and play back a Cilk Plus application. + * + * Recording is directed by the setting of the CILK_RECORD_LOG environment + * variable. If it's defined, the value specifies the root we'll use to + * generate files for each worker using the following format string: + * "%s%d.cilklog", where the integer is the value of w->self. + * + * Replay is directed by the setting of the CILK_REPLAY_LOG environment + * variable, interpreted the same way as CILK_RECORD_LOG. If both + * CILK_RECORD_LOG and CILK_REPLAY_LOG are defined, a warning will be given + * and the attempt to record a log will be ignored. + * + * Recording is relatively straightforward. We write all information about a + * worker to a per-worker file. + * + * Each pedigree record consists of the following fields. All fields must be + * present in every record to make parsing easy. + * - Type - A string identifying the pedigree record. See the PED_TYPE_STR_ + * macros for the currently defined values. + * - Pedigree - A string of pedigree values, with underscores between + * adjacent values. + * - i1 - Record type-specific value. -1 if not used. + * - i2 - Record type-specific value. -1 if not used. + * + * WORKERS record - only written to the file for worker 0. Note that this is + * the first worker in the workers array. 
Worker 0 is the first system worker, + * *NOT* a user worker. + * - Type: "Workers" + * - Pedigree: Always "0" - ignored + * - i1: Number of workers (g->P) when we recorded the log. A mismatch when + * we attempt to replay the log will result in aborting the execution. + * - i2: Log version number - Specified by PED_VERSION in record-replay.cpp + * + * STEAL record - written after a successful steal. + * - Type: "Steal" + * - Pedigree: Pedigree of stolen frame + * - i1: Worker the frame was stolen from + * - i2: -1 + * + * SYNC record - written after a worker continues from a sync. + * - Type: "Sync" + * - Pedigree: Pedigree of sync. Note that this is the pedigree *before* + * the pedigree is incremented in setup_for_execution_pedigree(). + * - i1: -1 + * - i2: -1 + * + * ORPHANED record - saved on a return to a stolen parent. + * - Type: "Orphaned" + * - Pedigree: Pedigree of the parent frame *before* the pedigree is + * incremented by the return + * - i1: -1 + * - i2: -1 + * + * On replay, the data is loaded into a per-worker array, and the data is + * consumed in order as needed. + */ + +#ifndef INCLUDED_RECORD_REPLAY_DOT_H +#define INCLUDED_RECORD_REPLAY_DOT_H + +#include "cilk/common.h" +#include "global_state.h" + +/** + * Define CILK_RECORD_REPLAY to enable record/replay functionality. If + * CILK_RECORD_REPLAY is not defined, all of the record/replay functions in + * record-replay.h will be stubbed out. Since they're declared as inline + * functions, the resulting build should have no performance impact due to + * the implementation of record/replay. + */ + #define CILK_RECORD_REPLAY 1 + +/** + * Define RECORD_ON_REPLAY=1 to write logs when we're replaying a log. This + * should only be needed when debugging the replay functionality. This should + * always be defined as 0 when record-replay.h is checked in. + */ +#define RECORD_ON_REPLAY 0 + +__CILKRTS_BEGIN_EXTERN_C + +#ifdef CILK_RECORD_REPLAY +// Declarations of internal record/replay functions.
The inlined versions +// further down do some preliminary testing (like if we're not recording or +// replaying) and will stub out the functionality if we've compiled out the +// record/replay feature +int replay_match_sync_pedigree_internal(__cilkrts_worker *w); +void replay_wait_for_steal_if_parent_was_stolen_internal(__cilkrts_worker *w); +void replay_record_steal_internal(__cilkrts_worker *w, int32_t victim_id); +void replay_record_sync_internal(__cilkrts_worker *w); +void replay_record_orphaned_internal(__cilkrts_worker *w); +int replay_match_victim_pedigree_internal(__cilkrts_worker *w, __cilkrts_worker *victim); +void replay_advance_from_sync_internal (__cilkrts_worker *w); +int replay_get_next_recorded_victim_internal(__cilkrts_worker *w); +#endif // CILK_RECORD_REPLAY + +// Publicly defined record/replay API + +/** + * If we're replaying a log, wait for our parent to be stolen if it was when + * the log was recorded. If record/replay is compiled out, this is a noop. + * + * @param w The __cilkrts_worker we're executing on. The worker's replay + * list will be checked for an ORPHANED record with a matching pedigree. If + * there is a match, the ORPHANED record will be consumed. + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w) +{ + // Only check if we're replaying a log + if (REPLAY_LOG == w->g->record_or_replay) + replay_wait_for_steal_if_parent_was_stolen_internal(w); +} +#else +__CILKRTS_INLINE +void replay_wait_for_steal_if_parent_was_stolen(__cilkrts_worker *w) +{ + // If record/replay is disabled, we never wait +} +#endif // CILK_RECORD_REPLAY + +/** + * Called from random_steal() to override the ID of the randomly chosen victim + * worker which this worker will attempt to steal from. Returns the worker id + * of the next victim this worker was recorded stealing from, or -1 if the + * next record in the log is not a STEAL.
+ * + * @note This call does NOT attempt to match the pedigree. That will be done + * by replay_match_victim_pedigree() after random_steal() has locked the victim + * worker. + * + * @param w The __cilkrts_worker we're executing on. The worker's replay log + * is checked for a STEAL record. If we've got one, the stolen worker ID is + * returned. + * @param id The randomly chosen victim worker ID. If we're not replaying a + * log, or if record/replay has been compiled out, this is the value that + * will be returned. + * + * @return id if we're not replaying a log + * @return -1 if the next record is not a STEAL + * @return recorded stolen worker ID if we've got a matching STEAL record + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +int replay_get_next_recorded_victim(__cilkrts_worker *w, int id) +{ + // Only check if we're replaying a log + if (REPLAY_LOG == w->g->record_or_replay) + return replay_get_next_recorded_victim_internal(w); + else + return id; +} +#else +__CILKRTS_INLINE +int replay_get_next_recorded_victim(__cilkrts_worker *w, int id) +{ + // Record/replay is disabled. Always return the original worker id + return id; +} +#endif // CILK_RECORD_REPLAY + +/** + * Initialize per-worker data for record/replay. A noop if record/replay + * is disabled, or if we're not recording or replaying anything. + * + * If we're recording a log, this will ready us to create the per-worker + * logs. + * + * If we're replaying a log, this will read the logs into the per-worker + * structures. + * + * @param g Cilk runtime global state + */ +void replay_init_workers(global_state_t *g); + +/** + * Record a record on a successful steal. A noop if record/replay is + * disabled, or if we're not recording anything + * + * @param w The __cilkrts_worker we're executing on. The pedigree of + * the stolen frame will be walked to generate the STEAL record. + * + * @param victim_id The worker ID of the worker w stole from.
+ */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +void replay_record_steal(__cilkrts_worker *w, int32_t victim_id) +{ +#if RECORD_ON_REPLAY + // If we're recording on replay, write the record if we're recording or + // replaying + if (RECORD_REPLAY_NONE == w->g->record_or_replay) + return; +#else + // Only write the record if we're recording + if (RECORD_LOG != w->g->record_or_replay) + return; +#endif + + replay_record_steal_internal(w, victim_id); +} +#else +__CILKRTS_INLINE +void replay_record_steal(__cilkrts_worker *w, int32_t victim_id) +{ +} +#endif // CILK_RECORD_REPLAY + +/** + * Record a record when continuing after a sync. A noop if record/replay is + * disabled, or if we're not recording anything, or if the sync was abandoned, + * meaning this isn't the worker that continues from the sync. + * + * @param w The __cilkrts_worker we're executing on. The pedigree of + * the sync-ing frame will be walked to generate the SYNC record. + * + * @param continuing True if this worker will be continuing from the + * cilk_sync. A SYNC record will only be generated if continuing is true. + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +void replay_record_sync(__cilkrts_worker *w, int continuing) +{ + // If this was not the last worker to the sync, return + if (! continuing) + return; + +#if RECORD_ON_REPLAY + // If we're recording on replay, write the record if we're recording or + // replaying + if (RECORD_REPLAY_NONE == w->g->record_or_replay) + return; +#else + // Only write the record if we're recording + if (RECORD_LOG != w->g->record_or_replay) + return; +#endif + + replay_record_sync_internal(w); +} +#else +__CILKRTS_INLINE +void replay_record_sync(__cilkrts_worker *w, int abandoned) +{ +} +#endif // CILK_RECORD_REPLAY + +/** + * Record a record on a return to a stolen parent. A noop if record/replay is + * disabled, or if we're not recording anything. + * + * @param w The __cilkrts_worker we're executing on.
The pedigree of the + * frame that has discovered that its parent has been stolen will be walked + * to generate the ORPHANED record. + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +void replay_record_orphaned(__cilkrts_worker *w) +{ +#if RECORD_ON_REPLAY + // If we're recording on replay, write the record if we're recording or + // replaying + if (RECORD_REPLAY_NONE == w->g->record_or_replay) + return; +#else + // Only write the record if we're recording + if (RECORD_LOG != w->g->record_or_replay) + return; +#endif + + replay_record_orphaned_internal(w); +} +#else +__CILKRTS_INLINE +void replay_record_orphaned(__cilkrts_worker *w) +{ +} +#endif // CILK_RECORD_REPLAY + +/** + * Test whether the frame at the head of the victim matches the pedigree of + * the frame that was recorded being stolen. Called in random steal to verify + * that we're about to steal the correct frame. + * + * @param w The __cilkrts_worker we're executing on. The current worker + * is needed to find the replay entry to be checked. + * + * @param victim The __cilkrts_worker we're proposing to steal a frame + * from. The victim's head entry + * is needed to find the replay entry to be checked. + * + * @return 0 if we're replaying a log and the victim's pedigree does NOT match + * the next frame the worker is expected to steal. + * + * @return 1 in all other cases to indicate that the steal attempt should + * continue + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim) +{ + // We're not replaying a log. The victim is always acceptable + if (REPLAY_LOG != w->g->record_or_replay) + return 1; + + // Return 1 if the victim's pedigree matches the frame the worker stole + // when we recorded the log + return replay_match_victim_pedigree_internal(w, victim); +} +#else +__CILKRTS_INLINE +int replay_match_victim_pedigree(__cilkrts_worker *w, __cilkrts_worker *victim) +{ + // Record/replay is disabled.
The victim is always acceptable + return 1; +} +#endif // CILK_RECORD_REPLAY + +/** + * Test whether the current replay entry is a sync record matching the + * worker's pedigree. + * + * @param w The __cilkrts_worker we're executing on. + * + * @return 1 if the current replay entry matches the current pedigree. + * @return 0 if there's no match, or if we're not replaying a log. + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +int replay_match_sync_pedigree(__cilkrts_worker *w) +{ + // If we're not replaying, assume no match + if (REPLAY_LOG != w->g->record_or_replay) + return 0; + + return replay_match_sync_pedigree_internal(w); +} +#else +__CILKRTS_INLINE +int replay_match_sync_pedigree(__cilkrts_worker *w) +{ + // Record/replay is disabled. Assume no match + return 0; +} +#endif + +/** + * Marks a sync record seen, advancing to the next record in the replay list. + * + * This function will only advance to the next record if: + * - Record/replay hasn't been compiled out AND + * - We're replaying a log AND + * - A match was found AND + * - The sync is not being abandoned + * + * @param w The __cilkrts_worker we're executing on. + * @param match_found The value returned by replay_match_sync_pedigree(). If + * match_found is false, nothing is done. + * @param continuing Flag indicating whether this worker will continue from + * the sync (it's the last worker to the sync) or if it will abandon the work + * and go to the scheduling loop to look for more work it can steal. + */ +#ifdef CILK_RECORD_REPLAY +__CILKRTS_INLINE +void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing) +{ + // If we're replaying a log, and the current sync wasn't abandoned, and we + // found a match in the log, mark the sync record seen. 
+ if ((REPLAY_LOG == w->g->record_or_replay) && match_found && continuing) + replay_advance_from_sync_internal(w); +} +#else +__CILKRTS_INLINE +void replay_advance_from_sync(__cilkrts_worker *w, int match_found, int continuing) +{ +} +#endif + +/** + * Release any resources used to read or write a replay log. + * + * @param g Cilk runtime global state + */ +void replay_term(global_state_t *g); + +__CILKRTS_END_EXTERN_C + +#endif // ! defined(INCLUDED_RECORD_REPLAY_DOT_H) diff --git a/libcilkrts/runtime/reducer_impl.cpp b/libcilkrts/runtime/reducer_impl.cpp index be749c5072c..ec5a1e4037c 100644 --- a/libcilkrts/runtime/reducer_impl.cpp +++ b/libcilkrts/runtime/reducer_impl.cpp @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * * Patents Pending, Intel Corporation. **************************************************************************/ @@ -41,6 +46,7 @@ #include "reducer_impl.h" #include "scheduler.h" #include "bug.h" +#include "os.h" #include "global_state.h" #include "frame_malloc.h" @@ -71,6 +77,11 @@ static inline void verify_current_wkr(__cilkrts_worker *w) #endif } +// Suppress clang warning that the expression result is unused +#if defined(__clang__) && (! defined(__INTEL_COMPILER)) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-value" +#endif // __clang__ /// Helper class to disable and re-enable Cilkscreen struct DisableCilkscreen @@ -86,13 +97,23 @@ struct EnableCilkscreen ~EnableCilkscreen () { __cilkscreen_disable_checking(); } }; -/** Element for a hyperobject */ +#if defined(__clang__) && (! 
defined(__INTEL_COMPILER)) +# pragma clang diagnostic pop +#endif // __clang__ + +/** + * @brief Element for a hyperobject + */ struct elem { - void *key; // Shared key for this hyperobject - __cilkrts_hyperobject_base *hb; // Base of the hyperobject. - void *val; // Strand-private view of this hyperobject - /// Destructor for an instance of this hyperobject + void *key; ///< Shared key for this hyperobject + __cilkrts_hyperobject_base *hb; ///< Base of the hyperobject. + void *view; ///< Strand-private view of this hyperobject + /// Destroy and deallocate the view object for this element and set view to + /// null. void destroy(); + + /// Returns true if this element contains a leftmost view. + bool is_leftmost() const; }; /** Bucket containing at most NMAX elements */ @@ -132,36 +153,40 @@ struct cilkred_map { /** Set true for leftmost reducer map */ bool is_leftmost; - /* Return element mapped to 'key' or null if not found. */ + /** @brief Return element mapped to 'key' or null if not found. */ elem *lookup(void *key); - /* Insert key/value element into hash map without rehashing. Does not - * check for duplicate key. */ + /** + * @brief Insert key/value element into hash map without rehashing. + * Does not check for duplicate key. + */ elem *insert_no_rehash(__cilkrts_worker *w, void *key, __cilkrts_hyperobject_base *hb, void *value); - /* Insert key/value element into hash map, rehashing if necessary. Does not - * check for duplicate key. */ + /** + * @brief Insert key/value element into hash map, rehashing if necessary. + * Does not check for duplicate key. 
+ */ inline elem *rehash_and_insert(__cilkrts_worker *w, void *key, __cilkrts_hyperobject_base *hb, void *value); - /** Grow bucket by one element, reallocating bucket if necessary */ + /** @brief Grow bucket by one element, reallocating bucket if necessary */ static elem *grow(__cilkrts_worker *w, bucket **bp); - /** Rehash a worker's reducer map */ + /** @brief Rehash a worker's reducer map */ void rehash(__cilkrts_worker *); /** - * Returns true if a rehash is needed due to the number of elements that + * @brief Returns true if a rehash is needed due to the number of elements that * have been inserted. */ inline bool need_rehash_p() const; - /** Allocate and initialize the buckets */ + /** @brief Allocate and initialize the buckets */ void make_buckets(__cilkrts_worker *w, size_t nbuckets); /** @@ -176,17 +201,17 @@ struct cilkred_map { }; /** - * Merge another reducer map into this one, destroying the other map in + * @brief Merge another reducer map into this one, destroying the other map in * the process. */ __cilkrts_worker* merge(__cilkrts_worker *current_wkr, cilkred_map *other_map, enum merge_kind kind); - /** check consistency of a reducer map */ - void check(bool allow_null_val); + /** @brief check consistency of a reducer map */ + void check(bool allow_null_view); - /** Test whether the cilkred_map is empty */ + /** @brief Test whether the cilkred_map is empty */ bool is_empty() { return nelem == 0; } }; @@ -313,24 +338,25 @@ static inline size_t hashfun(const cilkred_map *h, void *key) return k & (h->nbuckets - 1); } -// Given a __cilkrts_hyperobject_base, return a pointer to the leftmost view -// object. -static inline void* get_leftmost_view(__cilkrts_hyperobject_base *hb) +// Given a __cilkrts_hyperobject_base, return the key to that hyperobject in +// the reducer map. +static inline void* get_hyperobject_key(__cilkrts_hyperobject_base *hb) { + // The current implementation uses the address of the leftmost view as the + // key. 
return reinterpret_cast<char*>(hb) + hb->__view_offset; } // Given a hyperobject key, return a pointer to the leftmost object. In the // current implementation, the address of the leftmost object IS the key, so -// this function is an effective noop. The key is passed by reference so that -// conversion of arbitrary pointers to 'void*' are supressed. -static inline void* get_leftmost_view(void *&key) +// this function is an effective noop. +static inline void* get_leftmost_view(void *key) { return key; } /* debugging support: check consistency of a reducer map */ -void cilkred_map::check(bool allow_null_val) +void cilkred_map::check(bool allow_null_view) { size_t count = 0; @@ -339,7 +365,7 @@ void cilkred_map::check(bool allow_null_val) bucket *b = buckets[i]; if (b) for (elem *el = b->el; el->key; ++el) { - CILK_ASSERT(allow_null_val || el->val); + CILK_ASSERT(allow_null_view || el->view); ++count; } } @@ -391,7 +417,7 @@ elem *cilkred_map::grow(__cilkrts_worker *w, elem *cilkred_map::insert_no_rehash(__cilkrts_worker *w, void *key, __cilkrts_hyperobject_base *hb, - void *val) + void *view) { #if REDPAR_DEBUG >= 2 @@ -402,18 +428,18 @@ elem *cilkred_map::insert_no_rehash(__cilkrts_worker *w, CILK_ASSERT((w == 0 && g == 0) || w->g == g); CILK_ASSERT(key != 0); - CILK_ASSERT(val != 0); + CILK_ASSERT(view != 0); elem *el = grow(w, &(buckets[hashfun(this, key)])); #if REDPAR_DEBUG >= 3 - fprintf(stderr, "[W=%d, this=%p, inserting key=%p, val=%p, el = %p]\n", - w->self, this, key, val, el); + fprintf(stderr, "[W=%d, this=%p, inserting key=%p, view=%p, el = %p]\n", + w->self, this, key, view, el); #endif el->key = key; el->hb = hb; - el->val = val; + el->view = view; ++nelem; return el; @@ -441,7 +467,7 @@ void cilkred_map::rehash(__cilkrts_worker *w) if (b) { elem *oel; for (oel = b->el; oel->key; ++oel) - insert_no_rehash(w, oel->key, oel->hb, oel->val); + insert_no_rehash(w, oel->key, oel->hb, oel->view); } } @@ -453,19 +479,19 @@ void 
cilkred_map::rehash(__cilkrts_worker *w) elem *cilkred_map::rehash_and_insert(__cilkrts_worker *w, void *key, __cilkrts_hyperobject_base *hb, - void *val) + void *view) { #if REDPAR_DEBUG >= 1 - fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, val=%p\n", - w->self, this, key, val); + fprintf(stderr, "W=%d, this_map =%p, inserting key=%p, view=%p\n", + w->self, this, key, view); verify_current_wkr(w); #endif if (need_rehash_p()) rehash(w); - return insert_no_rehash(w, key, hb, val); + return insert_no_rehash(w, key, hb, view); } @@ -477,7 +503,7 @@ elem *cilkred_map::lookup(void *key) elem *el; for (el = b->el; el->key; ++el) { if (el->key == key) { - CILK_ASSERT(el->val); + CILK_ASSERT(el->view); return el; } } @@ -488,17 +514,27 @@ elem *cilkred_map::lookup(void *key) void elem::destroy() { - // Call destroy_fn and deallocate_fn on all but the leftmost value - if (val != key) - { - cilk_c_monoid *monoid = &(hb->__c_monoid); + if (! is_leftmost()) { + + // Call destroy_fn and deallocate_fn on the view, but not if it's the + // leftmost view. + cilk_c_monoid *monoid = &(hb->__c_monoid); cilk_c_reducer_destroy_fn_t destroy_fn = monoid->destroy_fn; cilk_c_reducer_deallocate_fn_t deallocate_fn = monoid->deallocate_fn; - destroy_fn((void*)hb, val); - deallocate_fn((void*)hb, val); + destroy_fn((void*)hb, view); + deallocate_fn((void*)hb, view); } - val = 0; + + view = 0; +} + +inline +bool elem::is_leftmost() const +{ + // implementation uses the address of the leftmost view as the key, so if + // key == view, then this element refers to the leftmost view. + return key == view; } /* remove the reducer from the current reducer map. 
If the reducer @@ -521,35 +557,42 @@ CILK_EXPORT void __CILKRTS_STRAND_STALE( return; } +const char *UNSYNCED_REDUCER_MSG = + "Destroying a reducer while it is visible to unsynced child tasks, or\n" + "calling CILK_C_UNREGISTER_REDUCER() on an unregistered reducer.\n" + "Did you forget a _Cilk_sync or CILK_C_REGISTER_REDUCER()?"; + cilkred_map* h = w->reducer_map; - CILK_ASSERT(h); + if (NULL == h) + cilkos_error(UNSYNCED_REDUCER_MSG); // Does not return if (h->merging) { verify_current_wkr(w); __cilkrts_bug("User error: hyperobject used by another hyperobject"); } - void* key = get_leftmost_view(hb); + void* key = get_hyperobject_key(hb); elem *el = h->lookup(key); - if (el) { - /* found. */ + + // Verify that the reducer is being destroyed from the leftmost strand for + // which the reducer is defined. + if (! (el && el->is_leftmost())) + cilkos_error(UNSYNCED_REDUCER_MSG); #if REDPAR_DEBUG >= 3 - fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n", - w->self, key, h, el); + fprintf(stderr, "[W=%d, key=%p, lookup in map %p, found el=%p, about to destroy]\n", + w->self, key, h, el); #endif - /* Destroy view and remove element from bucket. */ - el->destroy(); - - /* Shift all subsequent elements. Do not bother - shrinking the bucket */ - do { - el[0] = el[1]; - ++el; - } while (el->key); - --h->nelem; - } + // Remove the element from the hash bucket. Do not bother shrinking + // the bucket. Note that the destroy() function does not actually + // call the destructor for the leftmost view. 
+ el->destroy(); + do { + el[0] = el[1]; + ++el; + } while (el->key); + --h->nelem; #if REDPAR_DEBUG >= 2 fprintf(stderr, "[W=%d, desc=hyper_destroy_finish, key=%p, w->reducer_map=%p]\n", @@ -577,29 +620,30 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb) // will prevent Cilkscreen from reporting apparent races in reducers DisableCilkscreen x; - void* val = get_leftmost_view(hb); + void* key = get_hyperobject_key(hb); + void* view = get_leftmost_view(key); cilkred_map *h = w->reducer_map; if (__builtin_expect(!h, 0)) { h = install_new_reducer_map(w); #if REDPAR_DEBUG >= 2 - fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, val=%p]\n", - w->self, hb, h, val); + fprintf(stderr, "[W=%d, hb=%p, hyper_create, isntalled new map %p, view=%p]\n", + w->self, hb, h, view); #endif } /* Must not exist. */ - CILK_ASSERT(h->lookup(val) == NULL); + CILK_ASSERT(h->lookup(key) == NULL); #if REDPAR_DEBUG >= 3 verify_current_wkr(w); - fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of val %p, should be null]\n", - w->self, hb, h, val); - fprintf(stderr, "W=%d, h=%p, inserting key %p, val%p\n", + fprintf(stderr, "[W=%d, hb=%p, lookup in map %p of view %p, should be null]\n", + w->self, hb, h, view); + fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n", w->self, h, &(hb->__c_monoid), - val); + view); #endif if (h->merging) @@ -607,7 +651,7 @@ void __cilkrts_hyper_create(__cilkrts_hyperobject_base *hb) CILK_ASSERT(w->reducer_map == h); // The address of the leftmost value is the same as the key for lookup. - (void) h->rehash_and_insert(w, val, hb, val); + (void) h->rehash_and_insert(w, view, hb, view); } extern "C" @@ -615,7 +659,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE( __cilkrts_hyper_lookup(__cilkrts_hyperobject_base *hb)) { __cilkrts_worker* w = __cilkrts_get_tls_worker_fast(); - void* key = get_leftmost_view(hb); + void* key = get_hyperobject_key(hb); if (! 
w) return get_leftmost_view(key); @@ -658,7 +702,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE( } #if REDPAR_DEBUG >= 3 - fprintf(stderr, "W=%d, h=%p, inserting key %p, val%p\n", + fprintf(stderr, "W=%d, h=%p, inserting key %p, view%p\n", w->self, h, &(hb->__c_monoid), @@ -668,7 +712,7 @@ CILK_EXPORT void* __CILKRTS_STRAND_PURE( el = h->rehash_and_insert(w, key, hb, rep); } - return el->val; + return el->view; } extern "C" CILK_EXPORT @@ -718,10 +762,10 @@ void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h) CILK_ASSERT((w == 0 && h->g == 0) || w->g == h->g); verify_current_wkr(w); - /* the reducer map is allowed to contain el->val == NULL here (and - only here). We set el->val == NULL only when we know that the + /* the reducer map is allowed to contain el->view == NULL here (and + only here). We set el->view == NULL only when we know that the map will be destroyed immediately afterwards. */ - DBG h->check(/*allow_null_val=*/true); + DBG h->check(/*allow_null_view=*/true); bucket *b; size_t i; @@ -731,7 +775,7 @@ void __cilkrts_destroy_reducer_map(__cilkrts_worker *w, cilkred_map *h) if (b) { elem *el; for (el = b->el; el->key; ++el) { - if (el->val) + if (el->view) el->destroy(); } } @@ -783,8 +827,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, bool merge_to_leftmost = (this->is_leftmost /* && !other_map->is_leftmost */); - DBG check(/*allow_null_val=*/false); - DBG other_map->check(/*allow_null_val=*/false); + DBG check(/*allow_null_view=*/false); + DBG other_map->check(/*allow_null_view=*/false); for (size_t i = 0; i < other_map->nbuckets; ++i) { bucket *b = other_map->buckets[i]; @@ -792,8 +836,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, for (elem *other_el = b->el; other_el->key; ++other_el) { /* Steal the value from the other map, which will be destroyed at the end of this operation. 
*/ - void *other_val = other_el->val; - CILK_ASSERT(other_val); + void *other_view = other_el->view; + CILK_ASSERT(other_view); void *key = other_el->key; __cilkrts_hyperobject_base *hb = other_el->hb; @@ -802,7 +846,7 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, if (this_el == 0 && merge_to_leftmost) { /* Initialize leftmost view before merging. */ void* leftmost = get_leftmost_view(key); - // leftmost == other_val can be true if the initial view + // leftmost == other_view can be true if the initial view // was created in other than the leftmost strand of the // spawn tree, but then made visible to subsequent strands // (E.g., the reducer was allocated on the heap and the @@ -811,17 +855,17 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, // strands will always result in 'this_el' being null, // thus propagating the initial view up the spawn tree // until it reaches the leftmost strand. When synching - // with the leftmost strand, leftmost == other_val will be + // with the leftmost strand, leftmost == other_view will be // true and we must avoid reducing the initial view with // itself. 
- if (leftmost != other_val) + if (leftmost != other_view) this_el = rehash_and_insert(w, key, hb, leftmost); } if (this_el == 0) { /* move object from other map into this one */ - rehash_and_insert(w, key, hb, other_val); - other_el->val = 0; + rehash_and_insert(w, key, hb, other_view); + other_el->view = 0; continue; /* No element-level merge necessary */ } @@ -835,8 +879,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, case MERGE_INTO_RIGHT: /* Swap elements in order to preserve object identity */ - other_el->val = this_el->val; - this_el->val = other_val; + other_el->view = this_el->view; + this_el->view = other_view; /* FALL THROUGH */ case MERGE_INTO_LEFT: { /* Stealing should be disabled during reduce @@ -854,8 +898,8 @@ __cilkrts_worker* cilkred_map::merge(__cilkrts_worker *w, /* TBD: if reduce throws an exception we need to stop it here. */ hb->__c_monoid.reduce_fn((void*)hb, - this_el->val, - other_el->val); + this_el->view, + other_el->view); w = current_sf->worker; #if REDPAR_DEBUG >= 2 diff --git a/libcilkrts/runtime/reducer_impl.h b/libcilkrts/runtime/reducer_impl.h index f088b969293..8e51da0dd0b 100644 --- a/libcilkrts/runtime/reducer_impl.h +++ b/libcilkrts/runtime/reducer_impl.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. 
- * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -41,8 +46,6 @@ __CILKRTS_BEGIN_EXTERN_C -typedef struct cilkred_map cilkred_map; - /** * Construct an empty reducer map from the memory pool associated with the * given worker. 
This reducer map must be destroyed before the worker's diff --git a/libcilkrts/runtime/rts-common.h b/libcilkrts/runtime/rts-common.h index 837c22546ab..f8e33d6c1fd 100644 --- a/libcilkrts/runtime/rts-common.h +++ b/libcilkrts/runtime/rts-common.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #ifndef INCLUDED_RTS_COMMON_DOT_H @@ -89,9 +94,27 @@ # define inline __inline #endif -/* Compilers that build the Cilk runtime are assumed to know about - zero-cost intrinsics. For those that don't, comment out the - following definition: */ -#define ENABLE_NOTIFY_ZC_INTRINSIC +/* Compilers that build the Cilk runtime are assumed to know about zero-cost + * intrinsics (__notify_intrinsic()). For those that don't, #undef the + * following definition: + */ +#define ENABLE_NOTIFY_ZC_INTRINSIC 1 + +#if defined(__INTEL_COMPILER) +/* The notify intrinsic was introduced in ICC 12.0. */ +# if __INTEL_COMPILER <= 1200 +# undef ENABLE_NOTIFY_ZC_INTRINSIC +# endif +#elif defined(__VXWORKS__) +# undef ENABLE_NOTIFY_ZC_INTRINSIC +#elif defined(__clang__) +# if !defined(__has_extension) || !__has_extension(notify_zc_intrinsic) +# undef ENABLE_NOTIFY_ZC_INTRINSIC +# endif +#elif defined(__arm__) +// __notify_zc_intrinsic not yet supported by gcc for ARM +# undef ENABLE_NOTIFY_ZC_INTRINSIC +#endif + #endif // ! 
defined(INCLUDED_RTS_COMMON_DOT_H) diff --git a/libcilkrts/runtime/scheduler.c b/libcilkrts/runtime/scheduler.c index 0a19aea1933..54bec2cf9f5 100644 --- a/libcilkrts/runtime/scheduler.c +++ b/libcilkrts/runtime/scheduler.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2007-2012 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2007-2012 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -38,12 +43,13 @@ #include "local_state.h" #include "signal_node.h" #include "full_frame.h" -#include "stacks.h" #include "sysdep.h" #include "except.h" #include "cilk_malloc.h" #include "pedigrees.h" +#include "record-replay.h" +#include <limits.h> #include <string.h> /* memcpy */ #include <stdio.h> // sprintf #include <stdlib.h> // malloc, free, abort @@ -51,6 +57,7 @@ #ifdef _WIN32 # pragma warning(disable:1786) // disable warning: sprintf is deprecated # include "sysdep-win.h" +# include "except-win32.h" #endif // _WIN32 // ICL: Don't complain about conversion from pointer to same-sized integral @@ -66,6 +73,7 @@ #include "cilk-tbb-interop.h" #include "cilk-ittnotify.h" #include "stats.h" + // ICL: Don't complain about loss of precision in myrand // I tried restoring the warning after the function, but it didn't // suppress it @@ -77,6 +85,12 @@ # include <unistd.h> #endif +#ifdef __VXWORKS__ +// redeclare longjmp() with noreturn to stop warnings +extern __attribute__((noreturn)) + void longjmp(jmp_buf, int); +#endif + //#define DEBUG_LOCKS 1 #ifdef DEBUG_LOCKS // The currently executing worker must own this worker's lock @@ -94,12 +108,22 @@ enum schedule_t { SCHEDULE_RUN, SCHEDULE_WAIT, SCHEDULE_EXIT }; +// Return values for provably_good_steal() +enum provably_good_steal_t +{ + ABANDON_EXECUTION, // Not the last child to the sync - attempt to 
steal work + CONTINUE_EXECUTION, // Last child to the sync - continue executing on this worker + WAIT_FOR_CONTINUE // The replay log indicates that this was the worker + // which continued. Loop until we are the last worker + // to the sync. +}; + // Verify that "w" is the worker we are currently executing on. // Because this check is expensive, this method is usually a no-op. static inline void verify_current_wkr(__cilkrts_worker *w) { -#if REDPAR_DEBUG >= 3 +#if ((REDPAR_DEBUG >= 3) || (FIBER_DEBUG >= 1)) // Lookup the worker from TLS and compare to w. __cilkrts_worker* tmp = __cilkrts_get_tls_worker(); if (w != tmp) { @@ -113,7 +137,7 @@ static inline void verify_current_wkr(__cilkrts_worker *w) static enum schedule_t worker_runnable(__cilkrts_worker *w); -// Scheduling-stack functions: +// Scheduling-fiber functions: static void do_return_from_spawn (__cilkrts_worker *w, full_frame *ff, __cilkrts_stack_frame *sf); @@ -121,7 +145,8 @@ static void do_sync (__cilkrts_worker *w, full_frame *ff, __cilkrts_stack_frame *sf); -#ifndef _WIN32 +// max is defined on Windows and VxWorks +#if (! defined(_WIN32)) && (! defined(__VXWORKS__)) // TBD: definition of max() for Linux. # define max(a, b) ((a) < (b) ? (b) : (a)) #endif @@ -130,8 +155,15 @@ void __cilkrts_dump_stats_to_stderr(global_state_t *g) { #ifdef CILK_PROFILE int i; - for (i = 0; i < g->total_workers; ++i) - __cilkrts_accum_stats(&g->stats, &g->workers[i]->l->stats); + for (i = 0; i < g->total_workers; ++i) { + // Print out statistics for each worker. We collected them, + // so why not print them out? + fprintf(stderr, "Stats for worker %d\n", i); + dump_stats_to_file(stderr, g->workers[i]->l->stats); + __cilkrts_accum_stats(&g->stats, g->workers[i]->l->stats); + } + + // Also print out aggregate statistics. 
dump_stats_to_file(stderr, &g->stats); #endif fprintf(stderr, @@ -196,6 +228,12 @@ static int decjoin(full_frame *ff) return (--ff->join_counter); } +static int simulate_decjoin(full_frame *ff) +{ + CILK_ASSERT(ff->join_counter > 0); + return (ff->join_counter - 1); +} + /* * Pseudo-random generator defined by the congruence S' = 69070 * S * mod (2^32 - 5). Marsaglia (CACM July 1993) says on page 107 that @@ -338,7 +376,7 @@ static void make_runnable(__cilkrts_worker *w, full_frame *ff) static void make_unrunnable(__cilkrts_worker *w, full_frame *ff, __cilkrts_stack_frame *sf, - int state_valid, + int is_loot, const char *why) { /* CALL_STACK becomes valid again */ @@ -352,12 +390,12 @@ static void make_unrunnable(__cilkrts_worker *w, sf->flags |= CILK_FRAME_STOLEN | CILK_FRAME_SUSPENDED; sf->worker = 0; - if (state_valid) + if (is_loot) __cilkrts_put_stack(ff, sf); /* perform any system-dependent action, such as saving the state of the stack */ - __cilkrts_make_unrunnable_sysdep(w, ff, sf, state_valid, why); + __cilkrts_make_unrunnable_sysdep(w, ff, sf, is_loot, why); } } @@ -435,34 +473,38 @@ static void unset_sync_master(__cilkrts_worker *w, full_frame *ff) w->l->last_full_frame = NULL; } -/************************************************************* - THE protocol: -*************************************************************/ +/******************************************************************** + * THE protocol: + ********************************************************************/ /* - This is a protocol for work stealing that minimize the - overhead on the victim. - - The protocol uses three shared pointes into the victim's deque: T - (the ``tail''), H (the ``head'') and E (the ``exception''), - with H <= E, H <= T. (NB: "exception," in this case has nothing to do with - C++ throw-catch exceptions -- it refers only to a non-normal return, i.e., a - steal or similar scheduling exception.) - - Stack frames P, where H <= E < T, are available for stealing. 
- - The victim operates on the T end of the stack. The frame being - worked on by the victim is not on the stack. To push, the victim - stores *T++=frame. To pop, it obtains frame=*--T. - - After decrementing T, the condition E > T signals to the victim that - it should invoke the runtime system ``THE'' exception handler. The - pointer E can become INFINITY, in which case the victim must invoke - the THE exception handler as soon as possible. - - See "The implementation of the Cilk-5 multithreaded language", PLDI 1998, - http://portal.acm.org/citation.cfm?doid=277652.277725, for more information - on the THE protocol. -*/ + * This is a protocol for work stealing that minimizes the overhead on + * the victim. + * + * The protocol uses three shared pointers into the worker's deque: + * - T - the "tail" + * - H - the "head" + * - E - the "exception" NB: In this case, "exception" has nothing to do + * with C++ throw-catch exceptions -- it refers only to a non-normal return, + * i.e., a steal or similar scheduling exception. + * + * with H <= E, H <= T. + * + * Stack frames SF, where H <= E < T, are available for stealing. + * + * The worker operates on the T end of the stack. The frame being + * worked on is not on the stack. To make a continuation available for + * stealing the worker pushes a frame onto the stack: stores *T++ = SF. + * To return, it pops the frame off the stack: obtains SF = *--T. + * + * After decrementing T, the condition E > T signals to the victim that + * it should invoke the runtime system's "THE" exception handler. The + * pointer E can become INFINITY, in which case the victim must invoke + * the THE exception handler as soon as possible. + * + * See "The implementation of the Cilk-5 multithreaded language", PLDI 1998, + * http://portal.acm.org/citation.cfm?doid=277652.277725, for more information + * on the THE protocol. 
+ */ /* the infinity value of E */ #define EXC_INFINITY ((__cilkrts_stack_frame **) (-1)) @@ -553,11 +595,12 @@ static int dekker_protocol(__cilkrts_worker *victim) } } + /* Link PARENT and CHILD in the spawn tree */ static full_frame *make_child(__cilkrts_worker *w, full_frame *parent_ff, __cilkrts_stack_frame *child_sf, - __cilkrts_stack *sd) + cilk_fiber *fiber) { full_frame *child_ff = __cilkrts_make_full_frame(w, child_sf); @@ -570,15 +613,14 @@ static full_frame *make_child(__cilkrts_worker *w, // w->self, child, parent, child_sf, // parent->parent, parent->left_sibling, parent->right_sibling, parent->rightmost_child, // child->parent, child->left_sibling, child->right_sibling, child->rightmost_child); - CILK_ASSERT(parent_ff->call_stack); - child_ff->is_call_child = (sd == NULL); + child_ff->is_call_child = (fiber == NULL); - /* PLACEHOLDER_STACK is used as non-null marker indicating that + /* PLACEHOLDER_FIBER is used as non-null marker indicating that child should be treated as a spawn child even though we have not - yet assigned a real stack to its parent. */ - if (sd == PLACEHOLDER_STACK) - sd = NULL; /* Parent actually gets a null stack, for now */ + yet assigned a real fiber to its parent. */ + if (fiber == PLACEHOLDER_FIBER) + fiber = NULL; /* Parent actually gets a null fiber, for now */ /* perform any system-dependent actions, such as capturing parameter passing information */ @@ -586,19 +628,15 @@ static full_frame *make_child(__cilkrts_worker *w, /* Child gets reducer map and stack of parent. Parent gets a new map and new stack. */ - child_ff->stack_self = parent_ff->stack_self; + child_ff->fiber_self = parent_ff->fiber_self; child_ff->sync_master = NULL; if (child_ff->is_call_child) { /* Cause segfault on any attempted access. The parent gets the child map and stack when the child completes. 
*/ - parent_ff->stack_self = 0; + parent_ff->fiber_self = 0; } else { - parent_ff->stack_self = sd; - __cilkrts_bind_stack(parent_ff, - __cilkrts_stack_to_pointer(parent_ff->stack_self, child_sf), - child_ff->stack_self, - child_ff->sync_master); + parent_ff->fiber_self = fiber; } incjoin(parent_ff); @@ -677,7 +715,7 @@ static full_frame *unroll_call_stack(__cilkrts_worker *w, CHILD frame in its place */ static void detach_for_steal(__cilkrts_worker *w, __cilkrts_worker *victim, - __cilkrts_stack *sd) + cilk_fiber* fiber) { /* ASSERT: we own victim->lock */ @@ -741,7 +779,7 @@ static void detach_for_steal(__cilkrts_worker *w, __cilkrts_push_next_frame(w, loot_ff); // After this "push_next_frame" call, w now owns loot_ff. - child_ff = make_child(w, loot_ff, 0, sd); + child_ff = make_child(w, loot_ff, 0, fiber); BEGIN_WITH_FRAME_LOCK(w, child_ff) { /* install child in the victim's work queue, taking @@ -764,12 +802,90 @@ static void detach_for_steal(__cilkrts_worker *w, } END_WITH_FRAME_LOCK(w, parent_ff); } +/** + * @brief cilk_fiber_proc that resumes user code after a successful + * random steal. + + * This function longjmps back into the user code whose state is + * stored in cilk_fiber_get_data(fiber)->resume_sf. The stack pointer + * is adjusted so that the code resumes on the specified fiber stack + * instead of its original stack. + * + * This method gets executed only on a fiber freshly allocated from a + * pool. + * + * @param fiber The fiber being used to resume user code. + * @param arg Unused. + */ +static +void fiber_proc_to_resume_user_code_for_random_steal(cilk_fiber *fiber) +{ + cilk_fiber_data *data = cilk_fiber_get_data(fiber); + __cilkrts_stack_frame* sf = data->resume_sf; + full_frame *ff; + + CILK_ASSERT(sf); + + // When we pull the resume_sf out of the fiber to resume it, clear + // the old value. 
+ data->resume_sf = NULL; + CILK_ASSERT(sf->worker == data->owner); + ff = sf->worker->l->frame_ff; + + // For Win32, we need to overwrite the default exception handler + // in this function, so that when the OS exception handling code + // walks off the top of the current Cilk stack, it reaches our stub + // handler. + + // Also, this function needs to be wrapped into a try-catch block + // so the compiler generates the appropriate exception information + // in this frame. + + // TBD: IS THIS HANDLER IN THE WRONG PLACE? Can we longjmp out of + // this function (and does it matter?) +#if defined(_WIN32) && !defined(_WIN64) + install_exception_stub_handler(); + __try +#endif + { + char* new_sp = sysdep_reset_jump_buffers_for_resume(fiber, ff, sf); + + // Notify the Intel tools that we're stealing code + ITT_SYNC_ACQUIRED(sf->worker); +#ifdef ENABLE_NOTIFY_ZC_INTRINSIC + __notify_zc_intrinsic("cilk_continue", sf); +#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC + + // TBD: We'd like to move TBB-interop methods into the fiber + // eventually. + cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT); + + sf->flags &= ~CILK_FRAME_SUSPENDED; + + // longjmp to user code. Don't process exceptions here, + // because we are resuming a stolen frame. + sysdep_longjmp_to_sf(new_sp, sf, NULL); + /*NOTREACHED*/ + // Intel's C compiler respects the preceding lint pragma + } +#if defined(_WIN32) && !defined(_WIN64) + __except (CILK_ASSERT(!"should not execute the the stub filter"), + EXCEPTION_EXECUTE_HANDLER) + { + // If we are here, that means something very wrong + // has happened in our exception processing... + CILK_ASSERT(! "should not be here!"); + } +#endif +} + static void random_steal(__cilkrts_worker *w) { - __cilkrts_worker *victim; - __cilkrts_stack *sd; + __cilkrts_worker *victim = NULL; + cilk_fiber *fiber = NULL; int n; int success = 0; + int32_t victim_id; // Nothing's been stolen yet. 
When true, this will flag // setup_for_execution_pedigree to increment the pedigree @@ -785,16 +901,35 @@ static void random_steal(__cilkrts_worker *w) There must be only one worker to prevent stealing. */ CILK_ASSERT(w->g->total_workers > 1); - /* Verify that we can get a stack. If not, no need to continue. */ - sd = __cilkrts_get_stack(w); - if (NULL == sd) { + /* pick random *other* victim */ + n = myrand(w) % (w->g->total_workers - 1); + if (n >= w->self) + ++n; + + // If we're replaying a log, override the victim. -1 indicates that + // we've exhausted the list of things this worker stole when we recorded + // the log so just return. If we're not replaying a log, + // replay_get_next_recorded_victim() just returns the victim ID passed in. + n = replay_get_next_recorded_victim(w, n); + if (-1 == n) return; - } - /* pick random *other* victim */ - n = myrand(w) % (w->g->total_workers - 1); if (n >= w->self) ++n; victim = w->g->workers[n]; + START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE) { + /* Verify that we can get a stack. If not, no need to continue. */ + fiber = cilk_fiber_allocate(&w->l->fiber_pool); + } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE); + + + if (NULL == fiber) { +#if FIBER_DEBUG >= 2 + fprintf(stderr, "w=%d: failed steal because we could not get a fiber\n", + w->self); +#endif + return; + } + /* do not steal from self */ CILK_ASSERT (victim != w); @@ -802,10 +937,15 @@ static void random_steal(__cilkrts_worker *w) Avoid grabbing locks if there is nothing to steal. */ if (!can_steal_from(victim)) { NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_EMPTYQ); - __cilkrts_release_stack(w, sd); + START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) { + int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool); + // Fibers we use when trying to steal should not be active, + // and thus should not have any other references. 
+ CILK_ASSERT(0 == ref_count); + } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE); return; } - + /* Attempt to steal work from the victim */ if (worker_trylock_other(w, victim)) { if (w->l->type == WORKER_USER && victim->l->team != w) { @@ -828,24 +968,42 @@ static void random_steal(__cilkrts_worker *w) // though the victim may be executing. Thus, the lock on // the victim's deque is also protecting victim->frame_ff. if (dekker_protocol(victim)) { - START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) { - success = 1; - detach_for_steal(w, victim, sd); - #if REDPAR_DEBUG >= 1 - fprintf(stderr, "Wkr %d stole from victim %d, sd = %p\n", - w->self, victim->self, sd); - #endif - - // The use of victim->self contradicts our - // classification of the "self" field as - // local. But since this code is only for - // debugging, it is ok. - DBGPRINTF ("%d-%p: Stealing work from worker %d\n" - " sf: %p, call parent: %p\n", - w->self, GetCurrentFiber(), victim->self, - w->l->next_frame_ff->call_stack, - w->l->next_frame_ff->call_stack->call_parent); - } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS); + int proceed_with_steal = 1; // optimistic + + // If we're replaying a log, verify that this is the correct frame + // to steal from the victim + if (! replay_match_victim_pedigree(w, victim)) + { + // Abort the steal attempt. decrement_E(victim) to + // counter the increment_E(victim) done by the + // dekker protocol + decrement_E(victim); + proceed_with_steal = 0; + } + + if (proceed_with_steal) + { + START_INTERVAL(w, INTERVAL_STEAL_SUCCESS) { + success = 1; + detach_for_steal(w, victim, fiber); + victim_id = victim->self; + + #if REDPAR_DEBUG >= 1 + fprintf(stderr, "Wkr %d stole from victim %d, fiber = %p\n", + w->self, victim->self, fiber); + #endif + + // The use of victim->self contradicts our + // classification of the "self" field as + // local. But since this code is only for + // debugging, it is ok. 
+ DBGPRINTF ("%d-%p: Stealing work from worker %d\n" + " sf: %p, call parent: %p\n", + w->self, GetCurrentFiber(), victim->self, + w->l->next_frame_ff->call_stack, + w->l->next_frame_ff->call_stack->call_parent); + } STOP_INTERVAL(w, INTERVAL_STEAL_SUCCESS); + } // end if(proceed_with_steal) } else { NOTE_INTERVAL(w, INTERVAL_STEAL_FAIL_DEKKER); } @@ -862,11 +1020,28 @@ static void random_steal(__cilkrts_worker *w) w->l->work_stolen = success; if (0 == success) { - // failed to steal work. Return the stack to the pool. - __cilkrts_release_stack(w, sd); + // failed to steal work. Return the fiber to the pool. + START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) { + int ref_count = cilk_fiber_remove_reference(fiber, &w->l->fiber_pool); + // Fibers we use when trying to steal should not be active, + // and thus should not have any other references. + CILK_ASSERT(0 == ref_count); + } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE); + } + else + { + // Since our steal was successful, finish initialization of + // the fiber. + cilk_fiber_reset_state(fiber, + fiber_proc_to_resume_user_code_for_random_steal); + // Record the pedigree of the frame that w has stolen. + // record only if CILK_RECORD_LOG is set + replay_record_steal(w, victim_id); } } + + /** * At a provably good steal, we need to transfer the child reducer map * from ff->children_reducer_map into v->reducer_map, where v is the @@ -903,17 +1078,9 @@ static void provably_good_steal_exceptions(__cilkrts_worker *w, /* At sync discard the frame's old stack and take the leftmost child's. 
*/ static void provably_good_steal_stacks(__cilkrts_worker *w, full_frame *ff) { - __cilkrts_stack *s; - s = ff->stack_self; - ff->stack_self = ff->stack_child; - ff->stack_child = NULL; - if (s) { - __cilkrts_release_stack(w, s); - } - - /* We don't have a stack to bind right now, so use the - BIND_PROVABLY_GOOD_STACK magic number, instead */ - __cilkrts_bind_stack(ff, ff->sync_sp, BIND_PROVABLY_GOOD_STACK, NULL); + CILK_ASSERT(NULL == ff->fiber_self); + ff->fiber_self = ff->fiber_child; + ff->fiber_child = NULL; } static void __cilkrts_mark_synched(full_frame *ff) @@ -922,14 +1089,21 @@ static void __cilkrts_mark_synched(full_frame *ff) ff->simulated_stolen = 0; } -static int provably_good_steal(__cilkrts_worker *w, - full_frame *ff) +static +enum provably_good_steal_t provably_good_steal(__cilkrts_worker *w, + full_frame *ff) { // ASSERT: we hold w->lock and ff->lock - int abandoned = 1; // True if we can't make any more progress on this - // thread and are going to attempt to steal work from - // someone else + enum provably_good_steal_t result = ABANDON_EXECUTION; + + // If the current replay entry is a sync record matching the worker's + // pedigree, AND this isn't the last child to the sync, return + // WAIT_FOR_CONTINUE to indicate that the caller should loop until + // we find the right frame to steal and CONTINUE_EXECUTION is returned. 
+ int match_found = replay_match_sync_pedigree(w); + if (match_found && (0 != simulate_decjoin(ff))) + return WAIT_FOR_CONTINUE; START_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL) { if (decjoin(ff) == 0) { @@ -953,10 +1127,10 @@ static int provably_good_steal(__cilkrts_worker *w, // If this is the team leader we're not abandoning the work if (w == w->l->team) - abandoned = 0; + result = CONTINUE_EXECUTION; } else { __cilkrts_push_next_frame(w, ff); - abandoned = 0; // Continue working on this thread + result = CONTINUE_EXECUTION; // Continue working on this thread } // The __cilkrts_push_next_frame() call changes ownership @@ -964,7 +1138,16 @@ static int provably_good_steal(__cilkrts_worker *w, } } STOP_INTERVAL(w, INTERVAL_PROVABLY_GOOD_STEAL); - return abandoned; + // Only write a SYNC record if: + // - We're recording a log *AND* + // - We're the worker continuing from this sync + replay_record_sync(w, result == CONTINUE_EXECUTION); + + // If we're replaying a log, and matched a sync from the log, mark the + // sync record seen if the sync isn't going to be abandoned. + replay_advance_from_sync (w, match_found, result == CONTINUE_EXECUTION); + + return result; } static void unconditional_steal(__cilkrts_worker *w, @@ -1038,15 +1221,15 @@ static inline void splice_stacks_for_call(__cilkrts_worker *w, #endif /* A synched frame does not have accumulated child reducers. */ - CILK_ASSERT(!child_ff->stack_child); + CILK_ASSERT(!child_ff->fiber_child); CILK_ASSERT(child_ff->is_call_child); - /* An attached parent has no self stack. It may have - accumulated child stacks or child owners, which should be + /* An attached parent has no self fiber. It may have + accumulated child fibers or child owners, which should be ignored until sync. 
*/ - CILK_ASSERT(!parent_ff->stack_self); - parent_ff->stack_self = child_ff->stack_self; - child_ff->stack_self = NULL; + CILK_ASSERT(!parent_ff->fiber_self); + parent_ff->fiber_self = child_ff->fiber_self; + child_ff->fiber_self = NULL; } static void finalize_child_for_call(__cilkrts_worker *w, @@ -1221,15 +1404,178 @@ static void setup_for_execution(__cilkrts_worker *w, make_runnable(w, ff); } + +/* + * Called by the scheduling fiber, right before + * resuming a sf/ff for user code. + * + * This method associates the specified sf with the worker. + * + * It also asserts that w, ff, and sf all have the expected properties + * for resuming user code. + */ +void scheduling_fiber_prepare_to_resume_user_code(__cilkrts_worker *w, + full_frame *ff, + __cilkrts_stack_frame *sf) +{ + w->current_stack_frame = sf; + sf->worker = w; + + // Lots of debugging checks on the state of the fiber we might be + // resuming. +#if FIBER_DEBUG >= 1 +# if FIBER_DEBUG >= 3 + { + fprintf(stderr, "w=%d: ff=%p, sf=%p. about to resume user code\n", + w->self, ff, sf); + } +# endif + + const int flags = sf->flags; + CILK_ASSERT(flags & CILK_FRAME_SUSPENDED); + CILK_ASSERT(!sf->call_parent); + CILK_ASSERT(w->head == w->tail); + + /* A frame can not be resumed unless it was suspended. */ + CILK_ASSERT(ff->sync_sp != NULL); + + /* The leftmost frame has no allocated stack */ + if (ff->simulated_stolen) + CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED); + else if (flags & CILK_FRAME_UNSYNCHED) + /* XXX By coincidence sync_sp could be null. */ + CILK_ASSERT(ff->fiber_self != NULL); + else + /* XXX This frame could be resumed unsynched on the leftmost stack */ + CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w)); + CILK_ASSERT(w->l->frame_ff == ff); +#endif +} + + +/** + * This method is the first method that should execute after we've + * switched to a scheduling fiber from user code. + * + * @param fiber The scheduling fiber for the current worker. + * @param wptr The current worker. 
+ */ +static void enter_runtime_transition_proc(cilk_fiber *fiber) +{ + // We can execute this method for one of three reasons: + // 1. Undo-detach finds parent stolen. + // 2. Sync suspends frame. + // 3. Return from Cilk entry point. + // + // + // In cases 1 and 2, the frame may be truly suspended or + // may be immediately executed by this worker after provably_good_steal. + // + // + // There is a fourth case, which can, but does not need to execute + // this function: + // 4. Starting up the scheduling loop on a user or + // system worker. In this case, we won't have + // a scheduling stack function to run. + __cilkrts_worker* w = cilk_fiber_get_owner(fiber); + if (w->l->post_suspend) { + // Run the continuation function passed to longjmp_into_runtime + run_scheduling_stack_fcn(w); + + // After we have jumped into the runtime and run the + // scheduling function, any reducer map the worker had before entering the runtime + // should have already been saved into the appropriate full + // frame. + CILK_ASSERT(NULL == w->reducer_map); + + // There shouldn't be any uncaught exceptions. + // + // In Windows, the OS catches any exceptions not caught by the + // user code. Thus, we are omitting the check on Windows. + // + // On Android, calling std::uncaught_exception with the stlport + // library causes a seg fault. Since we're not supporting + // exceptions there at this point, just don't do the check + // + // TBD: Is this check also safe to do on Windows? + CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION(); + } +} + + +/** + * Method called to jump back to executing user code. + * + * A normal return from the runtime back to resuming user code calls + * this method. A computation executed using force_reduce also calls + * this method to return to user code. + * + * This function should not contain any code that depends on a fiber. + * In a force-reduce case, the user worker may not have a fiber. 
In + * the force-reduce case, we call this method directly instead of + * calling @c user_code_resume_after_switch_into_runtime. + */ +static inline NORETURN +cilkrts_resume(__cilkrts_stack_frame *sf, full_frame *ff) +{ + // Save the sync stack pointer, and do the bookkeeping + char* sync_sp = ff->sync_sp; + __cilkrts_take_stack(ff, sync_sp); // leaves ff->sync_sp null + + sf->flags &= ~CILK_FRAME_SUSPENDED; + // Actually longjmp to the user code. + // We may have exceptions to deal with, since we are resuming + // a previous-suspended frame. + sysdep_longjmp_to_sf(sync_sp, sf, ff); +} + + +/** + * Called by the user-code fiber right before resuming a full frame + * (sf/ff). + * + * This method pulls sf/ff out of the worker, and then calls + * cilkrts_resume to jump to user code. + */ +static NORETURN +user_code_resume_after_switch_into_runtime(cilk_fiber *fiber) +{ + __cilkrts_worker *w = cilk_fiber_get_owner(fiber); + __cilkrts_stack_frame *sf; + full_frame *ff; + sf = w->current_stack_frame; + ff = sf->worker->l->frame_ff; + +#if FIBER_DEBUG >= 1 + CILK_ASSERT(ff->fiber_self == fiber); + cilk_fiber_data *fdata = cilk_fiber_get_data(fiber); + DBGPRINTF ("%d-%p: resume_after_switch_into_runtime, fiber=%p\n", + w->self, w, fiber); + CILK_ASSERT(sf == fdata->resume_sf); +#endif + + // Notify the Intel tools that we're stealing code + ITT_SYNC_ACQUIRED(sf->worker); +#ifdef ENABLE_NOTIFY_ZC_INTRINSIC + __notify_zc_intrinsic("cilk_continue", sf); +#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC + cilk_fiber_invoke_tbb_stack_op(fiber, CILK_TBB_STACK_ADOPT); + + // Actually jump to user code. + cilkrts_resume(sf, ff); + } + + /* The current stack is about to either be suspended or destroyed. This * function will switch to the stack on which the scheduler is suspended and * resume running the scheduler within function do_work(). Upon waking up, * the scheduler will run the 'cont' function, using the supplied worker and * frame. 
*/ -static NORETURN longjmp_into_runtime(__cilkrts_worker *w, - scheduling_stack_fcn_t fcn, - __cilkrts_stack_frame *sf) +static NORETURN +longjmp_into_runtime(__cilkrts_worker *w, + scheduling_stack_fcn_t fcn, + __cilkrts_stack_frame *sf) { full_frame *ff, *ff2; @@ -1237,7 +1583,6 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w, ff = w->l->frame_ff; // If we've got only one worker, stealing shouldn't be possible. - // // Assume that this is a steal or return from spawn in a force-reduce case. // We don't have a scheduling stack to switch to, so call the continuation // function directly. @@ -1251,8 +1596,19 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w, ff2 = pop_next_frame(w); setup_for_execution(w, ff2, 0); - __cilkrts_resume(w, ff2, w->current_stack_frame); /* no return */ - CILK_ASSERT(("returned from __cilkrts_resume", 0)); + scheduling_fiber_prepare_to_resume_user_code(w, ff2, w->current_stack_frame); + cilkrts_resume(w->current_stack_frame, ff2); + +// Suppress clang warning that the expression result is unused +#if defined(__clang__) && (! defined(__INTEL_COMPILER)) +# pragma clang diagnostic push +# pragma clang diagnostic ignored "-Wunused-value" +#endif // __clang__ + /* no return */ + CILK_ASSERT(((void)"returned from __cilkrts_resume", 0)); +#if defined(__clang__) && (! defined(__INTEL_COMPILER)) +# pragma clang diagnostic pop +#endif // __clang__ } w->l->post_suspend = fcn; @@ -1261,43 +1617,91 @@ static NORETURN longjmp_into_runtime(__cilkrts_worker *w, ITT_SYNC_RELEASING(w); ITT_SYNC_PREPARE(w); - // If this is a user worker, and it's the first time that it's returned to - // a stolen parent, we need to import the thread. 
This will create a - // scheduling stack or fiber, switch to that, and run the scheduling loop - // on it - if ((WORKER_USER == w->l->type) && (0 == w->l->user_thread_imported)) - { - // We're importing the thread - w->l->user_thread_imported = 1; - __cilkrts_sysdep_import_user_thread(w); - CILK_ASSERT(0); // Should never reach this point. - } - - -#ifndef _WIN32 +#if FIBER_DEBUG >= 2 + fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime... w->l->frame_ff = %p, sf=%p\n", + cilkos_get_current_thread_id(), + w->self, w->l->frame_ff, + sf); +#endif - // Jump to this thread's scheduling stack. - longjmp(w->l->env, 1); -#else - DBGPRINTF ("%d-%p: longjmp_into_runtime - " - "Switching to scheduling fiber - %p\n" - " continuation routine: %p, sf: %p\n", - w->self, GetWorkerFiber(w), w->sysdep->scheduling_fiber, - fcn, sf); -#ifdef _DEBUG - SetWorkerThreadName(w, NULL); + // Current fiber is either the (1) one we are about to free, + // or (2) it has been passed up to the parent. + cilk_fiber *current_fiber = ( w->l->fiber_to_free ? + w->l->fiber_to_free : + w->l->frame_ff->parent->fiber_child ); + cilk_fiber_data* fdata = cilk_fiber_get_data(current_fiber); + CILK_ASSERT(NULL == w->l->frame_ff->fiber_self); + + // Clear the sf in the current fiber for cleanliness, to prevent + // us from accidentally resuming a bad sf. + // Technically, resume_sf gets overwritten for a fiber when + // we are about to resume it anyway. + fdata->resume_sf = NULL; + CILK_ASSERT(fdata->owner == w); + + // Set the function to execute immediately after switching to the + // scheduling fiber, but before freeing any fibers. + cilk_fiber_set_post_switch_proc(w->l->scheduling_fiber, + enter_runtime_transition_proc); + cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_ORPHAN); + + if (w->l->fiber_to_free) { + // Case 1: we are freeing this fiber. We never + // resume this fiber again after jumping into the runtime. + w->l->fiber_to_free = NULL; + + // Extra check. 
Normally, the fiber we are about to switch to + // should have a NULL owner. + CILK_ASSERT(NULL == cilk_fiber_get_data(w->l->scheduling_fiber)->owner); +#if FIBER_DEBUG >= 4 + fprintf(stderr, "ThreadId=%p, W=%d: about to switch into runtime.. current_fiber = %p, deallcoate, switch to fiber %p\n", + cilkos_get_current_thread_id(), + w->self, + current_fiber, w->l->scheduling_fiber); #endif - SwitchToFiber(w->sysdep->scheduling_fiber); - - /* Since we switched away from the fiber on which this function was - * entered, we will not get here until either the initial fiber is - * resumed. If the initial fiber belonged to a thief at a sync, then - * the longjmp below will re-initialize the fiber for another steal. - * If this fiber belonged to a victim, then the longjmp below will - * resume the victim after the sync. - */ - __cilkrts_resume_after_longjmp_into_runtime(); + cilk_fiber_invoke_tbb_stack_op(current_fiber, CILK_TBB_STACK_RELEASE); + NOTE_INTERVAL(w, INTERVAL_DEALLOCATE_RESUME_OTHER); + cilk_fiber_remove_reference_from_self_and_resume_other(current_fiber, + &w->l->fiber_pool, + w->l->scheduling_fiber); + // We should never come back here! + CILK_ASSERT(0); + } + else { + // Case 2: We are passing the fiber to our parent because we + // are leftmost. We should come back later to + // resume execution of user code. + // + // If we are not freeing a fiber, there we must be + // returning from a spawn or processing an exception. The + // "sync" path always frees a fiber. + // + // We must be the leftmost child, and by left holder logic, we + // have already moved the current fiber into our parent full + // frame. +#if FIBER_DEBUG >= 2 + fprintf(stderr, "ThreadId=%p, W=%d: about to suspend self into runtime.. 
current_fiber = %p, deallcoate, switch to fiber %p\n", + cilkos_get_current_thread_id(), + w->self, + current_fiber, w->l->scheduling_fiber); +#endif + + NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER); + + cilk_fiber_suspend_self_and_resume_other(current_fiber, + w->l->scheduling_fiber); + // Resuming this fiber returns control back to + // this function because our implementation uses OS fibers. + // + // On Unix, we could have the choice of passing the + // user_code_resume_after_switch_into_runtime as an extra "resume_proc" + // that resumes execution of user code instead of the + // jumping back here, and then jumping back to user code. +#if FIBER_DEBUG >= 2 + CILK_ASSERT(fdata->owner == __cilkrts_get_tls_worker()); #endif + user_code_resume_after_switch_into_runtime(current_fiber); + } } /* @@ -1340,73 +1744,15 @@ static void notify_children_run(__cilkrts_worker *w) notify_children(w, 1); } -static void do_work(__cilkrts_worker *w, full_frame *ff) -{ - __cilkrts_stack_frame *sf; - -#ifndef _WIN32 - cilkbug_assert_no_uncaught_exception(); -#endif - - BEGIN_WITH_WORKER_LOCK(w) { - CILK_ASSERT(!w->l->frame_ff); - BEGIN_WITH_FRAME_LOCK(w, ff) { - sf = ff->call_stack; - CILK_ASSERT(sf && !sf->call_parent); - setup_for_execution(w, ff, 0); - } END_WITH_FRAME_LOCK(w, ff); - } END_WITH_WORKER_LOCK(w); - -#if CILK_LIB_DEBUG - if (!(sf->flags & CILK_FRAME_UNSYNCHED)) - CILK_ASSERT(!ff->stack_child); - if (sf->flags & CILK_FRAME_EXITING) { - __cilkrts_bug("W%d: resuming frame %p/%p suspended in exit\n", - w->self, ff, sf); - } -#endif - - /* run it */ - if (setjmp(w->l->env) == 0) { - __cilkrts_resume(w, ff, sf); - - /* unreached---the call to cilk_resume exits through longjmp */ - CILK_ASSERT(0); - } - - /* This point is reached for three reasons: - - 1. Undo-detach finds parent stolen. - - 2. Sync suspends frame. - - 3. Return from Cilk entry point. 
- - In the first two cases the frame may be truly suspended or - may be immediately executed by this worker after provably_good_steal. - - The active frame and call_stack may have changed since _resume. */ - run_scheduling_stack_fcn(w); - - /* The worker borrowed the full frame's reducer map. - Clear the extra reference. Bookkeeping uses the - copy in the frame, not the worker. */ - w->reducer_map = 0; - -#ifndef _WIN32 - cilkbug_assert_no_uncaught_exception(); -#endif -} - -/* - * Try to do work. If there is none available, try to steal some and do it. +/** + * A single "check" to find work, either on our queue or through a + * steal attempt. This method checks our local queue once, and + * performs one steal attempt. */ -static void schedule_work(__cilkrts_worker *w) +static full_frame* check_for_work(__cilkrts_worker *w) { - full_frame *ff; - + full_frame *ff = NULL; ff = pop_next_frame(w); - // If there is no work on the queue, try to steal some. if (NULL == ff) { START_INTERVAL(w, INTERVAL_STEALING) { @@ -1418,6 +1764,10 @@ static void schedule_work(__cilkrts_worker *w) w->l->team = NULL; __cilkrts_worker_unlock(w); } + + // If we are about to do a random steal, we should have no + // full frame... + CILK_ASSERT(NULL == w->l->frame_ff); random_steal(w); } STOP_INTERVAL(w, INTERVAL_STEALING); @@ -1429,82 +1779,315 @@ static void schedule_work(__cilkrts_worker *w) // No quantum for you! __cilkrts_yield(); w->l->steal_failure_count++; - return; } else { // Reset steal_failure_count since there is obviously still work to // be done. w->l->steal_failure_count = 0; } } - CILK_ASSERT(ff); + return ff; +} - // Do the work that was on the queue or was stolen. - START_INTERVAL(w, INTERVAL_WORKING) { - do_work(w, ff); - ITT_SYNC_SET_NAME_AND_PREPARE(w, w->l->sync_return_address); - } STOP_INTERVAL(w, INTERVAL_WORKING); +/** + * Keep stealing or looking on our queue. + * + * Returns either when a full frame is found, or NULL if the + * computation is done. 
+ */ +static full_frame* search_until_work_found_or_done(__cilkrts_worker *w) +{ + full_frame *ff = NULL; + // Find a full frame to execute (either through random stealing, + // or because we pull it off w's 1-element queue). + while (!ff) { + // Check worker state to figure out our next action. + switch (worker_runnable(w)) + { + case SCHEDULE_RUN: // One attempt at checking for work. + ff = check_for_work(w); + break; + case SCHEDULE_WAIT: // go into wait-mode. + CILK_ASSERT(WORKER_SYSTEM == w->l->type); + // If we are about to wait, then we better not have + // a frame that we should execute... + CILK_ASSERT(NULL == w->l->next_frame_ff); + notify_children_wait(w); + signal_node_wait(w->l->signal_node); + // ... + // Runtime is waking up. + notify_children_run(w); + w->l->steal_failure_count = 0; + break; + case SCHEDULE_EXIT: // exit the scheduler. + CILK_ASSERT(WORKER_USER != w->l->type); + return NULL; + default: + CILK_ASSERT(0); + abort(); + } + } + return ff; } -static void __cilkrts_scheduler(__cilkrts_worker *w) +/** + * The proc method for a scheduling fiber on a user worker. + * + * When a user worker jumps into the runtime, it jumps into this + * method by either starting it if the scheduling fiber has never run + * before, or resuming the fiber if it was previously suspended. + */ +COMMON_PORTABLE +void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber) { - ITT_SYNC_PREPARE(w); + __cilkrts_worker* w = cilk_fiber_get_owner(fiber); + CILK_ASSERT(w); - START_INTERVAL(w, INTERVAL_IN_SCHEDULER) { + // This must be a user worker + CILK_ASSERT(WORKER_USER == w->l->type); - /* this thread now becomes a worker---associate the thread - with the worker state */ - __cilkrts_set_tls_worker(w); + // If we aren't the current worker, then something is very wrong + // here.. + verify_current_wkr(w); - /* Notify tools about the new worker. Inspector needs this, but we - don't want to confuse Cilkscreen with system threads. 
User threads - do this notification in bind_thread */ - if (! w->g->under_ptool) - __cilkrts_cilkscreen_establish_worker(w); + __cilkrts_run_scheduler_with_exceptions(w); +} - mysrand(w, (w->self + 1)); - if (WORKER_SYSTEM == w->l->type) { - // Runtime begins in a wait-state and is woken up by the first user - // worker when the runtime is ready. - signal_node_wait(w->l->signal_node); - // ... - // Runtime is waking up. - notify_children_run(w); - w->l->steal_failure_count = 0; +/** + * The body of the runtime scheduling loop. This function executes in + * 4 stages: + * + * 1. Transitions from the user code into the runtime by + * executing any scheduling-stack functions. + * 2. Looks for a full frame enqueued from a successful provably + * good steal. + * 3. If no full frame is found in step 2, steal until + * a frame is found or we are done. If we are done, finish + * the scheduling loop. + * 4. When a frame is found, setup to resume user code. + * In particular, suspend the current fiber and resume the + * user fiber to execute the frame. + * + * Returns a fiber object that we should switch to after completing + * the body of the loop, or NULL if we should continue executing on + * this fiber. + * + * @pre @c current_fiber should equal @c wptr->l->scheduling_fiber + * + * @param current_fiber The currently executing (scheduling_ fiber + * @param wptr The currently executing worker. + * @param return The next fiber we should switch to. + */ +static cilk_fiber* worker_scheduling_loop_body(cilk_fiber* current_fiber, + void* wptr) +{ + __cilkrts_worker *w = (__cilkrts_worker*) wptr; + CILK_ASSERT(current_fiber == w->l->scheduling_fiber); + + // Stage 1: Transition from executing user code to the runtime code. + // We don't need to do this call here any more, because + // every switch to the scheduling fiber should make this call + // using a post_switch_proc on the fiber. 
+ // + // enter_runtime_transition_proc(w->l->scheduling_fiber, wptr); + + // After Stage 1 is complete, w should no longer have + // an associated full frame. + CILK_ASSERT(NULL == w->l->frame_ff); + + // Stage 2. First do a quick check of our 1-element queue. + full_frame *ff = pop_next_frame(w); + + if (!ff) { + // Stage 3. We didn't find anything from our 1-element + // queue. Now go through the steal loop to find work. + ff = search_until_work_found_or_done(w); + if (!ff) { + CILK_ASSERT(w->g->work_done); + return NULL; } + } - while (!w->g->work_done) { + // Stage 4. Now that we have found a full frame to work on, + // actually execute it. + __cilkrts_stack_frame *sf; - switch (worker_runnable(w)) - { - case SCHEDULE_RUN: // do some work. - schedule_work(w); - break; - - case SCHEDULE_WAIT: // go into wait-mode. - CILK_ASSERT(WORKER_SYSTEM == w->l->type); - notify_children_wait(w); - signal_node_wait(w->l->signal_node); - // ... - // Runtime is waking up. - notify_children_run(w); - w->l->steal_failure_count = 0; - break; + // There shouldn't be any uncaught exceptions. + // + // In Windows, the OS catches any exceptions not caught by the + // user code. Thus, we are omitting the check on Windows. + // + // On Android, calling std::uncaught_exception with the stlport + // library causes a seg fault. Since we're not supporting + // exceptions there at this point, just don't do the check + CILKBUG_ASSERT_NO_UNCAUGHT_EXCEPTION(); + + BEGIN_WITH_WORKER_LOCK(w) { + CILK_ASSERT(!w->l->frame_ff); + BEGIN_WITH_FRAME_LOCK(w, ff) { + sf = ff->call_stack; + CILK_ASSERT(sf && !sf->call_parent); + setup_for_execution(w, ff, 0); + } END_WITH_FRAME_LOCK(w, ff); + } END_WITH_WORKER_LOCK(w); - case SCHEDULE_EXIT: // exit the scheduler. - CILK_ASSERT(WORKER_USER != w->l->type); - break; + /* run it */ + // + // Prepare to run the full frame. 
To do so, we need to: + // (a) Execute some code on this fiber (the scheduling + // fiber) to set up data structures, and + // (b) Suspend the scheduling fiber, and resume the + // user-code fiber. - default: - CILK_ASSERT(0); - abort(); - } + // Part (a). Set up data structures. + scheduling_fiber_prepare_to_resume_user_code(w, ff, sf); - } // while (!w->g->work_done) + cilk_fiber *other = w->l->frame_ff->fiber_self; + cilk_fiber_data* other_data = cilk_fiber_get_data(other); + cilk_fiber_data* current_fiber_data = cilk_fiber_get_data(current_fiber); - } STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER); + // I believe two cases are possible here, both of which + // should have other_data->resume_sf as NULL. + // + // 1. Resuming a fiber that was previously executing + // user code (i.e., a provably-good-steal). + // In this case, resume_sf should have been + // set to NULL when it was suspended. + // + // 2. Resuming code on a steal. In this case, since we + // grabbed a new fiber, resume_sf should be NULL. + CILK_ASSERT(NULL == other_data->resume_sf); + +#if FIBER_DEBUG >= 2 + fprintf(stderr, "W=%d: other fiber=%p, setting resume_sf to %p\n", + w->self, other, other_data->resume_sf); +#endif + // Update our own fiber's data. + current_fiber_data->resume_sf = NULL; + // The scheduling fiber should have the right owner from before. + CILK_ASSERT(current_fiber_data->owner == w); + other_data->resume_sf = sf; + - CILK_ASSERT(WORKER_SYSTEM == w->l->type); +#if FIBER_DEBUG >= 3 + fprintf(stderr, "ThreadId=%p (about to suspend self resume other), W=%d: current_fiber=%p, other=%p, current_fiber->resume_sf = %p, other->resume_sf = %p\n", + cilkos_get_current_thread_id(), + w->self, + current_fiber, other, + current_fiber_data->resume_sf, + other_data->resume_sf); +#endif + return other; +} + + +/** + * This function is executed once by each worker, to initialize its + * scheduling loop. 
+ */ +static void worker_scheduler_init_function(__cilkrts_worker *w) +{ + // First, execute the startup tasks that must happen for all + // worker types. + ITT_SYNC_PREPARE(w); + /* Notify tools about the new worker. Inspector needs this, but we + don't want to confuse Cilkscreen with system threads. User threads + do this notification in bind_thread */ + if (! w->g->under_ptool) + __cilkrts_cilkscreen_establish_worker(w); + + // Seed the initial random number generator. + // If we forget to do this, then the worker always steals from 0. + // Programs will still execute correctly, but + // you may see a subtle performance bug... + mysrand(w, (w->self + 1)); + + // The startup work varies, depending on the worker type. + switch (w->l->type) { + case WORKER_USER: + // Stop working once we've entered the scheduler. + // For user workers, INTERVAL_IN_SCHEDULER counts the time + // since we called bind_thread. + break; + + case WORKER_SYSTEM: + // If a system worker is starting, we must also be starting + // the runtime. + + // Runtime begins in a wait-state and is woken up by the first user + // worker when the runtime is ready. + signal_node_wait(w->l->signal_node); + // ... + // Runtime is waking up. + notify_children_run(w); + w->l->steal_failure_count = 0; + + // For system threads, count all the time this thread is + // alive in the scheduling loop. + START_INTERVAL(w, INTERVAL_IN_SCHEDULER); + START_INTERVAL(w, INTERVAL_WORKING); + break; + default: + __cilkrts_bug("Unknown worker %p of type %d entering scheduling loop\n", + w, w->l->type); + } +} + +/** + * This function is executed once by each worker, to finish its + * scheduling loop. + * + * @note Currently, only system workers finish their loops. User + * workers will jump away to user code without exiting their + * scheduling loop. + */ +static void worker_scheduler_terminate_function(__cilkrts_worker *w) +{ + // A user worker should never finish by falling through the + // scheduling loop. 
+ CILK_ASSERT(WORKER_USER != w->l->type); + STOP_INTERVAL(w, INTERVAL_IN_RUNTIME); + STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER); +} + +/** + * The main scheduler function executed by a worker's scheduling + * fiber. + * + * This method is started by either a new system worker, or a user + * worker that has stalled and just been imported into the runtime. + */ +static void worker_scheduler_function(__cilkrts_worker *w) +{ + worker_scheduler_init_function(w); + + // The main scheduling loop body. + + while (!w->g->work_done) { + // Set intervals. Now we are in the runtime instead of working. + START_INTERVAL(w, INTERVAL_IN_RUNTIME); + STOP_INTERVAL(w, INTERVAL_WORKING); + + // Execute the "body" of the scheduling loop, and figure + // out the fiber to jump to next. + cilk_fiber* fiber_to_resume + = worker_scheduling_loop_body(w->l->scheduling_fiber, w); + + if (fiber_to_resume) { + // Suspend the current fiber and resume next one. + NOTE_INTERVAL(w, INTERVAL_SUSPEND_RESUME_OTHER); + STOP_INTERVAL(w, INTERVAL_IN_RUNTIME); + START_INTERVAL(w, INTERVAL_WORKING); + cilk_fiber_suspend_self_and_resume_other(w->l->scheduling_fiber, + fiber_to_resume); + + // Return here only when this (scheduling) fiber is + // resumed (i.e., this worker wants to reenter the runtime). + } + } + + // Finish the scheduling loop. + worker_scheduler_terminate_function(w); } @@ -1617,44 +2200,35 @@ NORETURN __cilkrts_c_sync(__cilkrts_worker *w, w = execute_reductions_for_sync(w, ff, sf_at_sync); +#if FIBER_DEBUG >= 3 + fprintf(stderr, "ThreadId=%p, w->self = %d. 
about to longjmp_into_runtim[c_sync] with ff=%p\n", + cilkos_get_current_thread_id(), w->self, ff); +#endif + longjmp_into_runtime(w, do_sync, sf_at_sync); } static void do_sync(__cilkrts_worker *w, full_frame *ff, __cilkrts_stack_frame *sf) { - int abandoned = 1; + //int abandoned = 1; + enum provably_good_steal_t steal_result = ABANDON_EXECUTION; + START_INTERVAL(w, INTERVAL_SYNC_CHECK) { BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) { - ff = w->l->frame_ff; - w->l->frame_ff = NULL; - // Conceptually, after clearing w->l->frame_ff, - // w no longer owns the full frame ff. - // The next time another (possibly different) worker takes - // ownership of ff will be at a provably_good_steal on ff. CILK_ASSERT(ff); BEGIN_WITH_FRAME_LOCK(w, ff) { CILK_ASSERT(sf->call_parent == 0); CILK_ASSERT(sf->flags & CILK_FRAME_UNSYNCHED); - /* A frame entering a nontrivial sync always has a - stack_self. A topmost frame after a sync does - not; it is back on the caller's stack. */ - CILK_ASSERT(ff->stack_self || ff->simulated_stolen); - - // Notify TBB that we're orphaning the stack. We'll reclaim it - // again if we continue - __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ORPHAN, ff->stack_self); + // Before switching into the scheduling fiber, we should have + // already taken care of deallocating the current + // fiber. 
+ CILK_ASSERT(NULL == ff->fiber_self); - /* if (ff->stack_self) see above comment */ { - __cilkrts_stack *s = ff->stack_self; - ff->stack_self = NULL; - __cilkrts_release_stack(w, s); - } - - // Update the frame's pedigree information if this is an ABI 1 or later - // frame + // Update the frame's pedigree information if this is an ABI 1 + // or later frame if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) { sf->parent_pedigree.rank = w->pedigree.rank; @@ -1666,16 +2240,43 @@ static void do_sync(__cilkrts_worker *w, full_frame *ff, } /* the decjoin() occurs in provably_good_steal() */ - abandoned = provably_good_steal(w, ff); - + steal_result = provably_good_steal(w, ff); + } END_WITH_FRAME_LOCK(w, ff); + // set w->l->frame_ff = NULL after checking abandoned + if (WAIT_FOR_CONTINUE != steal_result) { + w->l->frame_ff = NULL; + } } END_WITH_WORKER_LOCK_OPTIONAL(w); } STOP_INTERVAL(w, INTERVAL_SYNC_CHECK); + // Now, if we are in a replay situation and provably_good_steal() returned + // WAIT_FOR_CONTINUE, we should sleep, reacquire locks, call + // provably_good_steal(), and release locks until we get a value other + // than WAIT_FOR_CONTINUE from the function. +#ifdef CILK_RECORD_REPLAY + // We don't have to explicitly check for REPLAY_LOG below because + // steal_result can only be set to WAIT_FOR_CONTINUE during replay + while(WAIT_FOR_CONTINUE == steal_result) + { + __cilkrts_sleep(); + BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) + { + ff = w->l->frame_ff; + BEGIN_WITH_FRAME_LOCK(w, ff) + { + steal_result = provably_good_steal(w, ff); + } END_WITH_FRAME_LOCK(w, ff); + if (WAIT_FOR_CONTINUE != steal_result) + w->l->frame_ff = NULL; + } END_WITH_WORKER_LOCK_OPTIONAL(w); + } +#endif // CILK_RECORD_REPLAY + #ifdef ENABLE_NOTIFY_ZC_INTRINSIC // If we can't make any further progress on this thread, tell Inspector // that we're abandoning the work and will go find something else to do. 
- if (abandoned) + if (ABANDON_EXECUTION == steal_result) { __notify_zc_intrinsic("cilk_sync_abandon", 0); } @@ -1690,15 +2291,38 @@ static void do_sync(__cilkrts_worker *w, full_frame *ff, purposes. */ void __cilkrts_promote_own_deque(__cilkrts_worker *w) { + // Remember the fiber we start this method on. + CILK_ASSERT(w->l->frame_ff); + cilk_fiber* starting_fiber = w->l->frame_ff->fiber_self; + BEGIN_WITH_WORKER_LOCK(w) { while (dekker_protocol(w)) { - /* PLACEHOLDER_STACK is used as non-null marker to tell detach() + /* PLACEHOLDER_FIBER is used as non-null marker to tell detach() and make_child() that this frame should be treated as a spawn parent, even though we have not assigned it a stack. */ - detach_for_steal(w, w, PLACEHOLDER_STACK); - + detach_for_steal(w, w, PLACEHOLDER_FIBER); } } END_WITH_WORKER_LOCK(w); + + + // TBD: The management of full frames and fibers is a bit + // sketchy here. We are promoting stack frames into full frames, + // and pretending they are stolen away, but no other worker is + // actually working on them. Some runtime invariants + // may be broken here. + // + // Technically, if we are simulating a steal from w + // w should get a new full frame, but + // keep the same fiber. A real thief would be taking the + // loot frame away, get a new fiber, and starting executing the + // loot frame. + // + // What should a fake thief do? Where does the frame go? + + // In any case, we should be finishing the promotion process with + // the same fiber with. + CILK_ASSERT(w->l->frame_ff); + CILK_ASSERT(w->l->frame_ff->fiber_self == starting_fiber); } @@ -1717,6 +2341,7 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w, full_frame *ff; int stolen_p; __cilkrts_stack_frame *saved_sf = NULL; + START_INTERVAL(w, INTERVAL_THE_EXCEPTION_CHECK); BEGIN_WITH_WORKER_LOCK(w) { @@ -1761,6 +2386,13 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w, { w = execute_reductions_for_spawn_return(w, ff, returning_sf); + // "Mr. Policeman? 
My parent always told me that if I was in trouble + // I should ask a nice policeman for help. I can't find my parent + // anywhere..." + // + // Write a record to the replay log for an attempt to return to a stolen parent + replay_record_orphaned(w); + // Update the pedigree only after we've finished the // reductions. update_pedigree_on_leave_frame(w, returning_sf); @@ -1772,9 +2404,9 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w, __notify_zc_intrinsic("cilk_leave_stolen", saved_sf); #endif // defined ENABLE_NOTIFY_ZC_INTRINSIC - DBGPRINTF ("%d-%p: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self, GetWorkerFiber(w)); + DBGPRINTF ("%d: longjmp_into_runtime from __cilkrts_c_THE_exception_check\n", w->self); longjmp_into_runtime(w, do_return_from_spawn, 0); - DBGPRINTF ("%d-%p: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self, GetWorkerFiber(w)); + DBGPRINTF ("%d: returned from longjmp_into_runtime from __cilkrts_c_THE_exception_check?!\n", w->self); } else { @@ -1803,44 +2435,47 @@ static void do_return_from_spawn(__cilkrts_worker *w, __cilkrts_stack_frame *sf) { full_frame *parent_ff; + enum provably_good_steal_t steal_result = ABANDON_EXECUTION; + BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) { CILK_ASSERT(ff); CILK_ASSERT(!ff->is_call_child); - CILK_ASSERT(ff == w->l->frame_ff); CILK_ASSERT(sf == NULL); parent_ff = ff->parent; BEGIN_WITH_FRAME_LOCK(w, ff) { - if( ff->stack_self ) - { - // Notify TBB that we're returning from a spawn and orphaning - // the stack. 
We'll re-adopt it if we continue - __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ORPHAN, - ff->stack_self); - } decjoin(ff); } END_WITH_FRAME_LOCK(w, ff); BEGIN_WITH_FRAME_LOCK(w, parent_ff) { - __cilkrts_stack* stack_to_free = w->l->stack_to_free; - w->l->stack_to_free = NULL; - w->l->frame_ff = NULL; - - if (stack_to_free) { - __cilkrts_release_stack(w, stack_to_free); - } - ff->stack_self = NULL; - - if (parent_ff->simulated_stolen) { + if (parent_ff->simulated_stolen) unconditional_steal(w, parent_ff); - } - else { - provably_good_steal(w, parent_ff); - } + else + steal_result = provably_good_steal(w, parent_ff); } END_WITH_FRAME_LOCK(w, parent_ff); } END_WITH_WORKER_LOCK_OPTIONAL(w); + // Loop here in replay mode +#ifdef CILK_RECORD_REPLAY + // We don't have to explicitly check for REPLAY_LOG below because + // steal_result can only get set to WAIT_FOR_CONTINUE during replay. + // We also don't have to worry about the simulated_stolen flag + // because steal_result can only be set to WAIT_FOR_CONTINUE by + // provably_good_steal(). + while(WAIT_FOR_CONTINUE == steal_result) + { + __cilkrts_sleep(); + BEGIN_WITH_WORKER_LOCK_OPTIONAL(w) + { + BEGIN_WITH_FRAME_LOCK(w, parent_ff) + { + steal_result = provably_good_steal(w, parent_ff); + } END_WITH_FRAME_LOCK(w, parent_ff); + } END_WITH_WORKER_LOCK_OPTIONAL(w); + } +#endif // CILK_RECORD_REPLAY + // Cleanup the child frame. __cilkrts_destroy_full_frame(w, ff); return; @@ -1897,6 +2532,22 @@ __cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w) return sf; } +#ifdef CILK_RECORD_REPLAY +__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w) +{ + __cilkrts_stack_frame *sf; + BEGIN_WITH_WORKER_LOCK(w) { + if (w->head < w->tail) { + sf = *(w->tail-1); + } else { + sf = 0; + } + } END_WITH_WORKER_LOCK(w); + return sf; +} +#endif + + /* Return from a call, not a spawn. 
*/ void __cilkrts_return(__cilkrts_worker *w) { @@ -1977,7 +2628,6 @@ static void __cilkrts_unbind_thread() STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER); } - __cilkrts_sysdep_unbind_thread(w); __cilkrts_set_tls_worker(0); if (w->self == -1) { @@ -2023,10 +2673,39 @@ void __cilkrts_c_return_from_initial(__cilkrts_worker *w) CILK_ASSERT(ff->join_counter == 1); w->l->frame_ff = 0; - CILK_ASSERT(ff->stack_self); + CILK_ASSERT(ff->fiber_self); // Save any TBB interop data for the next time this thread enters Cilk - tbb_interop_save_info_from_stack(ff->stack_self); - sysdep_destroy_user_stack(ff->stack_self); + cilk_fiber_tbb_interop_save_info_from_stack(ff->fiber_self); + + // Deallocate cilk_fiber that mapped to the user stack. The stack + // itself does not get deallocated (of course) but our data + // structure becomes divorced from it. + +#if FIBER_DEBUG >= 1 + fprintf(stderr, "ThreadId=%p: w=%d: We are about to deallocate ff->fiber_self = %p here. w->l->scheduling_fiber = %p. w->l->type = %d\n", + cilkos_get_current_thread_id(), + w->self, + ff->fiber_self, + w->l->scheduling_fiber, + w->l->type); +#endif + // The fiber in ff is a user-code fiber. The fiber in + // w->l->scheduling_fiber is a scheduling fiber. These fibers should + // never be equal. When a user worker returns (and will unbind), we + // should destroy only the fiber in ff. The scheduling fiber will be + // re-used. + + CILK_ASSERT(ff->fiber_self != w->l->scheduling_fiber); + + START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE) { + // This fiber might not be deallocated here if there + // is a pending exception on Windows that refers + // to this fiber. + // + // First "suspend" the fiber, and then try to delete it. 
+ cilk_fiber_deallocate_from_thread(ff->fiber_self); + } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE); + ff->fiber_self = NULL; /* Save reducer map into global_state object */ rm = w->reducer_map; @@ -2052,9 +2731,25 @@ void __cilkrts_c_return_from_initial(__cilkrts_worker *w) __cilkrts_destroy_reducer_map(w, rm); } + +#if FIBER_DEBUG >= 1 + __cilkrts_worker* tmp = w; + int tmp_id = w->self; + fprintf(stderr, "w=%d: We are about unbind thread (w= %p)\n", + w->self, + w); +#endif + w = NULL; + __cilkrts_unbind_thread(); +#if FIBER_DEBUG >= 1 + + fprintf(stderr, "w=%p, %d: Finished unbind\n", + tmp, tmp_id); +#endif + /* Other workers will stop trying to steal if this was the last worker. */ return; @@ -2128,62 +2823,75 @@ __cilkrts_worker *make_worker(global_state_t *g, w->l = (local_state *)__cilkrts_malloc(sizeof(*w->l)); - __cilkrts_init_stats(&w->l->stats); - __cilkrts_frame_malloc_per_worker_init(w); + w->reducer_map = NULL; + w->current_stack_frame = NULL; + w->reserved = NULL; + w->l->worker_magic_0 = WORKER_MAGIC_0; + w->l->team = NULL; + w->l->type = WORKER_FREE; + __cilkrts_mutex_init(&w->l->lock); __cilkrts_mutex_init(&w->l->steal_lock); w->l->do_not_steal = 0; w->l->frame_ff = 0; + w->l->next_frame_ff = 0; + w->l->last_full_frame = NULL; + w->l->ltq = (__cilkrts_stack_frame **) __cilkrts_malloc(g->ltqsize * sizeof(*w->l->ltq)); w->ltq_limit = w->l->ltq + g->ltqsize; - - w->l->original_pedigree_leaf = NULL; + w->head = w->tail = w->l->ltq; + cilk_fiber_pool_init(&w->l->fiber_pool, + &g->fiber_pool, + g->stack_size, + g->fiber_pool_size, + 0, // alloc_max is 0. We don't allocate from the heap directly without checking the parent pool. 
+ 0); +#if FIBER_DEBUG >= 2 + fprintf(stderr, "ThreadId=%p: Making w=%d (%p), pool = %p\n", + cilkos_get_current_thread_id(), + w->self, w, + &w->l->fiber_pool); +#endif + w->l->scheduling_fiber = NULL; + w->l->original_pedigree_leaf = NULL; w->l->rand_seed = 0; /* the scheduler will overwrite this field */ - w->l->next_frame_ff = 0; - __cilkrts_init_stack_cache(w, &w->l->stack_cache, g->stack_cache_size); - - w->head = w->tail = w->l->ltq; - - w->reducer_map = NULL; - - w->current_stack_frame = NULL; - - w->l->pending_exception = NULL; - w->l->worker_magic_1 = WORKER_MAGIC_1; w->l->post_suspend = 0; w->l->suspended_stack = 0; - w->l->stack_to_free = NULL; + w->l->fiber_to_free = NULL; + w->l->pending_exception = NULL; +#if CILK_PROFILE + w->l->stats = __cilkrts_malloc(sizeof(statistics)); + __cilkrts_init_stats(w->l->stats); +#else + w->l->stats = NULL; +#endif w->l->steal_failure_count = 0; - w->l->team = NULL; - w->l->last_full_frame = NULL; - - w->l->scheduler_stack = NULL; + w->l->work_stolen = 0; + // Initialize record/replay assuming we're doing neither + w->l->record_replay_fptr = NULL; + w->l->replay_list_root = NULL; + w->l->replay_list_entry = NULL; w->l->signal_node = NULL; + // Nothing's been stolen yet + w->l->worker_magic_1 = WORKER_MAGIC_1; - w->reserved = NULL; /*w->parallelism_disabled = 0;*/ // Allow stealing all frames. Sets w->saved_protected_tail __cilkrts_restore_stealing(w, w->ltq_limit); - - w->l->type = WORKER_FREE; - w->l->user_thread_imported = 0; - - // Nothing's been stolen yet - w->l->work_stolen = 0; - + __cilkrts_init_worker_sysdep(w); - reset_THE_exception(w); + reset_THE_exception(w); return w; } @@ -2192,13 +2900,39 @@ void destroy_worker(__cilkrts_worker *w) { CILK_ASSERT (NULL == w->l->pending_exception); - /* Free any cached stack. 
*/ - __cilkrts_destroy_stack_cache(w, w->g, &w->l->stack_cache); + // Deallocate the scheduling fiber + if (NULL != w->l->scheduling_fiber) + { + // The scheduling fiber is the main fiber for system workers and must + // be deallocated by the thread that created it. Thus, we can + // deallocate only free workers' (formerly user workers) scheduling + // fibers here. + CILK_ASSERT(WORKER_FREE == w->l->type); + +#if FIBER_DEBUG >=1 + fprintf(stderr, "ThreadId=%p, w=%p, %d, deallocating scheduling fiber = %p, \n", + cilkos_get_current_thread_id(), + w, + w->self, + w->l->scheduling_fiber); +#endif + int ref_count = cilk_fiber_remove_reference(w->l->scheduling_fiber, NULL); + // Scheduling fiber should never have extra references because of exceptions. + CILK_ASSERT(0 == ref_count); + w->l->scheduling_fiber = NULL; + } - if (w->l->scheduler_stack) { - sysdep_destroy_tiny_stack(w->l->scheduler_stack); - w->l->scheduler_stack = NULL; +#if CILK_PROFILE + if (w->l->stats) { + __cilkrts_free(w->l->stats); } +#else + CILK_ASSERT(NULL == w->l->stats); +#endif + + /* Free any cached fibers. 
*/ + cilk_fiber_pool_destroy(&w->l->fiber_pool); + __cilkrts_destroy_worker_sysdep(w); if (w->l->signal_node) { @@ -2210,6 +2944,7 @@ void destroy_worker(__cilkrts_worker *w) __cilkrts_mutex_destroy(0, &w->l->lock); __cilkrts_mutex_destroy(0, &w->l->steal_lock); __cilkrts_frame_malloc_per_worker_cleanup(w); + __cilkrts_free(w->l); // The caller is responsible for freeing the worker memory @@ -2243,6 +2978,9 @@ void __cilkrts_deinit_internal(global_state_t *g) w->l->frame_ff = 0; } + // Release any resources used for record/replay + replay_term(g); + // Destroy any system dependent global state __cilkrts_destroy_global_sysdep(g); @@ -2253,8 +2991,10 @@ void __cilkrts_deinit_internal(global_state_t *g) __cilkrts_free(g->workers[0]); __cilkrts_free(g->workers); - __cilkrts_destroy_stack_cache(0, g, &g->stack_cache); + + cilk_fiber_pool_destroy(&g->fiber_pool); __cilkrts_frame_malloc_global_cleanup(g); + cilkg_deinit_global_state(); } @@ -2353,6 +3093,8 @@ static enum schedule_t worker_runnable(__cilkrts_worker *w) return SCHEDULE_RUN; } + + // Initialize the worker structs, but don't start the workers themselves. static void init_workers(global_state_t *g) { @@ -2364,8 +3106,15 @@ static void init_workers(global_state_t *g) } *workers_memory; /* not needed if only one worker */ - __cilkrts_init_stack_cache(0, &g->stack_cache, - 2*total_workers * g->global_stack_cache_size); + cilk_fiber_pool_init(&g->fiber_pool, + NULL, + g->stack_size, + g->global_fiber_pool_size, // buffer_size + g->max_stacks, // maximum # to allocate + 1); + + cilk_fiber_pool_set_fiber_limit(&g->fiber_pool, + (g->max_stacks ? 
g->max_stacks : INT_MAX)); g->workers = (__cilkrts_worker **) __cilkrts_malloc(total_workers * sizeof(*g->workers)); @@ -2395,7 +3144,6 @@ static void init_workers(global_state_t *g) void __cilkrts_init_internal(int start) { - int i; global_state_t *g = NULL; if (cilkg_is_published()) { @@ -2416,7 +3164,7 @@ void __cilkrts_init_internal(int start) g = cilkg_init_global_state(); // Set the scheduler pointer - g->scheduler = &__cilkrts_scheduler; + g->scheduler = worker_scheduler_function; // If we're running under a sequential P-Tool (Cilkscreen or // Cilkview) then there's only one worker and we need to tell @@ -2425,9 +3173,13 @@ void __cilkrts_init_internal(int start) __cilkrts_establish_c_stack(); init_workers(g); + // Initialize per-work record/replay logging + replay_init_workers(g); + // Initialize any system dependent global state __cilkrts_init_global_sysdep(g); + cilkg_publish_global_state(g); } @@ -2575,14 +3327,17 @@ void __cilkrts_init_internal(int start) *****************************************************************/ -// Struct storing pointers to the fields in our "left" sibling -// that we should update when splicing out a full frame or stalling at -// a sync. +/** + * @brief Locations to store the result of a reduction. + * + * Struct storing pointers to the fields in our "left" sibling that we + * should update when splicing out a full frame or stalling at a sync. + */ typedef struct { - // A pointer to the location of our left reducer map. + /** A pointer to the location of our left reducer map. */ struct cilkred_map **map_ptr; - // A pointer to the location of our left exception. + /** A pointer to the location of our left exception. */ struct pending_exception_info **exception_ptr; } splice_left_ptrs; @@ -2650,8 +3405,8 @@ splice_left_ptrs compute_left_ptrs_for_sync(__cilkrts_worker *w, * 1. Perform the "reduction" on stacks, i.e., execute the left * holder logic to pass the leftmost stack up. 
* - * w->l->stack_to_free holds any stack that needs to be freed - * after control longjmps into the runtime. + * w->l->fiber_to_free holds any stack that needs to be freed + * when control switches into the runtime fiber. * * 2. Unlink and remove child_ff from the tree of full frames. * @@ -2664,29 +3419,26 @@ void finish_spawn_return_on_user_stack(__cilkrts_worker *w, full_frame *parent_ff, full_frame *child_ff) { - CILK_ASSERT(w->l->stack_to_free == NULL); - + CILK_ASSERT(w->l->fiber_to_free == NULL); + // Execute left-holder logic for stacks. - if (child_ff->left_sibling || parent_ff->stack_child) { + if (child_ff->left_sibling || parent_ff->fiber_child) { // Case where we are not the leftmost stack. - CILK_ASSERT(parent_ff->stack_child != child_ff->stack_self); + CILK_ASSERT(parent_ff->fiber_child != child_ff->fiber_self); - // Remember any stack we need to free in the worker. + // Remember any fiber we need to free in the worker. // After we jump into the runtime, we will actually do the // free. - w->l->stack_to_free = child_ff->stack_self; + w->l->fiber_to_free = child_ff->fiber_self; } else { - // We are leftmost, pass stack up to parent. - // Thus, no stack to free. - parent_ff->stack_child = child_ff->stack_self; - w->l->stack_to_free = NULL; + // We are leftmost, pass stack/fiber up to parent. + // Thus, no stack/fiber to free. + parent_ff->fiber_child = child_ff->fiber_self; + w->l->fiber_to_free = NULL; } - // We cannot NULL this out yet. Importing a user worker on Windows - // depends on this field in the full_frame being valid in - // __cilkrts_sysdep_import_user_thread() -// child_ff->stack_self = NULL; + child_ff->fiber_self = NULL; unlink_child(parent_ff, child_ff); } @@ -2727,7 +3479,6 @@ fast_path_reductions_for_spawn_return(__cilkrts_worker *w, full_frame *ff) { // ASSERT: we hold ff->parent->lock. 
- full_frame *parent_ff = ff->parent; splice_left_ptrs left_ptrs; CILK_ASSERT(NULL == w->l->pending_exception); @@ -3137,6 +3888,7 @@ execute_reductions_for_sync(__cilkrts_worker *w, // we start any reductions, since the reductions might push more // data onto the stack. CILK_ASSERT(sf_at_sync->flags | CILK_FRAME_STOLEN); + __cilkrts_put_stack(ff, sf_at_sync); __cilkrts_make_unrunnable_sysdep(w, ff, sf_at_sync, 1, "execute_reductions_for_sync"); @@ -3172,6 +3924,10 @@ execute_reductions_for_sync(__cilkrts_worker *w, ff->call_stack = sf_at_sync; sf_at_sync->flags |= CILK_FRAME_SUSPENDED; + // At a nontrivial sync, we should always free the current fiber, + // because it can not be leftmost. + w->l->fiber_to_free = ff->fiber_self; + ff->fiber_self = NULL; return w; } diff --git a/libcilkrts/runtime/scheduler.h b/libcilkrts/runtime/scheduler.h index b0cc2797621..d7c3b1340df 100644 --- a/libcilkrts/runtime/scheduler.h +++ b/libcilkrts/runtime/scheduler.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. 
- * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -44,19 +49,34 @@ #include "reducer_impl.h" #include "global_state.h" +#ifdef CILK_RECORD_REPLAY +#include "record-replay.h" +#endif + __CILKRTS_BEGIN_EXTERN_C -// Set to 0 to allow parallel reductions. + +/** + * @brief Flag to disable parallel reductions. + * + * Set to 0 to allow parallel reductions. + */ #define DISABLE_PARALLEL_REDUCERS 0 + +/** + * @brief Debugging level for parallel reductions. + * + * Print debugging messages and assertions for parallel reducers. 0 is + * no debugging. 
A higher value generates more output. + */ #define REDPAR_DEBUG 0 /** - * Lock the worker mutex to allow exclusive access to the values in the - * __cilkrts_worker and local_state structures. + * @brief Lock the worker mutex to allow exclusive access to the + * values in the @c __cilkrts_worker and local_state structures. * - * Preconditions: - * - local_state.don_not_steal must not be set. Essentially this asserts - * that the worker is not locked recursively. + * @pre @c w->l->do_not_steal must not be set. Essentially this + * condition asserts that the worker is not locked recursively. * * @param w The worker to lock. */ @@ -64,11 +84,10 @@ COMMON_PORTABLE void __cilkrts_worker_lock(__cilkrts_worker *w); /** - * Unlock the worker mutex. + * @brief Unlock the worker mutex. * - * Preconditions: - * - local_state.don_not_steal must be set. Essentially this asserts - * that the worker has been previously locked. + * @pre @c w->l->do_not_steal must be set. Essentially this condition + * asserts that the worker has been previously locked. * * @param w The worker to unlock. */ @@ -76,8 +95,8 @@ COMMON_PORTABLE void __cilkrts_worker_unlock(__cilkrts_worker *w); /** - * Push the next full frame to be made active in this worker and increment - * its join counter. + * @brief Push the next full frame to be made active in this worker + * and increment its join counter. * * __cilkrts_push_next_frame and pop_next_frame work on a one-element queue. * This queue is used to communicate across the runtime from the code that @@ -86,9 +105,9 @@ void __cilkrts_worker_unlock(__cilkrts_worker *w); * counter but pop does not decrement it. Rather, a single push/pop * combination makes a frame active and increments its join counter once. * - * Note that a system worker may chose to push work onto a user worker if - * the work is the continuation from a sync which only the user worker may - * complete. 
+ * @note A system worker may choose to push work onto a user worker if + * the work is the continuation from a sync which only the user worker + * may complete. * * @param w The worker which the frame is to be pushed onto. * @param ff The full_frame which is to be continued by the worker. @@ -98,11 +117,14 @@ void __cilkrts_push_next_frame(__cilkrts_worker *w, full_frame *ff); /** - * Sync on this worker. If this is the last worker to reach the sync, - * execution may resume on this worker after the sync. If this is not - * the last spawned child to reach the sync, then execution is suspended - * and the worker will re-enter the scheduling loop, looking for work - * it can steal. + * @brief Sync on this worker. + * + * If this worker is the last to reach the sync, execution may resume + * on this worker after the sync. + * + * If this worker is not the last spawned child to reach the sync, + * then execution is suspended and the worker will re-enter the + * scheduling loop, looking for work it can steal. * * This function will jump into the runtime to switch to the scheduling * stack to implement most of its logic. @@ -115,13 +137,14 @@ NORETURN __cilkrts_c_sync(__cilkrts_worker *w, __cilkrts_stack_frame *sf); /** - * Worker W completely promotes its own deque, simulating the case - * where the whole deque is stolen. We use this mechanism to force - * the allocation of new storage for reducers for race-detection - * purposes. + * @brief Worker @c w completely promotes its own deque, simulating the case + * where the whole deque is stolen. * - * This is called from the reducer lookup logic when g->force_reduce - * is set. + * We use this mechanism to force the allocation of new storage for + * reducers for race-detection purposes. + * + * This method is called from the reducer lookup logic when + * @c g->force_reduce is set. * * @warning Use of "force_reduce" is known to have bugs when run with * more than 1 worker. 
@@ -133,10 +156,22 @@ COMMON_PORTABLE void __cilkrts_promote_own_deque(__cilkrts_worker *w); /** - * Called when a function attempts to return from a spawn and the - * parent has been stolen. While this function can return, it - * will most likely jump into the runtime to switch onto the - * scheduling stack to execute do_return_from_spawn(). + * Called when a spawned function attempts to return and + * __cilkrts_undo_detach() fails. This can happen for two reasons: + * + * @li If another worker is considering stealing our parent, it bumps the + * exception pointer while it does so, which will cause __cilkrts_undo_detach() + * to fail. If the other worker didn't complete the steal of our parent, we + * still may be able to return to it, either because the steal attempt failed, + * or we won the race for the tail pointer. + * + * @li If the function's parent has been stolen then we cannot return. Instead + * we'll longjmp into the runtime to switch onto the scheduling stack to + * execute do_return_from_spawn() and determine what to do. Either this + * worker is the last one to the sync, in which case we need to jump to the + * sync, or this worker is not the last one to the sync, in which case we'll + * abandon this work and jump to the scheduling loop to search for more work + * we can steal. * * @param w The worker which attempting to return from a spawn to * a stolen parent. @@ -147,22 +182,27 @@ void __cilkrts_c_THE_exception_check(__cilkrts_worker *w, __cilkrts_stack_frame *returning_sf); /** + * @brief Return an exception to a stolen parent. + * * Used by the gcc implementation of exceptions to return an exception * to a stolen parent * * @param w The worker which attempting to return from a spawn with an * exception to a stolen parent. + * @param returning_sf The stack frame which is returning. 
*/ COMMON_PORTABLE NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w, __cilkrts_stack_frame *returning_sf); /** - * Used by the Windows implementations of exceptions to migrate an exception - * across fibers. Call this function when an exception has been thrown and - * has to traverse across a steal. The exception has already been wrapped up, - * so all that remains is to longjmp() into the continuation, sync, and - * re-raise it. + * @brief Used by the Windows implementations of exceptions to migrate an exception + * across fibers. + * + * Call this function when an exception has been thrown and has to + * traverse across a steal. The exception has already been wrapped + * up, so all that remains is to longjmp() into the continuation, + * sync, and re-raise it. * * @param sf The __cilkrts_stack_frame for the frame that is attempting to * return an exception to a stolen parent. @@ -170,7 +210,8 @@ NORETURN __cilkrts_exception_from_spawn(__cilkrts_worker *w, void __cilkrts_migrate_exception (__cilkrts_stack_frame *sf); /** - * Return from a call, not a spawn, where this frame has ever been stolen. + * @brief Return from a call, not a spawn, where this frame has ever + * been stolen. * * @param w The worker that is returning from a frame which was ever stolen. */ @@ -178,16 +219,18 @@ COMMON_PORTABLE void __cilkrts_return(__cilkrts_worker *w); /** - * Special return from the initial frame. Will be called from - * __cilkrts_leave_frame if CILK_FRAME_LAST is set. + * @brief Special return from the initial frame. + * + * This method will be called from @c __cilkrts_leave_frame if + * @c CILK_FRAME_LAST is set. * * This function will do the things necessary to cleanup, and unbind the * thread from the Intel Cilk Plus runtime. If this is the last user * worker unbinding from the runtime, all system worker threads will be * suspended. * - * Preconditions: - * - This must be a user worker. 
+ * @pre @c w must be the currently executing worker, and must be a user + * worker. * * @param w The worker that's returning from the initial frame. */ @@ -195,7 +238,8 @@ COMMON_PORTABLE void __cilkrts_c_return_from_initial(__cilkrts_worker *w); /** - * Used by exception handling code to pop an entry from the worker's deque. + * @brief Used by exception handling code to pop an entry from the + * worker's deque. * * @param w Worker to pop the entry from * @@ -206,7 +250,8 @@ COMMON_PORTABLE __cilkrts_stack_frame *__cilkrts_pop_tail(__cilkrts_worker *w); /** - * Modifies the worker's protected_tail to prevent frames from being stolen. + * @brief Modifies the worker's protected_tail to prevent frames from + * being stolen. * * The Dekker protocol has been extended to only steal if head+1 is also * less than protected_tail. @@ -223,8 +268,8 @@ __cilkrts_stack_frame *volatile *__cilkrts_disallow_stealing( __cilkrts_stack_frame *volatile *new_protected_tail); /** - * Restores the protected tail to a previous state, possibly allowing frames - * to be stolen. + * @brief Restores the protected tail to a previous state, possibly + * allowing frames to be stolen. * * @param w The worker to be modified. * @param saved_protected_tail A previous setting for protected_tail that is @@ -236,8 +281,10 @@ void __cilkrts_restore_stealing( __cilkrts_stack_frame *volatile *saved_protected_tail); /** - * Initialize a __cilkrts_worker. The memory for the worker must have been - * allocated outside this call. + * @brief Initialize a @c __cilkrts_worker. + * + * @note The memory for the worker must have been allocated outside + * this call. * * @param g The global_state_t. * @param self The index into the global_state's array of workers for this @@ -253,8 +300,10 @@ __cilkrts_worker *make_worker(global_state_t *g, __cilkrts_worker *w); /** - * Free up any resources allocated for a worker. The memory for the - * __cilkrts_worker itself must be deallocated outside this call. 
+ * @brief Free up any resources allocated for a worker. + * + * @note The memory for the @c __cilkrts_worker itself must be + * deallocated outside this call. * * @param w The worker to be destroyed. */ @@ -262,8 +311,10 @@ COMMON_PORTABLE void destroy_worker (__cilkrts_worker *w); /** - * Initialize the runtime. If necessary, allocates and initializes the - * global state. If necessary, unsuspends the system workers. + * @brief Initialize the runtime. + * + * If necessary, allocates and initializes the global state. If + * necessary, unsuspends the system workers. * * @param start Specifies whether the workers are to be unsuspended if * they are suspended. Allows __cilkrts_init() to start up the runtime without @@ -273,8 +324,9 @@ COMMON_PORTABLE void __cilkrts_init_internal(int start); /** - * Part of the sequence to shutdown the runtime. Specifically frees the - * global_state_t for the runtime. + * @brief Part of the sequence to shutdown the runtime. + * + * Specifically, this call frees the @c global_state_t for the runtime. * * @param g The global_state_t. */ @@ -289,34 +341,49 @@ cilkred_map *__cilkrts_xchg_reducer( __cilkrts_worker *w, cilkred_map *newmap) cilk_nothrow; /** - * Called when a user thread is bound to the runtime. If this increments the - * count of bound user threads from 0 to 1, the system worker threads are - * unsuspended. + * @brief Called when a user thread is bound to the runtime. * - * @param g The runtime global state. + * If this action increments the count of bound user threads from 0 to + * 1, the system worker threads are unsuspended. + * + * If this action increments the count of bound user threads from 0 to + * 1, the system worker threads are unsuspended. * - * Preconditions: - * - Global lock must be held. + * @pre Global lock must be held. + * @param g The runtime global state. */ COMMON_PORTABLE void __cilkrts_enter_cilk(global_state_t *g); /** - * Called when a user thread is unbound from the runtime. 
If this decrements - * the count of bound user threads to 0, the system worker threads are - * suspended. + * @brief Called when a user thread is unbound from the runtime. + * + * If this action decrements the count of bound user threads to 0, the + * system worker threads are suspended. * - * @param g The runtime global state. * - * Preconditions: - * - Global lock must be held. + * @pre Global lock must be held. + * + * @param g The runtime global state. */ COMMON_PORTABLE void __cilkrts_leave_cilk(global_state_t *g); /** - * Prints out Cilk runtime statistics. + * @brief cilk_fiber_proc that runs the main scheduler loop on a + * user worker. + * + * @pre fiber's owner field should be set to the correct __cilkrts_worker + * @pre fiber must be a user worker. + * + * @param fiber The scheduling fiber object. + */ +void scheduler_fiber_proc_for_user_worker(cilk_fiber *fiber); + + +/** + * @brief Prints out Cilk runtime statistics. * * @param g The runtime global state. * @@ -326,6 +393,23 @@ void __cilkrts_leave_cilk(global_state_t *g); COMMON_PORTABLE void __cilkrts_dump_stats_to_stderr(global_state_t *g); +#ifdef CILK_RECORD_REPLAY +COMMON_PORTABLE +char * walk_pedigree_nodes(char *p, const __cilkrts_pedigree *pnode); + +/** + * @brief Used by exception handling code to simulate the popping of + * an entry from the worker's deque. 
+ * + * @param w Worker whose deque we want to check + * + * @return @c __cilkrts_stack_frame of parent call + * @return NULL if the deque is empty + */ +COMMON_PORTABLE +__cilkrts_stack_frame *simulate_pop_tail(__cilkrts_worker *w); + +#endif __CILKRTS_END_EXTERN_C diff --git a/libcilkrts/runtime/signal_node.c b/libcilkrts/runtime/signal_node.c index bcce9dbf254..4743bc03cb8 100644 --- a/libcilkrts/runtime/signal_node.c +++ b/libcilkrts/runtime/signal_node.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. 
+ * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * **************************************************************************/ @@ -57,14 +62,14 @@ * cilk_semaphore_t is implemented as an auto-reset event on Windows, and * as a semaphore_t on Linux and MacOS. */ -typedef struct signal_node_t +struct signal_node_t { /** 0 if the worker should wait, 1 if it should be running. */ volatile unsigned int run; /** OS-specific semaphore on which the worker can wait. */ cilk_semaphore_t sem; -} signal_node_t; +}; /******************************************************************************/ /* Semaphore-abstraction functions */ diff --git a/libcilkrts/runtime/signal_node.h b/libcilkrts/runtime/signal_node.h index 6b05234b6e6..d11b3d85cf7 100644 --- a/libcilkrts/runtime/signal_node.h +++ b/libcilkrts/runtime/signal_node.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -45,7 +50,7 @@ __CILKRTS_BEGIN_EXTERN_C -/* Opaque type. 
*/ +/** Opaque type. */ typedef struct signal_node_t signal_node_t; /** diff --git a/libcilkrts/runtime/spin_mutex.c b/libcilkrts/runtime/spin_mutex.c new file mode 100644 index 00000000000..0a63ede7cba --- /dev/null +++ b/libcilkrts/runtime/spin_mutex.c @@ -0,0 +1,104 @@ +/* spin_mutex.c -*-C-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +#include "spin_mutex.h" +#include "bug.h" +#include "os.h" +#include "stats.h" + +// TBD (11/30/12): We should be doing a conditional test-xchg instead +// of an unconditional xchg operation for the spin mutex. 
+ +/* m->lock == 1 means that mutex M is locked */ +#define TRY_ACQUIRE(m) (__cilkrts_xchg(&(m)->lock, 1) == 0) + +/* ICC 11.1+ understands release semantics and generates an + ordinary store with a software memory barrier. */ +#if __ICC >= 1110 +#define RELEASE(m) __sync_lock_release(&(m)->lock) +#else +#define RELEASE(m) __cilkrts_xchg(&(m)->lock, 0) +#endif + + +spin_mutex* spin_mutex_create() +{ + spin_mutex* mutex = (spin_mutex*)__cilkrts_malloc(sizeof(spin_mutex)); + spin_mutex_init(mutex); + return mutex; +} + +void spin_mutex_init(struct spin_mutex *m) +{ + // Use a simple assignment so Inspector doesn't bug us about the + // interlocked exchange doing a read of an uninitialized variable. + // By definition there can't be a race when we're initializing the + // lock... + m->lock = 0; +} + +void spin_mutex_lock(struct spin_mutex *m) +{ + int count; + const int maxspin = 1000; /* SWAG */ + if (!TRY_ACQUIRE(m)) { + count = 0; + do { + do { + __cilkrts_short_pause(); + if (++count >= maxspin) { + /* let the OS reschedule every once in a while */ + __cilkrts_yield(); + count = 0; + } + } while (m->lock != 0); + } while (!TRY_ACQUIRE(m)); + } +} + +int spin_mutex_trylock(struct spin_mutex *m) +{ + return TRY_ACQUIRE(m); +} + +void spin_mutex_unlock(struct spin_mutex *m) +{ + RELEASE(m); +} + +void spin_mutex_destroy(struct spin_mutex *m) +{ + __cilkrts_free(m); +} + +/* End spin_mutex.c */ diff --git a/libcilkrts/runtime/spin_mutex.h b/libcilkrts/runtime/spin_mutex.h new file mode 100644 index 00000000000..f5612b97c69 --- /dev/null +++ b/libcilkrts/runtime/spin_mutex.h @@ -0,0 +1,124 @@ +/* spin_mutex.h -*-C++-*- + * + ************************************************************************* + * + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. + **************************************************************************/ + +/** + * @file spin_mutex.h + * + * @brief Support for Cilk runtime mutexes. + * + * Cilk runtime mutexes are implemented as simple spin loops. + * + * This file is similar to a worker_mutex, except it does not have an + * owner field. + * + * TBD: This class, worker_mutex, and os_mutex overlap quite a bit in + * functionality. Can we unify these mutexes somehow? + */ +#ifndef INCLUDED_SPIN_MUTEX_DOT_H +#define INCLUDED_SPIN_MUTEX_DOT_H + +#include <cilk/common.h> +#include "rts-common.h" +#include "cilk_malloc.h" + +__CILKRTS_BEGIN_EXTERN_C + +/** + * Mutexes are treated as an abstract data type within the Cilk + * runtime system. They are implemented as simple spin loops. + */ +typedef struct spin_mutex { + /** Mutex spin loop variable. 0 if unowned, 1 if owned. */ + volatile int lock; + + /** Padding so the mutex takes up a cache line. 
*/ + char pad[64/sizeof(int) - 1]; +} spin_mutex; + + +/** + * @brief Create a new Cilk spin_mutex. + * + * @return Returns an initialized spin mutex. + */ +COMMON_PORTABLE +spin_mutex* spin_mutex_create(); + +/** + * @brief Initialize a Cilk spin_mutex. + * + * @param m Spin_Mutex to be initialized. + */ +COMMON_PORTABLE +void spin_mutex_init(spin_mutex *m); + +/** + * @brief Acquire a Cilk spin_mutex. + * + * Spins, pausing briefly and periodically yielding to the OS, + * until the spin_mutex has been acquired by the caller. + * + * @param m Spin_Mutex to be acquired. + */ +COMMON_PORTABLE +void spin_mutex_lock(struct spin_mutex *m); +/** + * @brief Attempt to lock a Cilk spin_mutex and fail if it isn't available. + * + * @param m Spin_Mutex to be acquired. + * + * @return 1 if the spin_mutex was acquired. + * @return 0 if the spin_mutex was not acquired. + */ +COMMON_PORTABLE +int spin_mutex_trylock(struct spin_mutex *m); + +/** + * @brief Release a Cilk spin_mutex. + * + * @param m Spin_Mutex to be released. + */ +COMMON_PORTABLE +void spin_mutex_unlock(struct spin_mutex *m); + +/** + * @brief Deallocate a Cilk spin_mutex. Frees the memory holding the mutex. + * + * @param m Spin_Mutex to be deallocated. + */ +COMMON_PORTABLE +void spin_mutex_destroy(struct spin_mutex *m); + +__CILKRTS_END_EXTERN_C + +#endif // ! defined(INCLUDED_SPIN_MUTEX_DOT_H) diff --git a/libcilkrts/runtime/stacks.c b/libcilkrts/runtime/stacks.c deleted file mode 100644 index e8be685ac8f..00000000000 --- a/libcilkrts/runtime/stacks.c +++ /dev/null @@ -1,192 +0,0 @@ -/* stacks.c -*-C-*- - * - ************************************************************************* - * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. - **************************************************************************/ - -#include "stacks.h" -#include "sysdep.h" -#include "local_state.h" -#include "frame_malloc.h" -#include "cilk-tbb-interop.h" - -static void move_to_global(__cilkrts_worker *w, unsigned int until) -{ - __cilkrts_stack_cache *local = &w->l->stack_cache; - __cilkrts_stack_cache *global = &w->g->stack_cache; - - /* If the global cache appears to be full do not take out the lock. 
*/ - if (global->n >= global->size) - return; - - __cilkrts_mutex_lock(w, &global->lock); - while (global->n < global->size && local->n > until) { - global->stacks[global->n++] = local->stacks[--local->n]; - } - __cilkrts_mutex_unlock(w, &global->lock); -} - -static void push(__cilkrts_worker *w, __cilkrts_stack *sd) -{ - __cilkrts_stack_cache *local = &w->l->stack_cache; - const unsigned int local_size = local->size; - - /* If room in local, push sd to local stack-of-stacks */ - if (local->n < local_size) { - local->stacks[local->n++] = sd; - return; - } - - if (local_size == 0) { - __cilkrts_free_stack(w->g, sd); - return; - } - - /* No room in local stack-of-stacks. - * Push half (round down) of the free stacks */ - move_to_global(w, local_size / 2); - - /* If some of the stacks didn't get moved (i.e., because the global - * stack-of-stacks is full), then permanently destroy some stacks until we - * are back down to half */ - while (local->n > local_size / 2) - __cilkrts_free_stack(w->g, local->stacks[--local->n]); - - /* Push the stack onto our local stack-of-stacks */ - local->stacks[local->n++] = sd; - return; -} - -static __cilkrts_stack *pop(__cilkrts_worker *w) -{ - __cilkrts_stack_cache *local = &w->l->stack_cache; - __cilkrts_stack_cache *global = &w->g->stack_cache; - __cilkrts_stack *sd = 0; - if (local->n > 0) - return local->stacks[--local->n]; - if (global->n > 0) { - __cilkrts_mutex_lock(w, &global->lock); - if (global->n > 0) - sd = global->stacks[--global->n]; - __cilkrts_mutex_unlock(w, &global->lock); - } - return sd; -} - -#ifdef _WIN32 -# include "stacks-win.h" -# define okay_to_release(stack) (0 == (stack)->outstanding_references) -#else -# define okay_to_release(stack) (1) -#endif // _WIN32 - -void __cilkrts_release_stack(__cilkrts_worker *w, - __cilkrts_stack *sd) -{ - START_INTERVAL(w, INTERVAL_FREE_STACK); - if (sd && okay_to_release(sd)) { - __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_RELEASE,sd); - push(w, sd); - } - STOP_INTERVAL(w, 
INTERVAL_FREE_STACK); - return; -} - -__cilkrts_stack *__cilkrts_get_stack(__cilkrts_worker *w) -{ - __cilkrts_stack *sd; - - START_INTERVAL(w, INTERVAL_ALLOC_STACK); - sd = pop (w); - if (sd == NULL) - sd = __cilkrts_make_stack(w); - else - __cilkrts_sysdep_reset_stack(sd); - STOP_INTERVAL(w, INTERVAL_ALLOC_STACK); - return sd; -} - -static void flush(global_state_t *g, - __cilkrts_stack_cache *c) -{ - /*START_INTERVAL(w, INTERVAL_FREE_STACK);*/ - while (c->n > 0) - __cilkrts_free_stack(g, c->stacks[--c->n]); - /*STOP_INTERVAL(w, INTERVAL_FREE_STACK);*/ -} - -void __cilkrts_init_stack_cache(__cilkrts_worker *w, - __cilkrts_stack_cache *c, - unsigned int size) -{ - __cilkrts_mutex_init(&c->lock); - c->size = size; - c->n = 0; - c->stacks = __cilkrts_frame_malloc(w, size * sizeof(__cilkrts_stack *)); -#if 0 /* Causes problems on Linux due to generated call to intel_fast_memset */ - { - unsigned int i; - /* Not really needed -- only indices < n are valid */ - for (i = 0; i < size; i++) - c->stacks[i] = 0; - } -#else - if (size > 0) - c->stacks[0] = 0; -#endif -} - -void __cilkrts_destroy_stack_cache(__cilkrts_worker *w, - global_state_t *g, - __cilkrts_stack_cache *c) -{ - flush(g, c); - __cilkrts_frame_free(w, c->stacks, c->size * sizeof(__cilkrts_stack *)); - c->stacks = 0; - c->n = 0; - c->size = 0; - __cilkrts_mutex_destroy(w, &c->lock); -} - -/* Free all but one local stack, returning to the global pool if possible. 
*/ - -void __cilkrts_trim_stack_cache(__cilkrts_worker *w) -{ - __cilkrts_stack_cache *local = &w->l->stack_cache; - - if (local->n <= 1) - return; - - START_INTERVAL(w, INTERVAL_FREE_STACK); - - move_to_global(w, 1); - - while (local->n > 1) - __cilkrts_free_stack(w->g, local->stacks[--local->n]); - - STOP_INTERVAL(w, INTERVAL_FREE_STACK); -} - -/* End stacks.c */ diff --git a/libcilkrts/runtime/stats.c b/libcilkrts/runtime/stats.c index a8c597437f2..2659ed5b25f 100644 --- a/libcilkrts/runtime/stats.c +++ b/libcilkrts/runtime/stats.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. 
This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "stats.h" @@ -40,7 +45,8 @@ static const char *names[] = { /*[INTERVAL_IN_SCHEDULER]*/ "in scheduler", /*[INTERVAL_WORKING]*/ " of which: working", - /*[INTERVAL_STEALING]*/ " of which: stealing", + /*[INTERVAL_IN_RUNTIME]*/ " of which: in runtime", + /*[INTERVAL_STEALING]*/ " of which: stealing", /*[INTERVAL_STEAL_SUCCESS]*/ "steal success: detach", /*[INTERVAL_STEAL_FAIL_EMPTYQ]*/ "steal fail: empty queue", /*[INTERVAL_STEAL_FAIL_LOCK]*/ "steal fail: victim locked", @@ -64,15 +70,18 @@ static const char *names[] = { /*[INTERVAL_MUTEX_LOCK_SPINNING]*/ " spinning", /*[INTERVAL_MUTEX_LOCK_YIELDING]*/ " yielding", /*[INTERVAL_MUTEX_TRYLOCK]*/ "mutex trylock", - /*[INTERVAL_ALLOC_STACK]*/ "alloc stack", - /*[INTERVAL_FREE_STACK]*/ "free stack", + /*[INTERVAL_FIBER_ALLOCATE]*/ "fiber_allocate", + /*[INTERVAL_FIBER_DEALLOCATE]*/ "fiber_deallocate", + /*[INTERVAL_FIBER_ALLOCATE_FROM_THREAD]*/ 
"fiber_allocate_from_thread", + /*[INTERVAL_FIBER_DEALLOCATE_FROM_THREAD]*/ "fiber_deallocate (thread)", + /*[INTERVAL_SUSPEND_RESUME_OTHER]*/ "fiber suspend self + resume", + /*[INTERVAL_DEALLOCATE_RESUME_OTHER]*/ "fiber deallocate self + resume", }; #endif void __cilkrts_init_stats(statistics *s) { int i; - for (i = 0; i < INTERVAL_N; ++i) { s->start[i] = INVALID_START; s->accum[i] = 0; @@ -87,7 +96,7 @@ void __cilkrts_accum_stats(statistics *to, statistics *from) { int i; - for (i = 0; i < INTERVAL_N; ++i) { + for (i = 0; i < INTERVAL_N; ++i) { to->accum[i] += from->accum[i]; to->count[i] += from->count[i]; from->accum[i] = 0; @@ -102,7 +111,7 @@ void __cilkrts_accum_stats(statistics *to, statistics *from) void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i) { if (w) { - statistics *s = &w->l->stats; + statistics *s = w->l->stats; CILK_ASSERT(s->start[i] == INVALID_START); s->count[i]++; } @@ -111,7 +120,7 @@ void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i) void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i) { if (w) { - statistics *s = &w->l->stats; + statistics *s = w->l->stats; CILK_ASSERT(s->start[i] == INVALID_START); s->start[i] = __cilkrts_getticks(); s->count[i]++; @@ -121,7 +130,7 @@ void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i) void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i) { if (w) { - statistics *s = &w->l->stats; + statistics *s = w->l->stats; CILK_ASSERT(s->start[i] != INVALID_START); s->accum[i] += __cilkrts_getticks() - s->start[i]; s->start[i] = INVALID_START; diff --git a/libcilkrts/runtime/stats.h b/libcilkrts/runtime/stats.h index 98130d2463f..1fa0346b602 100644 --- a/libcilkrts/runtime/stats.h +++ b/libcilkrts/runtime/stats.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -39,6 +44,9 @@ #define INCLUDED_STATS_DOT_H /* #define CILK_PROFILE 1 */ +// @note The CILK_PROFILE flag and intervals are known to be broken, +// at least in programs with Windows exceptions. +// Enable this flag at your own peril. :) #include <cilk/common.h> #include "rts-common.h" @@ -50,11 +58,12 @@ __CILKRTS_BEGIN_EXTERN_C -/** Events that we measure. */ +/** @brief Events that we measure. */ enum interval { - INTERVAL_IN_SCHEDULER, ///< Time spent in the scheduler + INTERVAL_IN_SCHEDULER, ///< Time threads spend "bound" to Cilk INTERVAL_WORKING, ///< Time spent working + INTERVAL_IN_RUNTIME, ///< Time spent executing runtime scheduling loop INTERVAL_STEALING, ///< Time spent stealing work INTERVAL_STEAL_SUCCESS, ///< Time to do a successful steal INTERVAL_STEAL_FAIL_EMPTYQ, ///< Count of steal failures due to lack of stealable work @@ -79,16 +88,21 @@ enum interval INTERVAL_MUTEX_LOCK_SPINNING, ///< Time spent spinning in __cilkrts_mutex_lock for a worker INTERVAL_MUTEX_LOCK_YIELDING, ///< Time spent yielding in __cilkrts_mutex_lock for a worker INTERVAL_MUTEX_TRYLOCK, ///< Count of calls to __cilkrts_mutex_trylock - INTERVAL_ALLOC_STACK, ///< Time spent allocating stacks - INTERVAL_FREE_STACK, ///< Time spent freeing stacks - + INTERVAL_FIBER_ALLOCATE, ///< Time spent calling cilk_fiber_allocate + INTERVAL_FIBER_DEALLOCATE, ///< Time spent calling cilk_fiber_deallocate (not from thread) + INTERVAL_FIBER_ALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_allocate_from_thread + INTERVAL_FIBER_DEALLOCATE_FROM_THREAD, ///< Time spent calling cilk_fiber_deallocate (from thread) +
INTERVAL_SUSPEND_RESUME_OTHER, ///< Count of fiber suspend_self_and_resume_other + INTERVAL_DEALLOCATE_RESUME_OTHER, ///< Count of fiber deallocate_self_and_resume_other INTERVAL_N ///< Number of intervals, must be last }; /** - * Struct that collects of all runtime statistics. There is an instance of this - * structure in each worker's local_state, as well as one in the global_state_t - * which will be used to accumulate the per-worker stats. + * @brief Struct that collects all runtime statistics. + * + * There is an instance of this structure in each worker's + * local_state, as well as one in the @c global_state_t which will be + * used to accumulate the per-worker stats. */ typedef struct statistics { @@ -115,26 +129,25 @@ typedef struct statistics /** * Initializes a statistics structure * - * @param to The statistics structure to initialize + * @param s The statistics structure to be initialized. */ COMMON_PORTABLE void __cilkrts_init_stats(statistics *s); /** - * Sums statistics from worker to the global struct + * @brief Sums statistics from worker to the global struct * - * @param to The statistics structure that will accumulate the information. - * This is g->stats. - * @param to The statistics structure that will be accumulated. This is the - * statistics kept per-worker. + * @param to The statistics structure that will accumulate the information. + * This structure is usually @c g->stats. + * @param from The statistics structure that will be accumulated. + * This structure is usually the statistics kept per worker. */ COMMON_PORTABLE void __cilkrts_accum_stats(statistics *to, statistics *from); /** - * Mark the start of an interval by saving the current tick count. + * @brief Mark the start of an interval by saving the current tick count. * - * Precondition: - * - Start time == INVALID_START + * @pre Start time == INVALID_START * * @param w The worker we're accumulating stats for. * @param i The interval we're accumulating stats for.
@@ -143,11 +156,10 @@ COMMON_PORTABLE void __cilkrts_start_interval(__cilkrts_worker *w, enum interval i); /** - * Mark the end of an interval by adding the ticks since the start to the - * accumulated time. + * @brief Mark the end of an interval by adding the ticks since the + * start to the accumulated time. * - * Precondition: - * - Start time != INVALID_START + * @pre Start time != INVALID_START * * @param w The worker we're accumulating stats for. * @param i The interval we're accumulating stats for. @@ -156,7 +168,7 @@ COMMON_PORTABLE void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i); /** - * Start and stop interval I, charging zero time against it + * @brief Start and stop interval I, charging zero time against it * * Precondition: * - Start time == INVALID_START @@ -167,15 +179,6 @@ void __cilkrts_stop_interval(__cilkrts_worker *w, enum interval i); COMMON_PORTABLE void __cilkrts_note_interval(__cilkrts_worker *w, enum interval i); - -/** - * Initialize an instance of the statistics structure - * - * @param s The statistics structure to be initialized. - */ -COMMON_PORTABLE -void __cilkrts_init_stats(statistics *s); - #ifdef CILK_PROFILE COMMON_PORTABLE void dump_stats_to_file(FILE *stat_file, statistics *s); diff --git a/libcilkrts/runtime/symbol_test.c b/libcilkrts/runtime/symbol_test.c index c5c8eb49a79..644bff62aa6 100644 --- a/libcilkrts/runtime/symbol_test.c +++ b/libcilkrts/runtime/symbol_test.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /* simple program to verify that there are no undefined symbols in the runtime. 
diff --git a/libcilkrts/runtime/sysdep-unix.c b/libcilkrts/runtime/sysdep-unix.c index 9b827502be5..b3a895a712a 100644 --- a/libcilkrts/runtime/sysdep-unix.c +++ b/libcilkrts/runtime/sysdep-unix.c @@ -3,28 +3,33 @@ * ************************************************************************* * - * Copyright (C) 2010-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2010-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. * ************************************************************************** */ @@ -48,6 +53,15 @@ #include "metacall_impl.h" +// On x86 processors (but not MIC processors), the compiler generated code to +// save the FP state (rounding mode and the like) before calling setjmp. We +// will need to restore that state when we resume. +#ifndef __MIC__ +# if defined(__i386__) || defined(__x86_64) +# define RESTORE_X86_FP_STATE +# endif // defined(__i386__) || defined(__x86_64) +#endif // __MIC__ + // contains notification macros for VTune. #include "cilk-ittnotify.h" @@ -61,28 +75,36 @@ #include <string.h> #include <pthread.h> #include <unistd.h> +#include <alloca.h> #ifdef __APPLE__ //# include <scheduler.h> // Angle brackets include Apple's scheduler.h, not ours. #endif + #ifdef __linux__ # include <sys/resource.h> # include <sys/sysinfo.h> #endif + #ifdef __FreeBSD__ # include <sys/resource.h> // BSD does not define MAP_ANONYMOUS, but *does* define MAP_ANON. Aren't standards great! 
# define MAP_ANONYMOUS MAP_ANON #endif - -static void internal_enforce_global_visibility(); +#ifdef __VXWORKS__ +# include <vxWorks.h> +# include <vxCpuLib.h> +#endif struct global_sysdep_state { - pthread_t *threads; - size_t pthread_t_size; /* for cilk_db */ -}; + pthread_t *threads; ///< Array of pthreads for system workers + size_t pthread_t_size; ///< for cilk_db +}; + +static void internal_enforce_global_visibility(); + COMMON_SYSDEP void __cilkrts_init_worker_sysdep(struct __cilkrts_worker *w) @@ -136,15 +158,15 @@ static void internal_run_scheduler_with_exceptions(__cilkrts_worker *w) __cilkrts_run_scheduler_with_exceptions(w); } + + /* - * __cilkrts_worker_stub + * scheduler_thread_proc_for_system_worker * * Thread start function called when we start a new worker. * - * This function is exported so Piersol's stack trace displays - * reasonable information */ -NON_COMMON void* __cilkrts_worker_stub(void *arg) +NON_COMMON void* scheduler_thread_proc_for_system_worker(void *arg) { /*int status;*/ __cilkrts_worker *w = (__cilkrts_worker *)arg; @@ -162,13 +184,72 @@ NON_COMMON void* __cilkrts_worker_stub(void *arg) CILK_ASSERT(w->l->type == WORKER_SYSTEM); /*status = pthread_mutex_unlock(&__cilkrts_global_mutex); CILK_ASSERT(status == 0);*/ - + __cilkrts_set_tls_worker(w); + + // Create a cilk fiber for this worker on this thread. + START_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD) { + w->l->scheduling_fiber = cilk_fiber_allocate_from_thread(); + cilk_fiber_set_owner(w->l->scheduling_fiber, w); + } STOP_INTERVAL(w, INTERVAL_FIBER_ALLOCATE_FROM_THREAD); + internal_run_scheduler_with_exceptions(w); + START_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD) { + // Deallocate the scheduling fiber. This operation reverses the + // effect of cilk_fiber_allocate_from_thread() and must be done in this + // thread before it exits.
+ int ref_count = cilk_fiber_deallocate_from_thread(w->l->scheduling_fiber); + // Scheduling fibers should never have extra references to them. + // We only get extra references into fibers because of Windows + // exceptions. + CILK_ASSERT(0 == ref_count); + w->l->scheduling_fiber = NULL; + } STOP_INTERVAL(w, INTERVAL_FIBER_DEALLOCATE_FROM_THREAD); + return 0; } + +/* + * __cilkrts_user_worker_scheduling_stub + * + * Routine for the scheduling fiber created for an imported user + * worker thread. This method is analogous to + * scheduler_thread_proc_for_system_worker. + * + */ +void __cilkrts_user_worker_scheduling_stub(cilk_fiber* fiber, void* null_arg) +{ + __cilkrts_worker *w = __cilkrts_get_tls_worker(); + + // Sanity check. + CILK_ASSERT(WORKER_USER == w->l->type); + + // Enter the scheduling loop on the user worker. + // This function will never return. + __cilkrts_run_scheduler_with_exceptions(w); + + // A WORKER_USER, at some point, will resume on the original stack and leave + // Cilk. Under no circumstances do we ever exit off of the bottom of this + // stack. + CILK_ASSERT(0); +} + +/** + * We are exporting a function with this name to Inspector? + * What a confusing name... + * + * This function is exported so Piersol's stack trace displays + * reasonable information. + */ +void* __cilkrts_worker_stub(void* arg) +{ + return scheduler_thread_proc_for_system_worker(arg); +} + + + // /* Return the lesser of the argument and the operating system // limit on the number of workers (threads) that may or ought // to be created. */ @@ -199,12 +280,13 @@ static void write_version_file (global_state_t *, int); */ static void create_threads(global_state_t *g, int base, int top) { - int i; - - for (i = base; i < top; i++) { - int status; - - status = pthread_create(&g->sysdep->threads[i], NULL, __cilkrts_worker_stub, g->workers[i]); + // TBD(11/30/12): We want to insert code providing the option of + // pinning system workers to cores. 
+ for (int i = base; i < top; i++) { + int status = pthread_create(&g->sysdep->threads[i], + NULL, + scheduler_thread_proc_for_system_worker, + g->workers[i]); if (status != 0) __cilkrts_bug("Cilk runtime error: thread creation (%d) failed: %d\n", i, status); } @@ -224,7 +306,7 @@ static void * create_threads_and_work (void * arg) threads_created = 1; // Ideally this turns into a tail call that wipes out this stack frame. - return __cilkrts_worker_stub (arg); + return scheduler_thread_proc_for_system_worker(arg); } #endif void __cilkrts_start_workers(global_state_t *g, int n) @@ -304,261 +386,47 @@ void __cilkrts_stop_workers(global_state_t *g) return; } +#ifdef RESTORE_X86_FP_STATE + /* * Restore the floating point state that is stored in a stack frame at each * spawn. This should be called each time a frame is resumed. + * + * Only valid for IA32 and Intel64 processors. */ -static inline void restore_fp_state (__cilkrts_stack_frame *sf) { -#if defined __i386__ || defined __x86_64 +static inline void restore_x86_fp_state (__cilkrts_stack_frame *sf) { __asm__ ( "ldmxcsr %0\n\t" "fnclex\n\t" "fldcw %1" : : "m" (sf->mxcsr), "m" (sf->fpcsr)); -#else -# warning "unimplemented: code to restore the floating point state" -#endif } +#endif // RESTORE_X86_FP_STATE -/* Resume user code after a spawn or sync, possibly on a different stack. - - Note: Traditional BSD longjmp would fail with a "longjmp botch" - error rather than change the stack pointer in the wrong direction. - Linux appears to let the program take the chance. - - This function is called to resume after a sync or steal. In both cases - ff->sync_sp starts out containing the original stack pointer of the loot. - In the case of a steal, the stack pointer stored in sf points to the - thief's new stack. In the case of a sync, the stack pointer stored in sf - points into original stack (i.e., it is either the same as ff->sync_sp or a - small offset from it caused by pushes and pops between the spawn and the - sync). 
*/ -NORETURN __cilkrts_resume(__cilkrts_worker *w, full_frame *ff, - __cilkrts_stack_frame *sf) -{ - // Assert: w is the only worker that knows about ff right now, no - // lock is needed on ff. - - const int flags = sf->flags; - void *sp; - - w->current_stack_frame = sf; - sf->worker = w; - CILK_ASSERT(flags & CILK_FRAME_SUSPENDED); - CILK_ASSERT(!sf->call_parent); - CILK_ASSERT(w->head == w->tail); - - if (ff->simulated_stolen) - /* We can't prevent __cilkrts_make_unrunnable_sysdep from discarding - * the stack pointer because there is no way to tell it that we are - * doing a simulated steal. Thus, we must recover the stack pointer - * here. */ - SP(sf) = ff->sync_sp; - - sp = SP(sf); - - /* Debugging: make sure stack is accessible. */ - ((volatile char *)sp)[-1]; - - __cilkrts_take_stack(ff, sp); - - /* The leftmost frame has no allocated stack */ - if (ff->simulated_stolen) - CILK_ASSERT(flags & CILK_FRAME_UNSYNCHED && ff->sync_sp == NULL); - else if (flags & CILK_FRAME_UNSYNCHED) - /* XXX By coincidence sync_sp could be null. */ - CILK_ASSERT(ff->stack_self != NULL && ff->sync_sp != NULL); - else - /* XXX This frame could be resumed unsynched on the leftmost stack */ - CILK_ASSERT((ff->sync_master == 0 || ff->sync_master == w) && - ff->sync_sp == 0); - /*if (w->l->type == WORKER_USER) - CILK_ASSERT(ff->stack_self == NULL);*/ - - // Notify the Intel tools that we're stealing code - ITT_SYNC_ACQUIRED(sf->worker); -#ifdef ENABLE_NOTIFY_ZC_INTRINSIC - __notify_zc_intrinsic("cilk_continue", sf); -#endif // defined ENABLE_NOTIFY_ZC_INTRINSIC - - if (ff->stack_self) { - // Notify TBB that we are resuming. - __cilkrts_invoke_stack_op(w, CILK_TBB_STACK_ADOPT, ff->stack_self); - } - - sf->flags &= ~CILK_FRAME_SUSPENDED; - -#ifndef __MIC__ - if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) { - // Restore the floating point state that was set in this frame at the - // last spawn. - // - // This feature is only available in ABI 1 or later frames. 
- restore_fp_state(sf); - } -#endif - - CILK_LONGJMP(sf->ctx); - /*NOTREACHED*/ - /* Intel's C compiler respects the preceding lint pragma */ -} - -#include <stddef.h> -#include <stdlib.h> -#include <string.h> -#include <sys/mman.h> -#include <errno.h> - -struct __cilkrts_stack -{ - /* If /size/ and /top/ are zero this is the system stack for thread /owner/. - If /top/ and /size/ are both nonzero this is an allocated stack and - /owner/ is undefined. */ - char *top; - size_t size; - pthread_t owner; - - /* Cilk/TBB interop callback routine/data. */ - __cilk_tbb_pfn_stack_op stack_op_routine; - void *stack_op_data; -}; - -void __cilkrts_set_stack_op(__cilkrts_stack *sd, - __cilk_tbb_stack_op_thunk o) -{ - sd->stack_op_routine = o.routine; - sd->stack_op_data = o.data; -} - -void __cilkrts_invoke_stack_op(__cilkrts_worker *w, - enum __cilk_tbb_stack_op op, - __cilkrts_stack *sd) -{ - // If we don't have a stack we can't do much, can we? - if (NULL == sd) - return; - - if (0 == sd->stack_op_routine) - { - return; - } - - (*sd->stack_op_routine)(op,sd->stack_op_data); - if (op == CILK_TBB_STACK_RELEASE) - { - sd->stack_op_routine = 0; - sd->stack_op_data = 0; - } -} - -/* - * tbb_interop_save_stack_op_info - * - * Save TBB interop information for an unbound thread. It will get picked - * up when the thread is bound to the runtime. - */ -void tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o) -{ - __cilk_tbb_stack_op_thunk *saved_thunk = - __cilkrts_get_tls_tbb_interop(); - - // If there is not already space allocated, allocate some. - if (NULL == saved_thunk) { - saved_thunk = (__cilk_tbb_stack_op_thunk*) - __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk)); - __cilkrts_set_tls_tbb_interop(saved_thunk); - } - - *saved_thunk = o; -} /* - * tbb_interop_save_info_from_stack + * @brief Returns the stack address for resuming execution of sf. * - * Save TBB interop information from the __cilkrts_stack. 
It will get picked - * up when the thread is bound to the runtime next time. - */ -void tbb_interop_save_info_from_stack(__cilkrts_stack *sd) -{ - __cilk_tbb_stack_op_thunk *saved_thunk; - - // If there is no TBB interop data, just return - if (NULL == sd || NULL == sd->stack_op_routine) return; - - saved_thunk = __cilkrts_get_tls_tbb_interop(); - - // If there is not already space allocated, allocate some. - if (NULL == saved_thunk) { - saved_thunk = (__cilk_tbb_stack_op_thunk*) - __cilkrts_malloc(sizeof(__cilk_tbb_stack_op_thunk)); - __cilkrts_set_tls_tbb_interop(saved_thunk); - } - - saved_thunk->routine = sd->stack_op_routine; - saved_thunk->data = sd->stack_op_data; -} - -/* - * tbb_interop_use_saved_stack_op_info + * This method takes in the top of the stack to use, and then returns + * a properly aligned address for resuming execution of sf. * - * If there's TBB interop information that was saved before the thread was - * bound, apply it now - */ -void tbb_interop_use_saved_stack_op_info(__cilkrts_worker *w, - __cilkrts_stack *sd) -{ - struct __cilk_tbb_stack_op_thunk *saved_thunk = - __cilkrts_get_tls_tbb_interop(); - - // If we haven't allocated a TBB interop index, we don't have any saved info - if (NULL == saved_thunk) return; - - // Associate the saved info with the __cilkrts_stack - __cilkrts_set_stack_op(sd, *saved_thunk); - - // Free the saved data. We'll save it again if needed when the code - // returns from the initial function - tbb_interop_free_stack_op_info(); -} - -/* - * tbb_interop_free_stack_op_info + * @param sf - The stack frame we want to resume executing. + * @param stack_base - The top of the stack we want to execute sf on. * - * Free saved TBB interop memory. Should only be called when the thread is - * not bound. 
*/ -void tbb_interop_free_stack_op_info(void) -{ - struct __cilk_tbb_stack_op_thunk *saved_thunk = - __cilkrts_get_tls_tbb_interop(); - - // If we haven't allocated a TBB interop index, we don't have any saved info - if (NULL == saved_thunk) return; - - // Free the memory and wipe out the TLS value - __cilkrts_free(saved_thunk); - __cilkrts_set_tls_tbb_interop(NULL); -} - -void __cilkrts_bind_stack(full_frame *ff, char *new_sp, - __cilkrts_stack *parent_stack, - __cilkrts_worker *owner) -{ - __cilkrts_stack_frame *sf = ff->call_stack; - __cilkrts_stack *sd = ff->stack_self; - CILK_ASSERT(sizeof SP(sf) <= sizeof (size_t)); - - SP(sf) = new_sp; - - // Need to do something with parent_stack and owner? - return; -} - -char *__cilkrts_stack_to_pointer(__cilkrts_stack *s, __cilkrts_stack_frame *sf) -{ - if (!s) - return NULL; - +static char* get_sp_for_executing_sf(char* stack_base, + full_frame *ff, + __cilkrts_stack_frame *sf) +{ +// The original calculation that had been done to correct the stack +// pointer when resuming execution. +// +// But this code was never getting called in the eng branch anyway... +// +// TBD(11/30/12): This logic needs to be revisited to make sure that +// we are doing the proper calculation in reserving space for outgoing +// arguments on all platforms and architectures. +#if 0 /* Preserve outgoing argument space and stack alignment on steal. Outgoing argument space is bounded by the difference between stack and frame pointers. 
Some user code is known to rely on @@ -569,191 +437,139 @@ char *__cilkrts_stack_to_pointer(__cilkrts_stack *s, __cilkrts_stack_frame *sf) char *fp = FP(sf), *sp = SP(sf); int fp_align = (int)(size_t)fp & SMASK; ptrdiff_t space = fp - sp; - char *top_aligned = (char *)((((size_t)s->top - SMASK) & ~(size_t)SMASK) | fp_align); + + fprintf(stderr, "Here: fp = %p, sp = %p\n", fp, sp); + char *top_aligned = (char *)((((size_t)stack_base - SMASK) & ~(size_t)SMASK) | fp_align); /* Don't allocate an unreasonable amount of stack space. */ + + fprintf(stderr, "Here: stack_base = %p, top_aligned=%p, space=%ld\n", + stack_base, top_aligned, space); if (space < 32) space = 32 + (space & SMASK); else if (space > 40 * 1024) space = 40 * 1024 + (space & SMASK); + return top_aligned - space; } - return s->top - 256; -} - -#define PAGE 4096 +#endif -/* - * Return a pointer to the top of a "tiny" stack that is 64 KB (plus a buffer - * page on each end). - * - * No reasonable program should need more than 64 KB, so if we hit a buffer, - * we're doing it wrong. - */ -void *sysdep_make_tiny_stack (__cilkrts_worker *w) -{ - char *p; - __cilkrts_stack *s; - -#ifndef MAP_ANONYMOUS -#define MAP_ANONYMOUS MAP_ANON -#endif - - p = mmap(0, PAGE * 18, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, - -1, 0); - if (MAP_FAILED == p) { - // For whatever reason (probably ran out of memory), mmap() failed. - // There is no stack to return, so the program loses parallelism. - if (0 == __cilkrts_xchg(&w->g->failure_to_allocate_stack, 1)) { - cilkos_warning("Failed to allocate memory for a new stack.\n" - "Continuing with some loss of parallelism.\n"); +#define PERFORM_FRAME_SIZE_CALCULATION 0 + + char* new_stack_base = stack_base - 256; + +#if PERFORM_FRAME_SIZE_CALCULATION + // If there is a frame size saved, then use that as the + // correction instead of 256. 
+ if (ff->frame_size > 0) { + if (ff->frame_size < 40*1024) { + new_stack_base = stack_base - ff->frame_size; + } + else { + // If for some reason, our frame size calculation is giving us + // a number which is bigger than about 10 pages, then + // there is likely something wrong here? Don't allocate + // an unreasonable amount of space. + new_stack_base = stack_base - 40*1024; } - return NULL; } - mprotect(p + (17 * PAGE), PAGE, PROT_NONE); - mprotect(p, PAGE, PROT_NONE); - - return (void*)(p + (17 * PAGE)); -} - -/* - * Free a "tiny" stack (created with sysdep_make_tiny_stack()). - */ -void sysdep_destroy_tiny_stack (void *p) -{ - char *s = (char*)p; - s = s - (17 * PAGE); - munmap((void*)s, 18 * PAGE); -} - -__cilkrts_stack *__cilkrts_make_stack(__cilkrts_worker *w) -{ - __cilkrts_stack *s; - char *p; - size_t stack_size; - -#if defined CILK_PROFILE && defined HAVE_SYNC_INTRINSICS -#define PROFILING_STACKS 1 -#else -#define PROFILING_STACKS 0 #endif - - if (PROFILING_STACKS || w->g->max_stacks > 0) { - if (w->g->max_stacks > 0 && w->g->stacks > w->g->max_stacks) { - /* No you can't have a stack. Not yours. */ - return NULL; - } else { - /* We think we are allowed to allocate a stack. Perform an atomic - increment on the counter and verify that there really are enough - stacks remaining for us. */ - long hwm = __sync_add_and_fetch(&w->g->stacks, 1); - if (w->g->max_stacks > 0 && hwm > w->g->max_stacks) { - /* Whoops! Another worker got to it before we did. - C'est la vie. */ - return NULL; - } - -#ifdef CILK_PROFILE - /* Keeping track of the largest stack count observed by this worker - is part of profiling. The copies will be merged at the end of - execution. */ - if (PROFILING_STACKS && hwm > w->l->stats.stack_hwm) { - w->l->stats.stack_hwm = hwm; - } + + // Whatever correction we choose, align the final stack top. + // This alignment seems to be necessary in particular on 32-bit + // Linux, and possibly Mac. (Is 32-byte alignment is sufficient?) 
+ /* 256-byte alignment. Why not? */ + const uintptr_t align_mask = ~(256 -1); + new_stack_base = (char*)((size_t)new_stack_base & align_mask); + return new_stack_base; +} + +char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber, + full_frame *ff, + __cilkrts_stack_frame *sf) +{ +#if FIBER_DEBUG >= 4 + fprintf(stderr, "ThreadId=%p (fiber_proc_to_resume), Fiber %p. sf = %p. ff=%p, ff->sync_sp=%p\n", + cilkos_get_current_thread_id(), + fiber, + sf, + ff, ff->sync_sp); #endif - } - } - - stack_size = w->g->stack_size; - CILK_ASSERT(stack_size > 0); - - p = mmap(0, stack_size, PROT_READ|PROT_WRITE, MAP_PRIVATE|MAP_ANONYMOUS, - -1, 0); - if (MAP_FAILED == p) { - // For whatever reason (probably ran out of memory), mmap() failed. - // There is no stack to return, so the program loses parallelism. - if (0 == __cilkrts_xchg(&w->g->failure_to_allocate_stack, 1)) { - cilkos_warning("Failed to allocate memory for a new stack.\n" - "Continuing with some loss of parallelism.\n"); - } - return NULL; - } - mprotect(p + stack_size - PAGE, PAGE, PROT_NONE); - mprotect(p, PAGE, PROT_NONE); - s = __cilkrts_malloc(sizeof (struct __cilkrts_stack)); - CILK_ASSERT(s); - s->top = p + stack_size - PAGE; - s->size = stack_size - (PAGE + PAGE); - memset(&s->owner, 0, sizeof s->owner); + CILK_ASSERT(fiber); + void* sp = (void*)get_sp_for_executing_sf(cilk_fiber_get_stack_base(fiber), ff, sf); + SP(sf) = sp; - s->stack_op_routine = NULL; - s->stack_op_data = NULL; + /* Debugging: make sure stack is accessible. */ + ((volatile char *)sp)[-1]; - return s; + // Adjust the saved_sp to account for the SP we're about to run. 
This will + // allow us to track fluctations in the stack +#if FIBER_DEBUG >= 4 + fprintf(stderr, "ThreadId=%p, about to take stack ff=%p, sp=%p, sync_sp=%p\n", + cilkos_get_current_thread_id(), + ff, + sp, + ff->sync_sp); +#endif + __cilkrts_take_stack(ff, sp); + return sp; } -void __cilkrts_free_stack(global_state_t *g, - __cilkrts_stack *sd) -{ - char *s; - size_t size; - - CILK_ASSERT(g->max_stacks <= 0); -#if defined CILK_PROFILE && defined HAVE_SYNC_INTRINSICS - __sync_sub_and_fetch(&g->stacks, 1); +NORETURN sysdep_longjmp_to_sf(char* new_sp, + __cilkrts_stack_frame *sf, + full_frame *ff_for_exceptions /* UNUSED on Unix */) +{ +#if FIBER_DEBUG >= 3 + fprintf(stderr, + "ThreadId=%p. resume user code, sf=%p, new_sp = %p, original SP(sf) = %p, FP(sf) = %p\n", + cilkos_get_current_thread_id(), sf, new_sp, SP(sf), FP(sf)); #endif - s = sd->top; - size = sd->size; - - CILK_ASSERT(s && size); + // Set the stack pointer. + SP(sf) = new_sp; -#if __GNUC__ - { - char *fp = __builtin_frame_address(0); - CILK_ASSERT(fp < s - 10000 || fp > s); +#ifdef RESTORE_X86_FP_STATE + if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) { + // Restore the floating point state that was set in this frame at the + // last spawn. + // + // This feature is only available in ABI 1 or later frames, and only + // needed on IA64 or Intel64 processors. 
+ restore_x86_fp_state(sf); } #endif - /* DEBUG: */ - ((volatile char *)s)[-1]; - s += PAGE; - size += PAGE + PAGE; - - if (munmap(s - size, size) < 0) - __cilkrts_bug("Cilk: stack release failed error %d", errno); + CILK_LONGJMP(sf->ctx); +} - sd->top = 0; - sd->size = 0; - __cilkrts_free(sd); - return; -} +#include <stddef.h> +#include <stdlib.h> +#include <string.h> +#include <sys/mman.h> +#include <errno.h> -void __cilkrts_sysdep_reset_stack(__cilkrts_stack *sd) -{ - CILK_ASSERT(sd->stack_op_routine == NULL); - CILK_ASSERT(sd->stack_op_data == NULL); - return; -} void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w, full_frame *ff, __cilkrts_stack_frame *sf, - int state_valid, + int is_loot, const char *why) { (void)w; /* unused */ sf->except_data = 0; - if (state_valid && ff->frame_size == 0) + if (is_loot) + { + if (ff->frame_size == 0) ff->frame_size = __cilkrts_get_frame_size(sf); + // Null loot's sp for debugging purposes (so we'll know it's not valid) SP(sf) = 0; + } } - /* * __cilkrts_sysdep_is_worker_thread_id * @@ -765,7 +581,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g, int i, void *thread_id) { -#ifdef __linux__ +#if defined( __linux__) || defined(__VXWORKS__) pthread_t tid = *(pthread_t *)thread_id; if (i < 0 || i > g->total_workers) return 0; @@ -776,42 +592,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g, #endif } -int __cilkrts_sysdep_bind_thread(__cilkrts_worker *w) -{ - if (w->self < 0) { - // w->self < 0 means that this is an ad-hoc user worker not known to - // the global state. Nobody will ever try to steal from it, so it - // does not need a scheduler_stack. - return 0; // success - } - - // Allocate a scheduler_stack for this user worker if one does not - // already exist. - if (NULL == w->l->scheduler_stack) { - - // The scheduler stack does not need to be as large as a normal - // programm stack. Returns null on failure (probably indicating that - // we're out of memory). 
- w->l->scheduler_stack = sysdep_make_tiny_stack(w); - // Return success (zero) if we successfully allocated a scheduler - // stack and failure (non-zero) if stack allocation returned NULL. - return (NULL == w->l->scheduler_stack ? -1 : 0); - } - - return 0; // success -} - -void __cilkrts_sysdep_unbind_thread(__cilkrts_worker *w) -{ - // Needs to be implemented -} - -int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd, - struct __cilkrts_region_properties *props) -{ - return 0; -} /************************************************************* @@ -823,6 +604,10 @@ int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd, #include <stdio.h> #include <sys/utsname.h> +#ifdef __VXWORKS__ +#include <version.h> +# endif + /* (Non-static) dummy function is used by get_runtime_path() to find the path * to the .so containing the Cilk runtime. */ @@ -886,7 +671,14 @@ static void write_version_file (global_state_t *g, int n) VERSION_MINOR, VERSION_REV, VERSION_BUILD); +#ifdef __VXWORKS__ + char * vxWorksVer = VXWORKS_VERSION; + fprintf(fp, "Cross compiled for %s\n",vxWorksVer); + // user and host not avalible if VxWorks cross compiled on windows build host +#else fprintf(fp, "Built by "BUILD_USER" on host "BUILD_HOST"\n"); +#endif + fprintf(fp, "Compilation date: "__DATE__" "__TIME__"\n"); #ifdef __INTEL_COMPILER @@ -930,7 +722,11 @@ static void write_version_file (global_state_t *g, int n) fprintf(fp, "\nThread information\n"); fprintf(fp, "==================\n"); +#ifdef __VXWORKS__ + fprintf(fp, "System cores: %d\n", (int)__builtin_popcount(vxCpuEnabledGet())); +#else fprintf(fp, "System cores: %d\n", (int)sysconf(_SC_NPROCESSORS_ONLN)); +#endif fprintf(fp, "Cilk workers requested: %d\n", n); #if (PARALLEL_THREAD_CREATE) fprintf(fp, "Thread creator: Private (parallel)\n"); @@ -973,6 +769,7 @@ void __cilkrts_establish_c_stack(void) */ } + /* * internal_enforce_global_visibility * @@ -993,95 +790,18 @@ void internal_enforce_global_visibility() 
if( handle) dlclose(handle); } -/* - * Special scheduling entrypoint for a WORKER_USER. Ensure a new stack has been - * created and the stack pointer has been placed on it before entering - * worker_user_scheduler(). - * - * Call this function the first time a WORKER_USER has returned to a stolen - * parent and cannot continue. Every time after that, the worker can simply - * longjmp() like any other worker. - */ -static NOINLINE -void worker_user_scheduler() -{ - __cilkrts_worker *w = __cilkrts_get_tls_worker(); - - // This must be a user worker - CILK_ASSERT(WORKER_USER == w->l->type); - - // Run the continuation function passed to longjmp_into_runtime - run_scheduling_stack_fcn(w); - w->reducer_map = 0; - - cilkbug_assert_no_uncaught_exception(); - - STOP_INTERVAL(w, INTERVAL_IN_SCHEDULER); - STOP_INTERVAL(w, INTERVAL_WORKING); - - // Enter the scheduling loop on the user worker. This function will - // never return - __cilkrts_run_scheduler_with_exceptions(w); - - // A WORKER_USER, at some point, will resume on the original stack and - // leave Cilk. Under no circumstances do we ever exit off of the bottom - // of this stack. - CILK_ASSERT(0); -} - -/* - * __cilkrts_sysdep_import_user_thread - * - * Imports a user thread the first time it returns to a stolen parent - */ - -void __cilkrts_sysdep_import_user_thread(__cilkrts_worker *w) +void sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf) { - void *ctx[5]; // Jump buffer for __builtin_setjmp/longjmp. - - CILK_ASSERT(w->l->scheduler_stack); - - // It may be that this stack has been used before (i.e., the worker was - // bound to a thread), and in principle, we could just jump back into - // the runtime, but we'd have to keep around extra data to do that, and - // there is no harm in starting over, here. - - // Move the stack pointer onto the scheduler stack. The subsequent - // call will move execution onto that stack. 
We never return from - // that call, and every time we longjmp_into_runtime() after this, - // the w->l->env jump buffer will be populated. - if (0 == __builtin_setjmp(ctx)) { - ctx[2] = w->l->scheduler_stack; // replace the stack pointer. - __builtin_longjmp(ctx, 1); - } else { - // We can't just pass the worker through as a parameter to - // worker_user_scheduler because the generated code might try to - // retrieve w using stack-relative addressing instead of bp-relative - // addressing and would get a bogus value. - worker_user_scheduler(); // noinline, does not return. +// If we're not going to restore, don't bother saving it +#ifdef RESTORE_X86_FP_STATE + if (CILK_FRAME_VERSION_VALUE(sf->flags) >= 1) + { + __asm__ ("stmxcsr %0" : "=m" (sf->mxcsr)); + __asm__ ("fnstsw %0" : "=m" (sf->fpcsr)); } - - CILK_ASSERT(0); // Should never reach this point. -} - -/* - * Make a fake user stack descriptor to correspond to the user's stack. - */ -__cilkrts_stack *sysdep_make_user_stack (__cilkrts_worker *w) -{ - return calloc(1, sizeof(struct __cilkrts_stack)); -} - -/* - * Destroy the fake user stack descriptor that corresponds to the user's stack. - */ -void sysdep_destroy_user_stack (__cilkrts_stack *sd) -{ - free(sd); +#endif } - - /* Local Variables: ** c-file-style:"bsd" ** diff --git a/libcilkrts/runtime/sysdep.h b/libcilkrts/runtime/sysdep.h index 9804654cf5b..5b3c94fd58c 100644 --- a/libcilkrts/runtime/sysdep.h +++ b/libcilkrts/runtime/sysdep.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. 
- * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. + * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. 
**************************************************************************/ /** @@ -43,34 +48,27 @@ #include "os.h" #include "os_mutex.h" -#include "cilk-tbb-interop.h" - -__CILKRTS_BEGIN_EXTERN_C - -// Part of inspector ABI -typedef struct __cilkrts_region_properties __cilkrts_region_properties; - /** - * Bind the __cilkrts_stack_frame to the stack + * @brief Default page size for Cilk stacks. * - * @param ff full_frame for the frame we're binding - * @param new_sp Not used. - * @param parent_stack __cilkrts_stack of this frames parent - * @param owner __cilkrts_worker for the user worker thread that captains - * the team that this stack is contributing to. + * All Cilk stacks should have size that is a multiple of this value. */ -COMMON_SYSDEP -void __cilkrts_bind_stack(full_frame *ff, - char *new_sp, - __cilkrts_stack *parent_stack, - __cilkrts_worker *owner); +#define PAGE 4096 /** - * Return an address on the specified stack. Mostly obsolete. - */ -COMMON_SYSDEP -char *__cilkrts_stack_to_pointer(__cilkrts_stack *sd, - __cilkrts_stack_frame *sf); + * @brief Size of a scheduling stack. + * + * A scheduling stack is used to by system workers to execute runtime + * code. Since this stack is only executing runtime functions, we + * don't need it to be a full size stack. + * + * The number "18" should be small since the runtime doesn't require a + * large stack, but large enough to call "printf" for debugging. + */ +#define CILK_SCHEDULING_STACK_SIZE (18*PAGE) + +__CILKRTS_BEGIN_EXTERN_C + /** * Code to initialize the system-dependent portion of the global_state_t @@ -95,73 +93,6 @@ void __cilkrts_destroy_global_sysdep(global_state_t *g); COMMON_SYSDEP void __cilkrts_establish_c_stack(void); -/** - * Allocate and initialize a __cilkrts_stack. - * - * @param w The worker to attribute this stack to - mostly used for stats. - * - * @return Pointer to the initilaized __cilkrts_stack. - * @return NULL if we failed to allocate the stack. 
- */ -COMMON_SYSDEP -__cilkrts_stack *__cilkrts_make_stack(__cilkrts_worker *w); - -/** - * Release any resources associated with a __cilkrts_stack - * - * @param g The global state - used for stats - * @param sd The __cilkrts_stack to be released - */ -COMMON_SYSDEP -void __cilkrts_free_stack(global_state_t *g, __cilkrts_stack *sd); - -/** - * Allocate a __cilkrts_stack with a small size for use as a scheduling stack. - * - * @param w The worker to attribute this stack to - mostly used for stats. - * - * @return Pointer to the initilaized __cilkrts_stack. - * @return NULL if we failed to allocate the stack. - */ -COMMON_SYSDEP -void *sysdep_make_tiny_stack (__cilkrts_worker *w); - -/** - * Release any resources associated with a __cilkrts_stack created as a - * scheduling stack. - * - * @param sd The __cilkrts_stack to be released - */ -COMMON_SYSDEP -void sysdep_destroy_tiny_stack (void *sd); - -/** - * Allocate and initialize a __cilkrts_stack to use to run user code. - * - * @param w The worker to attribute this stack to - mostly used for stats. - * - * @return Pointer to the initilaized __cilkrts_stack. - * @return NULL if we failed to allocate the stack. - */ -COMMON_SYSDEP -__cilkrts_stack *sysdep_make_user_stack (__cilkrts_worker *w); - -/** - * Release any resources associated with a __cilkrts_stack created as a - * user stack. - * - * @param sd The __cilkrts_stack to be released - */ -COMMON_SYSDEP -void sysdep_destroy_user_stack (__cilkrts_stack *sd); - -/** - * Reset stack-specific information so the stack can be cached and reused - * - * @param sd The __cilkrts_stack to be reset. - */ -COMMON_SYSDEP -void __cilkrts_sysdep_reset_stack(__cilkrts_stack *sd); /** * Save system dependent information in the full_frame and @@ -184,17 +115,6 @@ void __cilkrts_make_unrunnable_sysdep(__cilkrts_worker *w, int state_valid, const char *why); -/** - * Resume execution of the full frame. - * - * @param w The worker to resume execution on. 
- * @param ff The full_frame to resume executing. - * @param sf The __cilkrts_stack_frame to resume executing. - */ -COMMON_SYSDEP -NORETURN __cilkrts_resume(__cilkrts_worker *w, - full_frame *ff, - __cilkrts_stack_frame *sf); /** * OS-specific code to spawn worker threads. @@ -206,7 +126,7 @@ COMMON_SYSDEP void __cilkrts_start_workers(global_state_t *g, int n); /** - * OS-specific code to stop worker threads. + * @brief OS-specific code to stop worker threads. * * @param g The global state. */ @@ -214,27 +134,8 @@ COMMON_SYSDEP void __cilkrts_stop_workers(global_state_t *g); /** - * System dependent function called when a thread is bound to a worker. - * - * @param w Worker to bind to the currently executing thread. + * @brief Imports a user thread the first time it returns to a stolen parent. * - * @return 0 on success. - * @return non-zero on failure. - */ -COMMON_SYSDEP -int __cilkrts_sysdep_bind_thread(__cilkrts_worker *w); - -/** - * System dependent function called when a thread is unbound from a - * worker. - * - * @param w Worker to unbind from the currently executing thread. - */ -COMMON_SYSDEP -void __cilkrts_sysdep_unbind_thread(__cilkrts_worker *w); - -/** - * Imports a user thread the first time it returns to a stolen parent. * The thread has been bound to a worker, but additional steps need to * be taken to start running a scheduling loop. * @@ -244,35 +145,10 @@ COMMON_SYSDEP void __cilkrts_sysdep_import_user_thread(__cilkrts_worker *w); /** - * Fills in the __cilkrts_region_properties for a __cilkrts_stack. - * - * @param sd The stack that's being run on. - * @param properties Buffer to hold information about the stack region. - * - * @return 1 on success. - * @return 0 on failure. - */ -COMMON_SYSDEP -int __cilkrts_sysdep_get_stack_region_properties(__cilkrts_stack *sd, - __cilkrts_region_properties *properties); - -/** - * Returns true if the thread ID specified matches the thread ID we saved - * for a worker. 
- * - * @param g Pointer to the global state. Used to validate the index. - * @param i Index for the worker. - * @param thread_id Thread ID to be checked. - */ -COMMON_SYSDEP -int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g, - int i, - cilkos_thread_id_t thread_id); - -/** - * Function to be run for each of the system worker threads. - * This declaration also appears in cilk/cilk_undocumented.h -- don't change - * one declaration without also changing the other. + * @brief Function to be run for each of the system worker threads. + * + * This declaration also appears in cilk/cilk_undocumented.h -- don't + * change one declaration without also changing the other. * * @param arg The context value passed to the thread creation routine for * the OS we're running on. @@ -284,7 +160,7 @@ int __cilkrts_sysdep_is_worker_thread_id(global_state_t *g, CILK_EXPORT unsigned __CILKRTS_NOTHROW __stdcall __cilkrts_worker_stub(void *arg); #else -/* Do not use CILK_API because __cilkrts_worker_stub have defauld visibility */ +/* Do not use CILK_API because __cilkrts_worker_stub have default visibility */ __attribute__((visibility("default"))) void* __CILKRTS_NOTHROW __cilkrts_worker_stub(void *arg); #endif @@ -322,72 +198,73 @@ COMMON_SYSDEP void __cilkrts_setup_for_execution_sysdep(__cilkrts_worker *w, full_frame *ff); -/**************************************************************************** - * TBB interop functions - * **************************************************************************/ - /** - * Set the TBB callback information for a stack + * @brief OS-specific implementaton of resetting fiber and frame state + * to resume exeuction. * - * @param sd The stack to set the TBB callback information for - * @param o The TBB callback thunk. Specifies the callback address and - * context value. 
- */ -COMMON_SYSDEP -void __cilkrts_set_stack_op(__cilkrts_stack *sd, - __cilk_tbb_stack_op_thunk o); - -/** - * Call TBB to tell it about an "interesting" occurrance + * This method: + * 1. Calculates the value of stack pointer where we should resume + * execution of "sf". This calculation uses info stored in the + * fiber, and takes into account alignment and frame size. + * 2. Updates sf and ff to match the calculated stack pointer. * - * @param w The worker the stack is running on - * @param op Value specifying the "interesting" occurrance - * @param sd The stack TBB is being notified about - */ -COMMON_SYSDEP - -void __cilkrts_invoke_stack_op(__cilkrts_worker *w, - enum __cilk_tbb_stack_op op, - __cilkrts_stack *sd); - -/** - * Save TBB the TBB callback address and context value in thread-local - * storage. We'll use it later when the thread binds to a worker. + * On Unix, the stack pointer calculation looks up the base of the + * stack from the fiber. * - * @param o The TBB callback thunk which is to be saved. - */ -COMMON_SYSDEP -void tbb_interop_save_stack_op_info(__cilk_tbb_stack_op_thunk o); - -/** - * Called when we bind a thread to the runtime. If there is any TBB - * interop information in thread-local storage, bind it to the - * stack now. + * On Windows, this calculation is calls "alloca" to find a stack + * pointer on the currently executing stack. Thus, the Windows code + * assumes @c fiber is the currently executing fiber. * - * @param w The worker that has been bound to the thread. - * @param sd The stack that should take over the TBB interop information. + * @param fiber fiber to resume execution on. + * @param ff full_frame for the frame we're resuming. + * @param sf __cilkrts_stack_frame that we should resume + * @return The calculated stack pointer. 
*/ COMMON_SYSDEP -void tbb_interop_use_saved_stack_op_info(__cilkrts_worker *w, - __cilkrts_stack *sd); +char* sysdep_reset_jump_buffers_for_resume(cilk_fiber* fiber, + full_frame *ff, + __cilkrts_stack_frame *sf); /** - * Free any TBB interop information saved in thread-local storage + * @brief System-dependent longjmp to user code for resuming execution + * of a @c __cilkrts_stack_frame. + * + * This method: + * - Changes the stack pointer in @c sf to @c new_sp. + * - If @c ff_for_exceptions is not NULL, changes fields in @c sf and + * @c ff_for_exceptions for exception processing. + * - Restores any floating point state + * - Finishes with a longjmp to user code, never to return. + * + * @param new_sp stack pointer where we should resume execution + * @param sf @c __cilkrts_stack_frame for the frame we're resuming. + * @param ff_for_exceptions full_frame to safe exception info into, if necessary */ COMMON_SYSDEP -void tbb_interop_free_stack_op_info(void); +NORETURN +sysdep_longjmp_to_sf(char* new_sp, + __cilkrts_stack_frame *sf, + full_frame *ff_for_exceptions); /** - * Migrate any TBB interop information from a __cilkrts_stack to - * thread-local storage. Returns immediately if no TBB interop information - * has been associated with the stack. + * @brief System-dependent code to save floating point control information + * to a @c __cilkrts_stack_frame. This function will be called by compilers + * that cannot inline the code. * - * @param sd The __cilkrts_stack who's TBB interop information should be - * saved in thread-local storage. + * Note that this function does *not* save the current floating point + * registers. It saves the floating point control words that control + * precision and rounding and stuff like that. + * + * This function will be a noop for architectures that don't have warts + * like the floating point control words, or where the information is + * already being saved by the setjmp. 
+ * + * @param sf @c __cilkrts_stack_frame for the frame we're + * saving the floating point control information in. */ COMMON_SYSDEP -void tbb_interop_save_info_from_stack(__cilkrts_stack *sd); - +void +sysdep_save_fp_ctrl_state(__cilkrts_stack_frame *sf); __CILKRTS_END_EXTERN_C diff --git a/libcilkrts/runtime/unix_symbols.t b/libcilkrts/runtime/unix_symbols.t index 98d55984f47..1c4fdfd6ac1 100644 --- a/libcilkrts/runtime/unix_symbols.t +++ b/libcilkrts/runtime/unix_symbols.t @@ -1,25 +1,30 @@ -# Copyright (C) 2011 -# Intel Corporation -# -# This file is part of the Intel Cilk Plus Library. This library is free -# software; you can redistribute it and/or modify it under the -# terms of the GNU General Public License as published by the -# Free Software Foundation; either version 3, or (at your option) -# any later version. -# -# This library is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# Under Section 7 of GPL version 3, you are granted additional -# permissions described in the GCC Runtime Library Exception, version -# 3.1, as published by the Free Software Foundation. -# -# You should have received a copy of the GNU General Public License and -# a copy of the GCC Runtime Library Exception along with this program; -# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see -# <http://www.gnu.org/licenses/>. +# @copyright +# Copyright (C) 2011 +# Intel Corporation +# +# @copyright +# This file is part of the Intel Cilk Plus Library. This library is free +# software; you can redistribute it and/or modify it under the +# terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3, or (at your option) +# any later version. 
+# +# @copyright +# This library is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# @copyright +# Under Section 7 of GPL version 3, you are granted additional +# permissions described in the GCC Runtime Library Exception, version +# 3.1, as published by the Free Software Foundation. +# +# @copyright +# You should have received a copy of the GNU General Public License and +# a copy of the GCC Runtime Library Exception along with this program; +# see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +# <http://www.gnu.org/licenses/>. __cilkrts_bind_thread __cilkrts_bind_thread_1 @@ -41,8 +46,6 @@ __cilkrts_get_nworkers __cilkrts_get_pedigree_info __cilkrts_get_pedigree_internal __cilkrts_get_sf -__cilkrts_get_stack_region_id -__cilkrts_get_stack_region_properties __cilkrts_get_stack_size __cilkrts_get_tls_worker __cilkrts_get_tls_worker_fast @@ -57,13 +60,15 @@ __cilkrts_hyperobject_alloc __cilkrts_hyperobject_dealloc __cilkrts_hyperobject_noop_destroy __cilkrts_init -# __cilkrts_init_worker_sysdep __cilkrts_irml_version __cilkrts_leave_frame __cilkrts_metacall __cilkrts_rethrow __cilkrts_return_exception +__cilkrts_save_fp_ctrl_state __cilkrts_set_param +__cilkrts_stack_alloc +__cilkrts_stack_free __cilkrts_sync __cilkrts_synched __cilkrts_watch_stack diff --git a/libcilkrts/runtime/worker_mutex.c b/libcilkrts/runtime/worker_mutex.c index 51e4d4b47bd..d83b4b4bbff 100644 --- a/libcilkrts/runtime/worker_mutex.c +++ b/libcilkrts/runtime/worker_mutex.c @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the Intel Cilk Plus Library. 
This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ #include "worker_mutex.h" @@ -60,24 +65,23 @@ void __cilkrts_mutex_lock(__cilkrts_worker *w, struct mutex *m) NOTE_INTERVAL(w, INTERVAL_MUTEX_LOCK); if (!TRY_ACQUIRE(m)) { - START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); - count = 0; - do { - do { - __cilkrts_short_pause(); - - if (++count >= maxspin) { - STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); - START_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING); - /* let the OS reschedule every once in a while */ - __cilkrts_yield(); - STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING); - START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); - count = 0; - } - } while (m->lock != 0); - } while (!TRY_ACQUIRE(m)); - STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); + START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); + count = 0; + do { + do { + __cilkrts_short_pause(); + if (++count >= maxspin) { + STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); + START_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING); + /* let the OS reschedule every once in a while */ + __cilkrts_yield(); + STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_YIELDING); + START_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); + count = 0; + } + } while (m->lock != 0); + } while (!TRY_ACQUIRE(m)); + STOP_INTERVAL(w, INTERVAL_MUTEX_LOCK_SPINNING); } CILK_ASSERT(m->owner == 0); diff --git a/libcilkrts/runtime/worker_mutex.h b/libcilkrts/runtime/worker_mutex.h index 2dacf48980d..e016faf8916 100644 --- a/libcilkrts/runtime/worker_mutex.h +++ b/libcilkrts/runtime/worker_mutex.h @@ -2,28 +2,33 @@ * ************************************************************************* * - * Copyright (C) 2009-2011 - * Intel Corporation - * - * This file is part of the 
Intel Cilk Plus Library. This library is free - * software; you can redistribute it and/or modify it under the - * terms of the GNU General Public License as published by the - * Free Software Foundation; either version 3, or (at your option) - * any later version. - * - * This library is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * Under Section 7 of GPL version 3, you are granted additional - * permissions described in the GCC Runtime Library Exception, version - * 3.1, as published by the Free Software Foundation. - * - * You should have received a copy of the GNU General Public License and - * a copy of the GCC Runtime Library Exception along with this program; - * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - * <http://www.gnu.org/licenses/>. + * @copyright + * Copyright (C) 2009-2011 + * Intel Corporation + * + * @copyright + * This file is part of the Intel Cilk Plus Library. This library is free + * software; you can redistribute it and/or modify it under the + * terms of the GNU General Public License as published by the + * Free Software Foundation; either version 3, or (at your option) + * any later version. + * + * @copyright + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * @copyright + * Under Section 7 of GPL version 3, you are granted additional + * permissions described in the GCC Runtime Library Exception, version + * 3.1, as published by the Free Software Foundation. 
+ * + * @copyright + * You should have received a copy of the GNU General Public License and + * a copy of the GCC Runtime Library Exception along with this program; + * see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + * <http://www.gnu.org/licenses/>. **************************************************************************/ /** @@ -42,9 +47,6 @@ __CILKRTS_BEGIN_EXTERN_C -// Forwarded declarations -typedef struct __cilkrts_worker __cilkrts_worker; - /** * Mutexes are treated as an abstract data type within the Cilk * runtime system. They are implemented as simple spin loops and @@ -59,7 +61,7 @@ typedef struct mutex { } mutex; /** - * Initialize a Cilk mutex. + * @brief Initialize a Cilk mutex. * * @param m Mutex to be initialized. */ @@ -67,7 +69,9 @@ COMMON_PORTABLE void __cilkrts_mutex_init(struct mutex *m); /** - * Acquire a Cilk mutex. If statistics are being gathered, the time spent + * @brief Acquire a Cilk mutex. + * + * If statistics are being gathered, the time spent * acquiring the mutex will be attributed to the specified worker. * * @param w Worker that will become the owner of this mutex. @@ -77,9 +81,10 @@ COMMON_PORTABLE void __cilkrts_mutex_lock(__cilkrts_worker *w, struct mutex *m); /** - * Attempt to lock a Cilk mutex and fail if it isn't available. If statistics - * are being gathered, the time spent acquiring the mutex will be attributed - * to the specified worker. + * @brief Attempt to lock a Cilk mutex and fail if it isn't available. + * + * If statistics are being gathered, the time spent acquiring the + * mutex will be attributed to the specified worker. * * @param w Worker that will become the owner of this mutex. * @param m Mutex to be acquired. @@ -92,11 +97,12 @@ int __cilkrts_mutex_trylock(__cilkrts_worker *w, struct mutex *m); /** - * Release a Cilk mutex. If statistics are being gathered, the time spent + * @brief Release a Cilk mutex. 
+ * + * If statistics are being gathered, the time spent * acquiring the mutex will be attributed to the specified worker. * - * Preconditions: - * - The mutex must be owned by the worker. + * @pre The mutex must be owned by the worker. * * @param w Worker that owns this mutex. * @param m Mutex to be released. @@ -106,7 +112,8 @@ void __cilkrts_mutex_unlock(__cilkrts_worker *w, struct mutex *m); /** - * Deallocated a Cilk mutex. Currently does nothing. + * @brief Deallocate a Cilk mutex. Currently does nothing. + * * @param w Unused. * @param m Mutex to be deallocated. */ |