diff options
author | cltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4> | 2018-11-06 13:09:52 +0000 |
---|---|---|
committer | cltang <cltang@138bc75d-0d04-0410-961f-82ee72b054a4> | 2018-11-06 13:09:52 +0000 |
commit | 2e51c6a29146bcd76d63d1fac4647a5ff2d2c3ef (patch) | |
tree | 7d54fbc4ad86d32bf11c35fbebbe030294a143b6 /libgomp/oacc-mem.c | |
parent | 7d136f71bc7462ded31e3262a87f29a84cc8ff4b (diff) |
2018-11-06 Chung-Lin Tang <cltang@codesourcery.com>
Reviewed-by: Thomas Schwinge <thomas@codesourcery.com>
libgomp/
* oacc-mem.c (memcpy_tofrom_device): New function, combined from
acc_memcpy_to/from_device functions, now with async parameter.
(acc_memcpy_to_device): Modify to use memcpy_tofrom_device.
(acc_memcpy_from_device): Likewise.
(acc_memcpy_to_device_async): New API function.
(acc_memcpy_from_device_async): Likewise.
(present_create_copy): Add async parameter and async setting/unsetting.
(acc_create): Adjust present_create_copy call.
(acc_copyin): Likewise.
(acc_present_or_create): Likewise.
(acc_present_or_copyin): Likewise.
(acc_create_async): New API function.
(acc_copyin_async): New API function.
(delete_copyout): Add async parameter and async setting/unsetting.
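(acc_delete): Adjust delete_copyout call.
(acc_delete_finalize): Likewise.
(acc_delete_finalize_async): Likewise, passing on the async argument.
(acc_copyout): Likewise.
(acc_copyout_finalize): Likewise.
(acc_copyout_finalize_async): Likewise, passing on the async argument.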
(acc_delete_async): New API function.
(acc_copyout_async): Likewise.
(update_dev_host): Add async parameter and async setting/unsetting.
(acc_update_device): Adjust update_dev_host call.
(acc_update_self): Likewise.
(acc_update_device_async): New API function.
(acc_update_self_async): Likewise.
* openacc.h (acc_copyin_async): Declare new API function.
(acc_create_async): Likewise.
(acc_copyout_async): Likewise.
(acc_delete_async): Likewise.
(acc_update_device_async): Likewise.
(acc_update_self_async): Likewise.
(acc_memcpy_to_device_async): Likewise.
(acc_memcpy_from_device_async): Likewise.
* openacc_lib.h (acc_copyin_async_32_h): New subroutine.
(acc_copyin_async_64_h): New subroutine.
(acc_copyin_async_array_h): New subroutine.
(acc_create_async_32_h): New subroutine.
(acc_create_async_64_h): New subroutine.
(acc_create_async_array_h): New subroutine.
(acc_copyout_async_32_h): New subroutine.
(acc_copyout_async_64_h): New subroutine.
(acc_copyout_async_array_h): New subroutine.
(acc_delete_async_32_h): New subroutine.
(acc_delete_async_64_h): New subroutine.
(acc_delete_async_array_h): New subroutine.
(acc_update_device_async_32_h): New subroutine.
(acc_update_device_async_64_h): New subroutine.
(acc_update_device_async_array_h): New subroutine.
(acc_update_self_async_32_h): New subroutine.
(acc_update_self_async_64_h): New subroutine.
(acc_update_self_async_array_h): New subroutine.
* openacc.f90 (acc_copyin_async_32_h): New subroutine.
(acc_copyin_async_64_h): New subroutine.
(acc_copyin_async_array_h): New subroutine.
(acc_create_async_32_h): New subroutine.
(acc_create_async_64_h): New subroutine.
(acc_create_async_array_h): New subroutine.
(acc_copyout_async_32_h): New subroutine.
(acc_copyout_async_64_h): New subroutine.
(acc_copyout_async_array_h): New subroutine.
(acc_delete_async_32_h): New subroutine.
(acc_delete_async_64_h): New subroutine.
(acc_delete_async_array_h): New subroutine.
(acc_update_device_async_32_h): New subroutine.
(acc_update_device_async_64_h): New subroutine.
(acc_update_device_async_array_h): New subroutine.
(acc_update_self_async_32_h): New subroutine.
(acc_update_self_async_64_h): New subroutine.
(acc_update_self_async_array_h): New subroutine.
* libgomp.map (OACC_2.5): Add acc_copyin_async*, acc_copyout_async*,
acc_copyout_finalize_async*, acc_create_async*, acc_delete_async*,
acc_delete_finalize_async*, acc_memcpy_from_device_async*,
acc_memcpy_to_device_async*, acc_update_device_async*, and
acc_update_self_async* entries.
* testsuite/libgomp.oacc-c-c++-common/lib-94.c: New test.
* testsuite/libgomp.oacc-c-c++-common/lib-95.c: New test.
* testsuite/libgomp.oacc-fortran/lib-16.f90: New test.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@265842 138bc75d-0d04-0410-961f-82ee72b054a4
Diffstat (limited to 'libgomp/oacc-mem.c')
-rw-r--r-- | libgomp/oacc-mem.c | 142 |
1 files changed, 109 insertions, 33 deletions
diff --git a/libgomp/oacc-mem.c b/libgomp/oacc-mem.c index 3787ce49e38..72414b74897 100644 --- a/libgomp/oacc-mem.c +++ b/libgomp/oacc-mem.c @@ -153,8 +153,9 @@ acc_free (void *d) gomp_fatal ("error in freeing device memory in %s", __FUNCTION__); } -void -acc_memcpy_to_device (void *d, void *h, size_t s) +static void +memcpy_tofrom_device (bool from, void *d, void *h, size_t s, int async, + const char *libfnname) { /* No need to call lazy open here, as the device pointer must have been obtained from a routine that did that. */ @@ -164,31 +165,49 @@ acc_memcpy_to_device (void *d, void *h, size_t s) if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) { - memmove (d, h, s); + if (from) + memmove (h, d, s); + else + memmove (d, h, s); return; } - if (!thr->dev->host2dev_func (thr->dev->target_id, d, h, s)) - gomp_fatal ("error in %s", __FUNCTION__); + if (async > acc_async_sync) + thr->dev->openacc.async_set_async_func (async); + + bool ret = (from + ? thr->dev->dev2host_func (thr->dev->target_id, h, d, s) + : thr->dev->host2dev_func (thr->dev->target_id, d, h, s)); + + if (async > acc_async_sync) + thr->dev->openacc.async_set_async_func (acc_async_sync); + + if (!ret) + gomp_fatal ("error in %s", libfnname); } void -acc_memcpy_from_device (void *h, void *d, size_t s) +acc_memcpy_to_device (void *d, void *h, size_t s) { - /* No need to call lazy open here, as the device pointer must have - been obtained from a routine that did that. 
*/ - struct goacc_thread *thr = goacc_thread (); + memcpy_tofrom_device (false, d, h, s, acc_async_sync, __FUNCTION__); +} - assert (thr && thr->dev); +void +acc_memcpy_to_device_async (void *d, void *h, size_t s, int async) +{ + memcpy_tofrom_device (false, d, h, s, async, __FUNCTION__); +} - if (thr->dev->capabilities & GOMP_OFFLOAD_CAP_SHARED_MEM) - { - memmove (h, d, s); - return; - } +void +acc_memcpy_from_device (void *h, void *d, size_t s) +{ + memcpy_tofrom_device (true, d, h, s, acc_async_sync, __FUNCTION__); +} - if (!thr->dev->dev2host_func (thr->dev->target_id, h, d, s)) - gomp_fatal ("error in %s", __FUNCTION__); +void +acc_memcpy_from_device_async (void *h, void *d, size_t s, int async) +{ + memcpy_tofrom_device (true, d, h, s, async, __FUNCTION__); } /* Return the device pointer that corresponds to host data H. Or NULL @@ -428,7 +447,7 @@ acc_unmap_data (void *h) #define FLAG_COPY (1 << 2) static void * -present_create_copy (unsigned f, void *h, size_t s) +present_create_copy (unsigned f, void *h, size_t s, int async) { void *d; splay_tree_key n; @@ -490,11 +509,17 @@ present_create_copy (unsigned f, void *h, size_t s) gomp_mutex_unlock (&acc_dev->lock); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + tgt = gomp_map_vars (acc_dev, mapnum, &hostaddrs, NULL, &s, &kinds, true, GOMP_MAP_VARS_OPENACC); /* Initialize dynamic refcount. 
*/ tgt->list[0].key->dynamic_refcount = 1; + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + gomp_mutex_lock (&acc_dev->lock); d = tgt->to_free; @@ -510,19 +535,32 @@ present_create_copy (unsigned f, void *h, size_t s) void * acc_create (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync); +} + +void +acc_create_async (void *h, size_t s, int async) +{ + present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, async); } void * acc_copyin (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, + acc_async_sync); +} + +void +acc_copyin_async (void *h, size_t s, int async) +{ + present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, async); } void * acc_present_or_create (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE, h, s, acc_async_sync); } /* acc_pcreate is acc_present_or_create by a different name. */ @@ -539,7 +577,8 @@ acc_pcreate (void *h, size_t s) void * acc_present_or_copyin (void *h, size_t s) { - return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s); + return present_create_copy (FLAG_PRESENT | FLAG_CREATE | FLAG_COPY, h, s, + acc_async_sync); } /* acc_pcopyin is acc_present_or_copyin by a different name. 
*/ @@ -557,7 +596,7 @@ acc_pcopyin (void *h, size_t s) #define FLAG_FINALIZE (1 << 1) static void -delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) +delete_copyout (unsigned f, void *h, size_t s, int async, const char *libfnname) { size_t host_size; splay_tree_key n; @@ -633,7 +672,13 @@ delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) } if (f & FLAG_COPYOUT) - acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + { + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + } gomp_remove_var (acc_dev, n); } @@ -644,41 +689,54 @@ delete_copyout (unsigned f, void *h, size_t s, const char *libfnname) void acc_delete (void *h , size_t s) { - delete_copyout (0, h, s, __FUNCTION__); + delete_copyout (0, h, s, acc_async_sync, __FUNCTION__); +} + +void +acc_delete_async (void *h , size_t s, int async) +{ + delete_copyout (0, h, s, async, __FUNCTION__); } void acc_delete_finalize (void *h , size_t s) { - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_FINALIZE, h, s, acc_async_sync, __FUNCTION__); } void acc_delete_finalize_async (void *h , size_t s, int async) { - delete_copyout (FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_FINALIZE, h, s, async, __FUNCTION__); } void acc_copyout (void *h, size_t s) { - delete_copyout (FLAG_COPYOUT, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT, h, s, acc_async_sync, __FUNCTION__); +} + +void +acc_copyout_async (void *h, size_t s, int async) +{ + delete_copyout (FLAG_COPYOUT, h, s, async, __FUNCTION__); } void acc_copyout_finalize (void *h, size_t s) { - delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, acc_async_sync, + __FUNCTION__); } void acc_copyout_finalize_async (void *h, size_t s, int async) { - delete_copyout 
(FLAG_COPYOUT | FLAG_FINALIZE, h, s, __FUNCTION__); + delete_copyout (FLAG_COPYOUT | FLAG_FINALIZE, h, s, async, __FUNCTION__); } static void -update_dev_host (int is_dev, void *h, size_t s) +update_dev_host (int is_dev, void *h, size_t s, int async) { splay_tree_key n; void *d; @@ -704,24 +762,42 @@ update_dev_host (int is_dev, void *h, size_t s) d = (void *) (n->tgt->tgt_start + n->tgt_offset + (uintptr_t) h - n->host_start); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (async); + if (is_dev) acc_dev->host2dev_func (acc_dev->target_id, d, h, s); else acc_dev->dev2host_func (acc_dev->target_id, h, d, s); + if (async > acc_async_sync) + acc_dev->openacc.async_set_async_func (acc_async_sync); + gomp_mutex_unlock (&acc_dev->lock); } void acc_update_device (void *h, size_t s) { - update_dev_host (1, h, s); + update_dev_host (1, h, s, acc_async_sync); +} + +void +acc_update_device_async (void *h, size_t s, int async) +{ + update_dev_host (1, h, s, async); } void acc_update_self (void *h, size_t s) { - update_dev_host (0, h, s); + update_dev_host (0, h, s, acc_async_sync); +} + +void +acc_update_self_async (void *h, size_t s, int async) +{ + update_dev_host (0, h, s, async); } void |